2014-12-07 83 views
1

我有以下的數據幀:的R - 有條件行減法

a <- structure(list(half.fac = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("2006-08-29 05:00:00", 
"2006-08-29 05:30:00", "2006-08-29 06:00:00", "2006-08-29 06:30:00", 
"2006-08-29 07:00:00", "2006-08-29 07:30:00", "2006-08-29 08:00:00", 
"2006-08-29 08:30:00", "2006-08-29 09:00:00", "2006-08-29 09:30:00", 
"2006-08-29 10:00:00", "2006-08-29 10:30:00", "2006-08-29 11:00:00", 
"2006-08-29 11:30:00", "2006-08-29 12:00:00", "2006-08-29 12:30:00", 
"2006-08-29 13:00:00", "2006-08-29 13:30:00", "2006-08-29 14:00:00", 
"2006-08-29 14:30:00", "2006-08-29 15:00:00", "2006-08-29 15:30:00", 
"2006-08-29 16:00:00", "2006-08-29 16:30:00", "2006-08-29 17:00:00", 
"2006-08-29 17:30:00", "2006-08-29 18:00:00", "2006-08-29 18:30:00", 
"2006-08-29 19:00:00", "2006-08-29 19:30:00", "2006-08-29 20:00:00", 
"2006-08-29 20:30:00", "2006-08-29 21:00:00", "2006-08-29 21:30:00", 
"2006-08-29 22:00:00", "2006-08-29 22:30:00", "2006-08-29 23:00:00", 
"2006-08-29 23:30:00", "2006-08-30 00:00:00", "2006-08-30 00:30:00", 
"2006-08-30 01:00:00", "2006-08-30 01:30:00", "2006-08-30 02:00:00", 
"2006-08-30 02:30:00", "2006-08-30 03:00:00", "2006-08-30 03:30:00", 
"2006-08-30 04:00:00", "2006-08-30 04:30:00", "2006-08-30 05:00:00" 
), class = "factor"), Ux = c(0.354, 0.38225, 0.48725, 0.43425, 
0.3825, 0.406, 0.45125, 0.5975, 0.62525, 0.587, 0.57925, 0.57125, 
0.52725, 0.4245, 0.37775, 0.4355, 0.4585, 0.3125, 0.4075, 0.17275 
), Ts = c(16.0024, 16.2281, 16.2468, 16.0788, 16.0465, 15.8956, 
15.8922, 15.9074, 15.7582, 15.7938, 15.8565, 15.6904, 15.6073, 
15.6632, 15.6344, 15.6531, 15.7006, 15.6989, 15.6938, 15.3022 
)), .Names = c("half.fac", "Ux", "Ts"), row.names = c(NA, 20L 
), class = "data.frame") 

b <- structure(list(half.fac = structure(1:49, .Label = c("2006-08-29 05:00:00", 
"2006-08-29 05:30:00", "2006-08-29 06:00:00", "2006-08-29 06:30:00", 
"2006-08-29 07:00:00", "2006-08-29 07:30:00", "2006-08-29 08:00:00", 
"2006-08-29 08:30:00", "2006-08-29 09:00:00", "2006-08-29 09:30:00", 
"2006-08-29 10:00:00", "2006-08-29 10:30:00", "2006-08-29 11:00:00", 
"2006-08-29 11:30:00", "2006-08-29 12:00:00", "2006-08-29 12:30:00", 
"2006-08-29 13:00:00", "2006-08-29 13:30:00", "2006-08-29 14:00:00", 
"2006-08-29 14:30:00", "2006-08-29 15:00:00", "2006-08-29 15:30:00", 
"2006-08-29 16:00:00", "2006-08-29 16:30:00", "2006-08-29 17:00:00", 
"2006-08-29 17:30:00", "2006-08-29 18:00:00", "2006-08-29 18:30:00", 
"2006-08-29 19:00:00", "2006-08-29 19:30:00", "2006-08-29 20:00:00", 
"2006-08-29 20:30:00", "2006-08-29 21:00:00", "2006-08-29 21:30:00", 
"2006-08-29 22:00:00", "2006-08-29 22:30:00", "2006-08-29 23:00:00", 
"2006-08-29 23:30:00", "2006-08-30 00:00:00", "2006-08-30 00:30:00", 
"2006-08-30 01:00:00", "2006-08-30 01:30:00", "2006-08-30 02:00:00", 
"2006-08-30 02:30:00", "2006-08-30 03:00:00", "2006-08-30 03:30:00", 
"2006-08-30 04:00:00", "2006-08-30 04:30:00", "2006-08-30 05:00:00" 
), class = "factor"), Ux = c(0.386129423856881, 0.376585152777778, 
0.223084, 0.324716694444444, 0.284751444444444, 0.330879361111111, 
0.435775208333333, 0.615431625, 0.0626402222222222, 0.294361277777778, 
0.464928125, 0.194410652777778, 0.162782402777778, 0.452202958333333, 
0.115627805555556, -0.504861722222222, -0.519877875, -0.472751402777778, 
-0.304711708333333, -0.422057305555556, -0.366469180555556, -0.733386694444444, 
-0.547648027777778, -0.755199347222222, -0.550659486111111, -0.5423535, 
-0.366822472222222, -0.383240513888889, -0.310595805555556, -0.52455025, 
-0.362563625, -0.652825569444444, -0.610772416666667, -0.410371666666667, 
-0.472121152777778, -0.425157875, -0.402904430555556, -0.500900541666667, 
-0.212771525026505, -0.311444125, -0.2838, -0.329137972222222, 
0.0119679583333333, -0.0432450416666667, -0.116798097222222, 
-0.0186036944444444, 0.18647825, 0.0159295694444444, 0.2725), 
    Ts = c(15.6259581698983, 15.2544032611111, 15.3477244944444, 
    16.0545841722222, 16.8355633888889, 17.7415598722222, 18.6617318333333, 
    19.5413804333333, 20.3955452333333, 21.3235478222222, 22.1279614944444, 
    23.0201452888889, 23.5360555222222, 24.1431266555556, 24.6397015777778, 
    24.9368367833333, 24.8767400111111, 25.0295079222222, 24.9217255611111, 
    24.7817519388889, 25.0392368777778, 24.8607259111111, 24.6273860888889, 
    23.9305384555556, 23.6246723388889, 23.3412936666667, 22.5086166611111, 
    21.3677308722222, 20.4862971111111, 19.9382697666667, 19.1141656277778, 
    18.9857482722222, 18.6177629666667, 18.06932875, 17.7128214, 
    17.2961282944444, 16.8209205833333, 16.9382630722222, 16.2425613023827, 
    16.31353845, 16.2208566277778, 16.0621524611111, 15.3475440055556, 
    14.9305123722222, 14.3440030833333, 13.6119074444444, 13.3696283111111, 
    13.4342943555556, 13.2741)), .Names = c("half.fac", "Ux.30min", 
"Ts.30min"), row.names = c(NA, -49L), class = "data.frame") 

其中a是我的一整天和b高頻測量是其平均每半小時。

我需要做的是,對於每一行,從$ Ux IF減去b $ Ux.30分鐘,並且只有在半小時分數相同的情況下($ half.fac = b $ half.fac) 。對於Ts也是這樣。

我心目中什麼是一樣的東西:

if a$half.fac = b$half.fac then 

    a$Ts.diff <- a$Ts - b$Ts.30min 
    a$Ux.diff <- a$Ux - b$Ux.30min 

endif 

的人誰可能是熟悉,我想對數據進行分解雷諾茲。

不管方法/包,理想情況下它應該儘可能高效(快速),因爲實際上我的數據幀「a」幾乎是一百萬行!

任何想法?

非常感謝提前!

回答

1

下面是使用dplyr類似的方法。第一步是合併您的ab。然後,您將進行計算並創建新列。

library(dplyr) 

inner_join(a, b) %>% 
    mutate(Ux.Ux30 = Ux - Ux.30min, 
      Ts.Ts30 = Ts - Ts.30min) 

# A part of the results 
#    half.fac  Ux  Ts Ux.30min Ts.30min  Ux.Ux30  Ts.Ts30 
#1 2006-08-29 05:00:00 0.35400 16.0024 0.3861294 15.62596 -0.032129424 0.37644183 
#2 2006-08-29 05:00:00 0.38225 16.2281 0.3861294 15.62596 -0.003879424 0.60214183 
#3 2006-08-29 05:00:00 0.48725 16.2468 0.3861294 15.62596 0.101120576 0.62084183 
#4 2006-08-29 05:00:00 0.43425 16.0788 0.3861294 15.62596 0.048120576 0.45284183 
#5 2006-08-29 05:00:00 0.38250 16.0465 0.3861294 15.62596 -0.003629424 0.42054183 

EXTRA

下面是使用data.table的一種方式。這是我天真的嘗試。

#Convert data.frame to data.table 
setDT(a) 
setDT(b) 

# Set a key column for merging a and b 
setkey(a, "half.fac") 
setkey(b, "half.fac") 

# Merge a and b, and do the calculation 
b[a][, `:=`(Ux.Ux30 = Ux - Ux.30min, Ts.Ts30 = Ts - Ts.30min)] 
+0

這個解決方案非常快!謝謝! – thiagoveloso 2014-12-07 04:27:16

+0

高興。如果你使用'data.table',它會更快。 – jazzurro 2014-12-07 04:29:01

+0

願意提供一個例子嗎? – thiagoveloso 2014-12-07 04:36:03

1

第一合併A和B:

ab <- merge(a, b, by=1) 
ab$Ts.diff <- ab$Ts - ab$Ts.30min 
ab$Ux.diff <- ab$Ux - bb$Ux.30min 

隨着示例給出:

> ab$Ts.diff <- ab$Ts - ab$Ts.30min 
>  ab$Ux.diff <- ab$Ux - ab$Ux.30min 
> ab 
       half.fac  Ux  Ts Ux.30min Ts.30min  Ts.diff 
1 2006-08-29 05:00:00 0.35400 16.0024 0.3861294 15.62596 0.37644183 
2 2006-08-29 05:00:00 0.38225 16.2281 0.3861294 15.62596 0.60214183 
3 2006-08-29 05:00:00 0.48725 16.2468 0.3861294 15.62596 0.62084183 
4 2006-08-29 05:00:00 0.43425 16.0788 0.3861294 15.62596 0.45284183 
5 2006-08-29 05:00:00 0.38250 16.0465 0.3861294 15.62596 0.42054183 
6 2006-08-29 05:00:00 0.40600 15.8956 0.3861294 15.62596 0.26964183 
7 2006-08-29 05:00:00 0.45125 15.8922 0.3861294 15.62596 0.26624183 
8 2006-08-29 05:00:00 0.59750 15.9074 0.3861294 15.62596 0.28144183 
9 2006-08-29 05:00:00 0.62525 15.7582 0.3861294 15.62596 0.13224183 
10 2006-08-29 05:00:00 0.58700 15.7938 0.3861294 15.62596 0.16784183 
11 2006-08-29 05:00:00 0.57925 15.8565 0.3861294 15.62596 0.23054183 
12 2006-08-29 05:00:00 0.57125 15.6904 0.3861294 15.62596 0.06444183 
13 2006-08-29 05:00:00 0.52725 15.6073 0.3861294 15.62596 -0.01865817 
14 2006-08-29 05:00:00 0.42450 15.6632 0.3861294 15.62596 0.03724183 
15 2006-08-29 05:00:00 0.37775 15.6344 0.3861294 15.62596 0.00844183 
16 2006-08-29 05:00:00 0.43550 15.6531 0.3861294 15.62596 0.02714183 
17 2006-08-29 05:00:00 0.45850 15.7006 0.3861294 15.62596 0.07464183 
18 2006-08-29 05:00:00 0.31250 15.6989 0.3861294 15.62596 0.07294183 
19 2006-08-29 05:00:00 0.40750 15.6938 0.3861294 15.62596 0.06784183 
20 2006-08-29 05:00:00 0.17275 15.3022 0.3861294 15.62596 -0.32375817 
     Ux.diff 
1 -0.032129424 
2 -0.003879424 
3 0.101120576 
4 0.048120576 
5 -0.003629424 
6 0.019870576 
7 0.065120576 
8 0.211370576 
9 0.239120576 
10 0.200870576 
11 0.193120576 
12 0.185120576 
13 0.141120576 
14 0.038370576 
15 -0.008379424 
16 0.049370576 
17 0.072370576 
18 -0.073629424 
19 0.021370576 
20 -0.213379424 
+0

Thanks @BondedDust,這也是非常快速和易於閱讀! – thiagoveloso 2014-12-07 04:35:01