2017-12-27 119 views
0

將數據分組,並計算平均值與標準差(SD)。我有一個如下所示的資料框 df:

 # Raise the scientific-notation penalty so the large numeric values below
 # are printed in full fixed notation.
 options(scipen = 999)
 # Sample data: twelve rows for a single imei, with readings 60000 apart.
 # `ign` runs are: 3 x "Off", 2 x "On", 2 x "Off", 5 x "On".
 df <- data.frame(
   imei = rep(35745407328, times = 12),
   ign = rep(c("Off", "On", "Off", "On"), times = c(3, 2, 2, 5)),
   unixTime = seq(from = 1514313014000, by = 60000, length.out = 12)
 )

DF

 ---------------------------------- 
     imei   ign unixTime 
     ---------------------------------- 
     35745407328  Off 1514313014000 
     ---------------------------------- 
     35745407328  Off 1514313074000 
     ---------------------------------- 
     35745407328  Off 1514313134000 
     ---------------------------------- 
     35745407328  On 1514313194000 
     ---------------------------------- 
     35745407328  On 1514313254000 
     ---------------------------------- 
     35745407328  Off 1514313314000 
     ---------------------------------- 
     35745407328  Off 1514313374000 
     ---------------------------------- 
     35745407328  On 1514313434000 
     ---------------------------------- 
     35745407328  On 1514313494000 
     ---------------------------------- 
     35745407328  On 1514313554000 
     ---------------------------------- 
     35745407328  On 1514313614000 
     ---------------------------------- 
     35745407328  On 1514313674000 
     ---------------------------------- 

我希望根據「ign」欄位對上面的數據進行分組,然後

計算每組 unixTime 的平均值和標準偏差(SD),

以及每組中 unixTime 的第一個值與最後一個值之間的差異。

計算平均值、SD 和差異時所依據的分組如下(連續相同的 ign 值為一組):

 ---------------------------------- 
     imei   ign unixTime 
     ---------------------------------- 
     35745407328  Off 1514313014000 
     ---------------------------------- 
     35745407328  Off 1514313074000 
     ---------------------------------- 
     35745407328  Off 1514313134000 
     ---------------------------------- 

     ---------------------------------- 
     35745407328  On 1514313194000 
     ---------------------------------- 
     35745407328  On 1514313254000 
     ---------------------------------- 

     ---------------------------------- 
     35745407328  Off 1514313314000 
     ---------------------------------- 
     35745407328  Off 1514313374000 
     ---------------------------------- 

     ---------------------------------- 
     35745407328  On 1514313434000 
     ---------------------------------- 
     35745407328  On 1514313494000 
     ---------------------------------- 
     35745407328  On 1514313554000 
     ---------------------------------- 
     35745407328  On 1514313614000 
     ---------------------------------- 
     35745407328  On 1514313674000 
     ---------------------------------- 

請幫我解決這個問題。

如果已經有現成的答案,請提供鏈接。謝謝。

回答

1

使用 data.table 的解決方案。

library(data.table)

# Turn df into a data.table by reference.
setDT(df)

# Step 1: give every run of consecutive identical `ign` values its own id.
df[, Group := rleid(ign)]

# Step 2: within each run, attach the mean and standard deviation of
# unixTime, plus the first value minus the last value (hence negative here).
df[
  ,
  `:=`(
    Mean = mean(unixTime),
    SD = sd(unixTime),
    Diff = first(unixTime) - last(unixTime)
  ),
  by = Group
]

# `:=` updated df in place, so df2 is the same table under a second name.
df2 <- df

# Trailing [] forces the table to print.
df2[]
#   imei ign  unixTime Group   Mean  SD Diff 
# 1: 35745407328 Off 1514313014000  1 1514313074000 60000.00 -120000 
# 2: 35745407328 Off 1514313074000  1 1514313074000 60000.00 -120000 
# 3: 35745407328 Off 1514313134000  1 1514313074000 60000.00 -120000 
# 4: 35745407328 On 1514313194000  2 1514313224000 42426.41 -60000 
# 5: 35745407328 On 1514313254000  2 1514313224000 42426.41 -60000 
# 6: 35745407328 Off 1514313314000  3 1514313344000 42426.41 -60000 
# 7: 35745407328 Off 1514313374000  3 1514313344000 42426.41 -60000 
# 8: 35745407328 On 1514313434000  4 1514313554000 94868.33 -240000 
# 9: 35745407328 On 1514313494000  4 1514313554000 94868.33 -240000 
# 10: 35745407328 On 1514313554000  4 1514313554000 94868.33 -240000 
# 11: 35745407328 On 1514313614000  4 1514313554000 94868.33 -240000 
# 12: 35745407328 On 1514313674000  4 1514313554000 94868.33 -240000 

或者使用 dplyr(搭配 data.table 的 rleid()):

library(dplyr)
library(data.table)

# rleid() (from data.table) numbers each run of consecutive identical `ign`
# values; the summaries are then added per run and the grouping is dropped.
df2 <- df %>%
  mutate(Group = rleid(ign)) %>%
  group_by(Group) %>%
  mutate(
    Mean = mean(unixTime),
    SD = sd(unixTime),
    # First value minus last value within the run (negative for this data).
    Diff = first(unixTime) - last(unixTime)
  ) %>%
  ungroup()

df2
#   imei ign  unixTime Group   Mean  SD Diff 
#   <dbl> <fctr>   <dbl> <int>   <dbl> <dbl> <dbl> 
# 1 35745407328 Off 1514313014000  1 1514313074000 60000.00 -120000 
# 2 35745407328 Off 1514313074000  1 1514313074000 60000.00 -120000 
# 3 35745407328 Off 1514313134000  1 1514313074000 60000.00 -120000 
# 4 35745407328  On 1514313194000  2 1514313224000 42426.41 -60000 
# 5 35745407328  On 1514313254000  2 1514313224000 42426.41 -60000 
# 6 35745407328 Off 1514313314000  3 1514313344000 42426.41 -60000 
# 7 35745407328 Off 1514313374000  3 1514313344000 42426.41 -60000 
# 8 35745407328  On 1514313434000  4 1514313554000 94868.33 -240000 
# 9 35745407328  On 1514313494000  4 1514313554000 94868.33 -240000 
# 10 35745407328  On 1514313554000  4 1514313554000 94868.33 -240000 
# 11 35745407328  On 1514313614000  4 1514313554000 94868.33 -240000 
# 12 35745407328  On 1514313674000  4 1514313554000 94868.33 -240000 
(以上是使用 dplyr 的解決方案。)