2013-05-03 58 views
9

無法使用R data.table包執行計算。我有一個巨大的數據幀,最後30行如下:

library(data.table) 

dput(P)

structure(list(DATE = structure(c(1367516015, 1367516045, 1367516075, 
1367516105, 1367516135, 1367516165, 1367516195, 1367516225, 1367516255, 
1367516285, 1367516315, 1367516345, 1367516375, 1367516405, 1367516435, 
1367516465, 1367516495, 1367516525, 1367516555, 1367516585, 1367516615, 
1367516645, 1367516675, 1367516705, 1367516735, 1367516765, 1367516795, 
1367516825, 1367516855, 1367516885), class = c("POSIXct", "POSIXt" 
), tzone = ""), LPAR = structure(c(6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("cigp01a4a004", "cigp01b4a002", 
"cigp01b4a004", "cigp04a4a002", "cigp04a4a004", "cigp04b4a002", 
"cigp04b4a004"), class = "factor"), ENT = c(0.5, 0.5, 0.5, 0.5, 
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 
), USR_SYS_CPU_PCT = c(79L, 80L, 77L, 77L, 77L, 76L, 79L, 82L, 
81L, 80L, 79L, 77L, 77L, 77L, 79L, 79L, 80L, 82L, 82L, 83L, 80L, 
81L, 80L, 78L, 78L, 83L, 86L, 87L, 88L, 87L), ENT_PCT = c(706.8, 
693.8, 570.1, 641.5, 558.5, 601.5, 674.3, 742.3, 668.9, 722.6, 
679.1, 677.2, 548.5, 644.6, 689.3, 716.1, 709.5, 767.3, 753.7, 
786.4, 684.2, 735.1, 688.2, 676.6, 645.6, 788, 859.5, 832.6, 
883.1, 872.2), PHYSICAL_CPU_USED = c(3.53, 3.47, 2.85, 3.21, 
2.79, 3.01, 3.37, 3.71, 3.34, 3.61, 3.4, 3.39, 2.74, 3.22, 3.45, 
3.58, 3.55, 3.84, 3.77, 3.93, 3.42, 3.68, 3.44, 3.38, 3.23, 3.94, 
4.3, 4.16, 4.42, 4.36), PROC_QUE = c(12L, 13L, 19L, 16L, 11L, 
13L, 17L, 14L, 9L, 10L, 12L, 13L, 16L, 14L, 22L, 17L, 17L, 17L, 
26L, 26L, 15L, 43L, 9L, 11L, 12L, 7L, 31L, 26L, 27L, 23L), RELATIVE_CORES = c(3.53, 
3.47, 2.85, 3.21, 2.79, 3.01, 3.37, 3.71, 3.34, 3.61, 3.4, 3.39, 
2.74, 3.22, 3.45, 3.58, 3.55, 3.84, 3.77, 3.93, 3.42, 3.68, 3.44, 
3.38, 3.23, 3.94, 4.3, 4.16, 4.42, 4.36), USED_CORES = c(2.7887, 
2.776, 2.1945, 2.4717, 2.1483, 2.2876, 2.6623, 3.0422, 2.7054, 
2.888, 2.686, 2.6103, 2.1098, 2.4794, 2.7255, 2.8282, 2.84, 3.1488, 
3.0914, 3.2619, 2.736, 2.9808, 2.752, 2.6364, 2.5194, 3.2702, 
3.698, 3.6192, 3.8896, 3.7932)), .Names = c("DATE", "LPAR", "ENT", 
"USR_SYS_CPU_PCT", "ENT_PCT", "PHYSICAL_CPU_USED", "PROC_QUE", 
"RELATIVE_CORES", "USED_CORES"), class = "data.frame", row.names = c(NA, 
-30L)) 

當我想使用data.table計算一些值時,如下所示:

p<-data.table(p) 
p<-p[,RELATIVE_PERCENT:=ifelse(ENT_PCT>100, (USED_CORES/ENT)*100, USR_SYS_CPU_PCT), by= c("DATE", "LPAR")] 

我得到這個錯誤:

Error in `[.data.table`(x, , `:=`(RELATIVE_PERCENT, ifelse(ENT_PCT > 100, : 
    Type of RHS ('integer') must match LHS ('double'). To check and coerce would 
    impact performance too much for the fastest cases. Either change the type of 
    the target column, or coerce the RHS of := yourself (e.g. by using 1L instead 
    of 1) 

這個錯誤是什麼意思?我怎樣才能解決這個錯誤?

+0

使用data.table 1.8.8運行上面的代碼時,我沒有得到這樣的錯誤。 – 2013-05-03 16:19:24

回答

9

問題是您的ifelse語句對某些值返回integer類型,對於其他某些條目返回numeric(double)。並且data.table抱怨列類型中的不匹配,因爲它期望強制由用戶執行(出於錯誤中給出的性能原因)。所以,只需用as.numeric來包裝它,這樣所有的值都會被轉換爲double。

p <- p[,RELATIVE_PERCENT := as.numeric(ifelse(ENT_PCT>100, (USED_CORES/ENT)*100, 
         USR_SYS_CPU_PCT)), by= c("DATE", "LPAR")] 
3

我這樣做:

sapply(p, class) 

,發現我的其中一列是整數(integer)類型。然後,我這樣做:

x<-x[,RELATIVE_PERCENT:=ifelse(ENT_PCT>100, ((USED_CORES/ENT)*100), as.numeric(USR_SYS_CPU_PCT)), by= c("DATE", "LPAR")] 

,這樣問題就完美解決了。