2016-08-17 38 views
0

R 中的條件重新編碼與求和。我的(示例)數據如下所示:

# Sample data for the question: 14 rows, six integer columns.
# x1 is a 0/1 flag, x2 and x3 are the values rows are matched on, and week is
# the week number compared against the cut-offs.
# NOTE(review): the fifth column is spelled "newar1" (not "newvar1") in the
# original dump; the spelling is kept unchanged here.
mydata <- data.frame(
  x1 = rep(c(0L, 1L), times = c(6L, 8L)),
  x2 = c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 4L, 4L),
  x3 = c(1L, 3L, 5L, 1L, 3L, 5L, 1L, 4L, 5L, 2L, 1L, 5L, 6L, 6L),
  week = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 30L, 50L, 22L, 52L, 36L, 25L, 26L),
  newar1 = c(0L, 0L, 2L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L),
  newvar2 = c(0L, 2L, 0L, 0L, 2L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L)
)



x1 x2 x3 week 
0 1 1 0 
0 2 3 0 
0 3 5 0 
0 1 1 0 
0 2 3 0 
0 3 5 0 
1 1 1 1 
1 2 4 30 
1 3 5 50 
1 1 2 22 
1 2 1 52 
1 3 5 36 
1 4 6 25 
1 4 6 26 

我想創建1個新的變量newvar1

  • 如果 x1 = 0 => 我想統計整個數據集中 x1 等於 1 的次數(只計算其他行,不包括該行自身的觀測值),但只計算 x2 和 x3 組合相同且週數(week)大於 24 的行。

  • 如果 x1 = 1 => 我想統計整個數據集中 x1 等於 1 的次數,但只計算 x2 和 x3 組合相同且週數減 25 大於零((week - 25) > 0)的行。

「sum」是指條件成立時x1等於1的次數。

我所說的「如果」,是指只有在上述條件成立時才對 x1 求和。基本上我的問題是:如何只根據條件對某些值求和?

我的數據應該是這樣的:

x1 x2 x3 week newvar1 
0 1 1 0 0  
0 2 3 0 0  
0 3 5 0 2  
0 1 1 0 0  
0 2 3 0 0  
0 3 5 0 2  
1 1 1 1 0  
1 2 4 30 0  
1 3 5 50 1  
1 1 2 22 0  
1 2 1 52 0  
1 3 5 36 0  
1 4 6 25 0  
1 4 6 26 1  

目前,我有下面的代碼,但是它沒有考慮 x2、x3 組合相同以及週數(week)的約束。有什麼建議可以做到這一點嗎?

# data.table syntax: within each (x2, x3) group, set newvar1 to the group's
# total of x1. No week condition is applied here.
mydata[, newvar1 := sum(x1), by = .(x2, x3)]
+0

請使用dput()提供一個可重複的示例 –

+0

包括,謝謝。 – research111

+2

我在理解'newvar1'和'newvar2'的標準時遇到了很多麻煩。 – jdobres

回答

0

我認爲,我們可以用一個for循環來完成這件事:

# Build newvar1 for every row. For row i, count the OTHER rows (row i itself
# is always left out) that have x1 == 1, share row i's (x2, x3) pair, and
# whose week exceeds a cut-off: 24 when row i has x1 == 0, 25 when x1 == 1.
n_rows <- nrow(mydata)

# Cut-off per row, chosen from the row's own x1 value.
week_cutoff <- ifelse(mydata$x1 == 0, 24, 25)

# Assigning the whole column at once creates newvar1 even when it does not
# exist yet (a per-row `mydata[i, ]$newvar1 <- ...` relies on the column
# already being there), and seq_len() does not iterate on a zero-row input.
mydata$newvar1 <- vapply(seq_len(n_rows), function(i) {
  is_other_row <- seq_len(n_rows) != i
  sum(
    is_other_row &
      mydata$x1 == 1 &
      mydata$x2 == mydata$x2[i] &
      mydata$x3 == mydata$x3[i] &
      mydata$week > week_cutoff[i]
  )
}, integer(1))

# mydata 
# x1 x2 x3 week newvar1 
# 1 0 1 1 0  0 
# 2 0 2 3 0  0 
# 3 0 3 5 0  2 
# 4 0 1 1 0  0 
# 5 0 2 3 0  0 
# 6 0 3 5 0  2 
# 7 1 1 1 1  0 
# 8 1 2 4 30  0 
# 9 1 3 5 50  1 
# 10 1 1 2 22  0 
# 11 1 2 1 52  0 
# 12 1 3 5 36  1 
# 13 1 4 6 25  1 
# 14 1 4 6 26  0 

另外,如果在 x1 == 1 時要與數據中的所有行(包括自身)進行比較:

# Variant of the row-wise count. For row i, count rows that have x1 == 1,
# share row i's (x2, x3) pair, and whose week exceeds a cut-off:
#   - row i has x1 == 0: cut-off 24, row i itself is left out;
#   - otherwise:         cut-off 25, every row (row i included) is considered.
n_rows <- nrow(mydata)
row_is_zero <- mydata$x1 == 0

# Cut-off per row, chosen from the row's own x1 value.
week_cutoff <- ifelse(row_is_zero, 24, 25)

# Assigning the whole column at once creates newvar1 even when it does not
# exist yet (a per-row `mydata[i, ]$newvar1 <- ...` relies on the column
# already being there), and seq_len() does not iterate on a zero-row input.
mydata$newvar1 <- vapply(seq_len(n_rows), function(i) {
  # Rows that may be counted for row i.
  candidate <- if (row_is_zero[i]) seq_len(n_rows) != i else rep(TRUE, n_rows)
  sum(
    candidate &
      mydata$x1 == 1 &
      mydata$x2 == mydata$x2[i] &
      mydata$x3 == mydata$x3[i] &
      mydata$week > week_cutoff[i]
  )
}, integer(1))

# mydata 
# x1 x2 x3 week newvar1 
# 1 0 1 1 0  0 
# 2 0 2 3 0  0 
# 3 0 3 5 0  2 
# 4 0 1 1 0  0 
# 5 0 2 3 0  0 
# 6 0 3 5 0  2 
# 7 1 1 1 1  0 
# 8 1 2 4 30  1 
# 9 1 3 5 50  2 
# 10 1 1 2 22  0 
# 11 1 2 1 52  1 
# 12 1 3 5 36  2 
# 13 1 4 6 25  1 
# 14 1 4 6 26  1 
0
# Two-pass construction of newvar1 with ifelse().
# vapply() is used instead of sapply() so each per-row result is checked to be
# a single number and the return type does not depend on the input.
row_ids <- seq_len(nrow(mydata))

# Pass 1 - rows with x1 == 0 get the total of x1 over rows that share their
# (x2, x3) pair and have week > 25; all other rows get 0 for now.
# NOTE(review): the question text asks for week > 24 in the x1 == 0 case,
# while this answer uses 25 - confirm which is intended.
total_for_zero <- vapply(row_ids, function(i) {
  with(mydata, sum(x1[week > 25 & x2 == x2[i] & x3 == x3[i]]))
}, numeric(1))
mydata$newvar1 <- ifelse(mydata$x1 == 0, total_for_zero, 0)

# Pass 2 - rows with x1 == 1 get the total of x1 over rows that share their
# (x2, x3) pair, have an earlier week than row i, and satisfy
# week - week[i] < 25, provided row i's own week is not 0; other rows keep
# the value from pass 1.
total_for_one <- vapply(row_ids, function(i) {
  with(
    mydata,
    sum(x1[
      week < week[i] & week[i] != 0 & week - week[i] < 25 &
        x2 == x2[i] & x3 == x3[i]
    ])
  )
}, numeric(1))
mydata$newvar1 <- ifelse(mydata$x1 == 1, total_for_one, mydata$newvar1)
0

使用dplyr

library(dplyr)

# Compute newvar1 within each (x2, x3) combination; the result is printed,
# not assigned back to mydata, and stays grouped.
group_by(mydata, x2, x3) %>%
  mutate(
    newvar1 = ifelse(
      x1 == 0,
      # Rows with x1 == 0: number of rows in the group whose x1 * week
      # exceeds 24.
      sum((x1 * week) > 24),
      # Other rows: the same count with cut-off 25, minus 1 when the row
      # itself has week > 25 and x1 == 1, so that it does not count itself.
      sum((x1 * week) > 25) - (week > 25) * (x1 == 1)
    )
  )
# Source: local data frame [14 x 6] 
# Groups: x2, x3 [7] 
# 
#  x1 x2 x3 week newvar2 newvar1 
# <int> <int> <int> <int> <int> <int> 
# 1  0  1  1  0  0  0 
# 2  0  2  3  0  2  0 
# 3  0  3  5  0  0  2 
# 4  0  1  1  0  0  0 
# 5  0  2  3  0  2  0 
# 6  0  3  5  0  0  2 
# 7  1  1  1  1  0  0 
# 8  1  2  4 30  0  0 
# 9  1  3  5 50  1  1 
# 10  1  1  2 22  0  0 
# 11  1  2  1 52  0  0 
# 12  1  3  5 36  0  1 
# 13  1  4  6 25  0  1 
# 14  1  4  6 26  0  0 

else 分支中看起來有點怪異的 `- (week > 25) * (x1 == 1)`,是為了在該行本來會匹配到自身時減去 1。