2016-07-29 74 views
1

我認爲標題解釋了一切。我想在兩個數據集之間做t.test。我想逐行比較。T.test在兩個數據集之間 - 逐行

讓我們以mtcars爲例,並把它稍作修改後的版本稱爲mtcars_mod:

# Modified copy of the built-in `mtcars` data set (dput-style literal):
# 32 rows (car models) by 11 numeric columns, with a handful of values
# altered relative to the original (e.g. wt = 7 for "Hornet 4 Drive",
# qsec = 114 for the same row).
#
# The literal is bound to `mtcars_mod`, the name the row-wise loop further
# down reads, and the outer structure( call is properly closed so the
# snippet parses.
mtcars_mod <- structure(
  list(
    mpg = c(21, 25, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 24.8, 19.2, 17.8,
            16.4, 17.3, 15.2, 10.4, 10.4, 14.7, 32.4, 36.4, 31.9, 21.5,
            15.5, 15.2, 13.3, 19.2, 27.3, 26, 30.4, 15.8, 29.7, 15, 21.4),
    cyl = c(6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 7, 4, 4, 4, 4,
            8, 8, 8, 8, 4, 4, 4, 8, 6, 8, 4),
    disp = c(160, 160, 108, 258, 360, 225, 360, 146.7, 140.8, 167.6, 167.6,
             275.8, 275.8, 275.8, 6, 460, 440, 78.7, 75.7, 71.1, 120.1,
             318, 304, 350, 400, 79, 15, 97, 351, 145, 301, 121),
    hp = c(110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, 180,
           180, 205, 215, 230, 66, 52, 65, 97, 150, 150, 245, 175, 66, 91,
           113, 264, 175, 335, 109),
    drat = c(3.9, 3.9, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92,
             3.92, 3.07, 3.07, 3.07, 2.93, 3, 3.23, 4.08, 4.93, 4.22, 3.7,
             2.76, 3.15, 3.73, 3.08, 4.08, 4.43, 3.77, 4.22, 3.62, 3.54,
             4.11),
    wt = c(2.62, 2.875, 2.32, 7, 3.44, 3.46, 3.57, 3.19, 3.15, 3.44, 3.44,
           4.07, 3.73, 3.78, 5.25, 5.424, 5.345, 2.2, 1.615, 1.835, 2.465,
           3.52, 3.435, 3.84, 3.845, 1.935, 2.14, 1.513, 3.17, 2.77, 6,
           2.78),
    qsec = c(16.46, 17.02, 18.61, 114, 17.02, 20.22, 15.84, 12, 22.9, 18.3,
             18.9, 17.4, 17.6, 18, 17.98, 17.82, 17.42, 19.47, 18.52, 19.9,
             20.01, 16.87, 32, 15.41, 17.05, 18.9, 16.7, 16.9, 14.5, 15.5,
             14.6, 18.6),
    vs = c(0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1,
           0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1),
    am = c(1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0,
           0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1),
    gear = c(4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3,
             3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 4),
    carb = c(4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1,
             2, 2, 4, 2, 1, 2, 2, 4, 6, 8, 2)
  ),
  .Names = c("mpg", "cyl", "disp", "hp", "drat", "wt", "qsec", "vs", "am",
             "gear", "carb"),
  row.names = c("Mazda RX4", "Mazda RX4 Wag", "Datsun 710",
                "Hornet 4 Drive", "Hornet Sportabout", "Valiant",
                "Duster 360", "Merc 240D", "Merc 230", "Merc 280",
                "Merc 280C", "Merc 450SE", "Merc 450SL", "Merc 450SLC",
                "Cadillac Fleetwood", "Lincoln Continental",
                "Chrysler Imperial", "Fiat 128", "Honda Civic",
                "Toyota Corolla", "Toyota Corona", "Dodge Challenger",
                "AMC Javelin", "Camaro Z28", "Pontiac Firebird",
                "Fiat X1-9", "Porsche 914-2", "Lotus Europa",
                "Ford Pantera L", "Ferrari Dino", "Maserati Bora",
                "Volvo 142E"),
  class = "data.frame"
)

我試圖在循環中做它,但我不知道如何存儲結果。我只得到最後一個值...

# Row-by-row t-test between `mtcars` and `mtcars_mod` (first 7 columns).
# NOTE: `vec_results` is reassigned on every pass, so once the loop ends it
# holds only the value computed for the last row; nothing is accumulated.
# NOTE: `1:nrow(mtcars)` would iterate over c(1, 0) for a zero-row data frame;
# `seq_len(nrow(mtcars))` avoids that.
for(z in 1:nrow(mtcars)){ 
    # One-row slices (columns 1 to 7) of each data set for row z.
    vec_1 <- mtcars[z,1:7] 
    vec_2 <- mtcars_mod[z,1:7] 
    # `[3]` keeps the third component of the t.test result (the p-value);
    # the assignment overwrites the value stored by the previous iteration.
    vec_results <- unlist(t.test(vec_1, vec_2)[3]) 

} 

有人可以告訴我如何糾正我的循環嗎?我更傾向於使用apply系列函數,但仍然想知道我的循環到底哪裏出了錯……

回答

3

(我會使用我自己修改的mtcarsmod……抱歉,你給出的數據至少缺少一個右括號,而且——雖然我明白是怎麼回事——它在SO窗口裏顯示得很難看。)

# Build a perturbed copy of `mtcars` to compare against the original.
# The seed is fixed so the random noise is reproducible.
set.seed(42)
# Add noise column by column; as.data.frame() reassembles the list of
# jittered columns into a data frame with the same column names.
mtcarsmod <- as.data.frame(
  lapply(mtcars, function(column) jitter(column, factor = 5))
)
# Show the first six rows.
head(mtcarsmod, n = 6L)
# mpg cyl disp hp drat wt qsec  vs  am gear carb 
# 1 21.1 5.55 160 109.7 3.89 2.62 16.5 -0.373 0.221 3.68 3.861 
# 2 21.1 6.74 160 110.0 3.90 2.88 17.0 0.641 1.080 3.06 3.788 
# 3 22.8 2.02 108 93.5 3.86 2.32 18.6 0.614 1.142 4.73 0.284 
# 4 21.5 7.33 258 110.2 3.08 3.21 19.4 0.371 0.238 3.46 0.560 
# 5 18.7 6.03 360 175.3 3.15 3.44 17.0 -0.903 0.430 2.63 2.130 
# 6 18.1 4.83 225 104.4 2.77 3.46 20.2 0.491 -0.753 2.77 1.870 

與其使用循環,你或許應該使用sapply或它的同類函數之一。

# For every row index, run a two-sample t-test between that row of `mtcars`
# and the same row of `mtcarsmod` (columns 1 to 7), keeping only the third
# component of the result (the p-value). sapply() simplifies the per-row
# results into a single vector whose elements are each named "p.value".
sapply(seq_len(nrow(mtcars)), function(r) {
  row_test <- t.test(mtcars[r, 1:7], mtcarsmod[r, 1:7])
  unlist(row_test[3])
})
# p.value p.value p.value p.value p.value p.value p.value p.value p.value p.value p.value 
# 0.998 0.998 0.992 0.996 0.998 0.995 0.999 1.000 0.999 0.998 0.995 
# p.value p.value p.value p.value p.value p.value p.value p.value p.value p.value p.value 
# 0.995 0.999 0.999 0.998 0.999 0.997 0.999 0.995 0.997 0.995 0.999 
# p.value p.value p.value p.value p.value p.value p.value p.value p.value p.value 
# 0.997 0.998 1.000 0.990 0.997 0.999 0.999 0.995 0.997 0.995 

使用lapply的一個好處是可以保留並利用測試結果中的更多內容。例如:

# Pull the p-value out of every stored test result as one numeric vector.
# `ret` is the list of t.test results built by the lapply() call shown
# further down in this answer; it must exist before this line is run.
# vapply() with numeric(1) guarantees a numeric vector (numeric(0) for an
# empty list) and fails loudly if any element does not yield one number.
vapply(ret, `[[`, numeric(1), "p.value")
# [1] 0.998 0.998 0.992 0.996 0.998 0.995 0.999 1.000 0.999 0.998 0.995 0.995 0.999 0.999 
# [15] 0.998 0.999 0.997 0.999 0.995 0.997 0.995 0.999 0.997 0.998 1.000 0.990 0.997 0.999 
# [29] 0.999 0.995 0.997 0.995 

# Keep the complete t.test result (an "htest" list) for every row rather
# than just the p-value, so any component can be inspected afterwards.
# The argument expressions are kept as written because t.test records them
# in the `data.name` component of its result.
ret <- lapply(
  X = seq_len(nrow(mtcars)),
  FUN = function(r) {
    t.test(mtcars[r, 1:7], mtcarsmod[r, 1:7])
  }
)
# Inspect the structure of the first two results.
str(ret[seq_len(2)])
# List of 2 
# $ :List of 9 
# ..$ statistic : Named num 0.0024 
# .. ..- attr(*, "names")= chr "t" 
# ..$ parameter : Named num 12 
# .. ..- attr(*, "names")= chr "df" 
# ..$ p.value : num 0.998 
# ..$ conf.int : atomic [1:2] -73.4 73.5 
# .. ..- attr(*, "conf.level")= num 0.95 
# ..$ estimate : Named num [1:2] 45.7 45.6 
# .. ..- attr(*, "names")= chr [1:2] "mean of x" "mean of y" 
# ..$ null.value : Named num 0 
# .. ..- attr(*, "names")= chr "difference in means" 
# ..$ alternative: chr "two.sided" 
# ..$ method  : chr "Welch Two Sample t-test" 
# ..$ data.name : chr "mtcars[r, 1:7] and mtcarsmod[r, 1:7]" 
# ..- attr(*, "class")= chr "htest" 
# $ :List of 9 
# ..$ statistic : Named num -0.00311 
# .. ..- attr(*, "names")= chr "t" 
# ..$ parameter : Named num 12 
# .. ..- attr(*, "names")= chr "df" 
# ..$ p.value : num 0.998 
# ..$ conf.int : atomic [1:2] -73.4 73.2 
# .. ..- attr(*, "conf.level")= num 0.95 
# ..$ estimate : Named num [1:2] 45.8 45.9 
# .. ..- attr(*, "names")= chr [1:2] "mean of x" "mean of y" 
# ..$ null.value : Named num 0 
# .. ..- attr(*, "names")= chr "difference in means" 
# ..$ alternative: chr "two.sided" 
# ..$ method  : chr "Welch Two Sample t-test" 
# ..$ data.name : chr "mtcars[r, 1:7] and mtcarsmod[r, 1:7]" 
# ..- attr(*, "class")= chr "htest" 
# p-value of the first row's test, extracted by exact component name.
ret[[1]][["p.value"]]
# [1] 0.998 

而且你仍然可以輕鬆地從結果中獲得p值向量(見上文從ret中提取"p.value"的示例)。

相關問題