2015-11-02 52 views
2

我試圖計算不同患者按醫院分層,疾病等級和年份的中位生存期。在plyr中,這很簡單,但我試圖加快使用dplyr(因爲它的速度)。不過,我對輸出結果感到困惑,因爲生存結果放置在數據框內的列表中。如何將生成的m6數據幀轉換爲「有用」的數據幀以供後續分析? dplyr中有do函數的教程嗎?do,survivalanalysis和dplyr

編輯1: m6$ty2<-unlist(m6$te)似乎這樣的伎倆

但是,如果我不是做

m6<-ml %>% group_by(year,Grade,hospital) %>% do(te=survfit(Surv(time=surv, event=status=="Dead")~-1,data=.)) 

如何去訪問survfit對象的單個元素?

這是我寫的另一個代碼。

m6<- ml %>% group_by(year,Grade,hospital) %>% 
    do(te=summary(survfit(Surv(time=surv, event=status=="Dead")~-1,data=.))$table['median']) 

DATA

> dput(ml) 
structure(list(year = structure(c(12L, 12L, 2L, 7L, 5L, 1L, 1L, 
9L, 3L, 5L, 3L, 7L, 11L, 3L, 13L, 5L, 6L, 6L, 9L, 13L, 7L, 5L, 
11L, 8L, 1L, 2L, 11L, 4L, 6L, 13L, 7L, 4L, 8L, 13L, 7L, 8L, 1L, 
9L, 7L, 5L, 6L, 5L, 10L, 13L, 7L, 10L, 10L, 8L, 13L, 11L, 11L, 
12L, 10L, 6L, 7L, 11L, 2L, 11L, 6L, 11L, 2L, 1L, 12L, 11L, 11L, 
12L, 8L, 4L, 4L, 11L, 13L, 8L, 6L, 5L, 10L, 3L, 12L, 3L, 11L, 
1L, 5L, 11L, 4L, 5L, 5L, 7L, 9L, 7L, 11L, 13L, 7L, 12L, 6L, 6L, 
11L, 2L, 7L, 10L, 7L, 7L, 8L, 8L, 4L, 12L, 8L, 3L, 12L, 3L, 11L, 
11L, 13L, 3L, 5L, 6L, 4L, 10L, 12L, 9L, 10L, 5L, 12L, 10L, 7L, 
2L, 7L, 6L, 12L, 10L, 3L, 2L, 7L, 5L, 2L, 12L, 10L, 3L, 6L, 9L, 
2L, 7L, 4L, 6L, 3L, 8L, 5L, 10L, 8L, 10L, 11L, 10L, 13L, 10L, 
5L, 3L, 11L, 5L, 1L, 7L, 13L, 13L, 9L, 13L, 12L, 10L, 4L, 13L, 
7L, 4L, 12L, 12L, 11L, 5L, 11L, 3L, 6L, 10L, 3L, 3L, 13L, 1L, 
13L, 6L, 6L, 12L, 1L, 6L, 5L, 10L, 2L, 5L, 11L, 9L, 9L, 9L, 2L, 
7L, 5L, 12L, 8L, 2L), .Label = c("2003", "2004", "2005", "2006", 
"2007", "2008", "2009", "2010", "2011", "2012", "2013", "2014", 
"2015"), class = "factor"), Grade = structure(c(3L, 2L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 2L, 
3L, 3L, 1L, 3L, 2L, 1L, 2L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 3L, 
3L, 1L, 3L, 2L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 
2L, 3L, 3L, 3L, 1L, 1L, 3L, 2L, 1L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 
2L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 1L, 3L, 3L, 3L, 1L, 2L, 2L, 
1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 
2L, 3L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 2L, 1L, 1L, 
3L, 2L, 3L, 2L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 2L, 
1L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 1L, 1L, 
2L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 3L, 2L, 3L, 1L, 1L, 3L, 1L, 3L, 
3L, 1L, 1L, 3L, 1L, 3L, 1L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 
1L, 1L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 3L, 1L, 2L, 3L, 3L, 3L, 3L, 
3L, 2L, 3L, 1L), .Label = c("II", "III", "IV"), class = "factor"), 
    hospital = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 
    1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 
    2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 
    1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 
    1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 
    1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 
    2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 
    2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 
    1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L), .Label = c("A", 
    "B"), class = "factor"), surv = c(0.298425735797399, 0.235455167693361, 
    0.243668720054757, 1.98220396988364, 0.955509924709103, 0.386036960985626, 
    0.503764544832307, 1.19644079397673, 0.139630390143737, 7.82477754962355, 
    1.3990417522245, 2.48596851471595, 2.19849418206708, 1.0321697467488, 
    0.369609856262834, 5.88364134154689, 4.54757015742642, 0.334017796030116, 
    0.498288843258042, 0.621492128678987, 0.563997262149213, 
    0.128678986995209, 2.40109514031485, 1.61533196440794, 0.142368240930869, 
    5.08966461327858, 1.89733059548255, 7.31553730321698, 1.27310061601643, 
    0.561259411362081, 6.07529089664613, 0.369609856262834, 0.287474332648871, 
    0.251882272416153, 6.21492128678987, 0.670773442847365, 0.54757015742642, 
    4.10130047912389, 0.865160848733744, 0.468172484599589, 0.271047227926078, 
    0.843258042436687, 1.3990417522245, 0.0739219712525667, 0.470910335386721, 
    0.183436002737851, 0.832306639288159, 0.427104722792608, 
    0.457221081451061, 2.02874743326489, 0.616016427104723, 1.00479123887748, 
    2.86105407255305, 0.449007529089665, 0.0711841204654346, 
    2.19849418206708, 2.00958247775496, 1.82067077344285, 0.334017796030116, 
    1.95756331279945, 5.9958932238193, 0.903490759753593, 0.824093086926763, 
    0.684462696783025, 0.629705681040383, 1.62628336755647, 1.94934976043806, 
    0.490075290896646, 9.07323750855578, 0.991101984941821, 0.0520191649555099, 
    0.900752908966461, 0.476386036960986, 0.298425735797399, 
    0.213552361396304, 1.25941136208077, 0.158795345653662, 9.89459274469541, 
    0.295687885010267, 11.8576317590691, 0.889801505817933, 2.05338809034908, 
    2.39014373716632, 0.509240246406571, 8.16427104722793, 3.68240930869268, 
    0.720054757015743, 2.79808350444901, 0.974674880219028, 0.678986995208761, 
    0.525667351129363, 1.4757015742642, 7.2662559890486, 0.125941136208077, 
    0.884325804243669, 0.281998631074606, 0.435318275154004, 
    1.43463381245722, 2.19028062970568, 0.0657084188911704, 5.50581793292266, 
    0.70362765229295, 8.95824777549624, 0.37782340862423, 5.47022587268994, 
    0.555783709787817, 0.731006160164271, 0.287474332648871, 
    1.23477070499658, 2.40383299110198, 0.462696783025325, 0.65160848733744, 
    0.194387405886379, 0.48186173853525, 7.68514715947981, 3.07460643394935, 
    0.0164271047227926, 3.93429158110883, 0.928131416837782, 
    1.91649555099247, 0.331279945242984, 0.528405201916496, 0.87337440109514, 
    2.13004791, 1.57152635181383, 2.87200547570157, 0.188911704312115, 
    0.810403832991102, 1.16906228610541, 11.2553045859001, 0.147843942505133, 
    6.18480492813142, 10.9103353867214, 0.714579055441478, 0.380561259411362, 
    1.88090349075975, 0.958247775496235, 0.0328542094455852, 
    0.191649555099247, 1.05133470225873, 1.01848049281314, 0.720054757015743, 
    1.91649555099247, 0.670773442847365, 1.40177960301164, 3.50992470910335, 
    5.42642026009583, 3.62765229295003, 0.884325804243669, 1.06776180698152, 
    0.123203285420945, 0.826830937713895, 5.637234770705, 1.72210814510609, 
    2.43668720054757, 0.292950034223135, 0.375085557837098, 5.90828199863107, 
    0.54757015742642, -0.0164271047227926, 4.7337440109514, 0.136892539356605, 
    1.08145106091718, 0.353182751540041, 1.23203285420945, 0.229979466119097, 
    2.42847364818617, 0.0383299110198494, 1.57426420260096, 0.49555099247091, 
    1.90006844626968, 3.0444900752909, 0.342231348391513, 0.364134154688569, 
    0.0109514031485284, 3.53456536618754, 1.3990417522245, 1.41820670773443, 
    0.0355920602327173, 0.607802874743327, 0.0191649555099247, 
    6.80903490759754, 0.243668720054757, 1.15263518138261, 0.900752908966461, 
    3.130047912, 0.659822039698836, 0.514715947980835, 0.706365503080082, 
    0.194387405886379, 2.58726899383984, 0.484599589322382, 1.01026694045175, 
    0.145106091718001, 1.85352498288843, 0.87337440109514, 0.383299110198494, 
    1.3305954825462, 0.0465434633812457, 6.65297741273101), status = c("Dead", 
    "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", 
    "Alive", "Dead", "Dead", "Dead", "Dead", "Alive", "Dead", 
    "Dead", "Dead", "Dead", "Alive", "Dead", "Dead", "Alive", 
    "Dead", "Dead", "Dead", "Alive", "Dead", "Dead", "Alive", 
    "Alive", "Dead", "Dead", "Alive", "Alive", "Dead", "Dead", 
    "Alive", "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", 
    "Dead", "Dead", "Dead", "Dead", "Alive", "Alive", "Dead", 
    "Dead", "Dead", "Dead", "Dead", "Alive", "Dead", "Alive", 
    "Dead", "Alive", "Dead", "Dead", "Dead", "Dead", "Dead", 
    "Alive", "Dead", "Dead", "Alive", "Dead", "Dead", "Dead", 
    "Dead", "Dead", "Dead", "Dead", "Dead", "Alive", "Dead", 
    "Alive", "Dead", "Alive", "Dead", "Dead", "Dead", "Dead", 
    "Dead", "Dead", "Dead", "Alive", "Dead", "Dead", "Alive", 
    "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", "Alive", 
    "Dead", "Alive", "Dead", "Alive", "Dead", "Dead", "Dead", 
    "Dead", "Alive", "Alive", "Dead", "Dead", "Dead", "Dead", 
    "Alive", "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", 
    "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", "Alive", 
    "Dead", "Dead", "Alive", "Dead", "Dead", "Dead", "Dead", 
    "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", 
    "Alive", "Alive", "Alive", "Dead", "Dead", "Alive", "Dead", 
    "Dead", "Dead", "Alive", "Dead", "Dead", "Dead", "Dead", 
    "Alive", "Alive", "Alive", "Alive", "Dead", "Dead", "Alive", 
    "Dead", "Dead", "Alive", "Dead", "Alive", "Dead", "Dead", 
    "Dead", "Dead", "Alive", "Dead", "Dead", "Alive", "Dead", 
    "Alive", "Alive", "Dead", "Alive", "Dead", "Dead", "Dead", 
    "Dead", "Dead", "Dead", "Alive", "Dead", "Dead", "Dead", 
    "Dead", "Dead", "Dead", "Alive", "Dead", "Dead")), .Names = c("year", 
"Grade", "hospital", "surv", "status"), class = c("tbl_dt", "tbl", 
"data.table", "data.frame"), row.names = c(NA, -200L)) 
+0

你想在最後看到什麼樣的數據? – jazzurro

+0

我希望看到一列分別爲低於95,中位數和高位95。 – Misha

+0

我可以看到結果中較低和較高。中位數會是多少?它會成爲「存活」價值嗎? – jazzurro

回答

4

THX您的輸入jazzurro,但我發現掃帚 - 和halleluja ...榮譽給哈德利和dgrtwo他們所有的努力。

require (broom) 
m6<-ml %>% group_by(year,Grade,hospital) %>% 
do(glance(survfit(Surv(time=surv, event=status=="Dead")~-1,data=.)) 
+0

我很高興看到您找到解決方案!我從你那裏學到了東西。 +1 – jazzurro

1

這是我能爲現在要做的。但是,我認爲這接近你想要的。我認爲面臨的挑戰是從列表中提取必要的組件並創建數據框。我一直在玩purrr包,並認爲它會幫助我們在這裏。我在我的代碼中調用了mlmydf

library(dplyr) 
library(purrr) 

# Run the model 
group_by(mydf, year, Grade, hospital) %>% 
do(te = survfit(Surv(time = surv, event = status == "Dead") ~ -1, data = .)) -> foo 

檢查foo,你會看到一個新列te,其中包含列表。

#  year Grade hospital   te 
# (fctr) (fctr) (fctr)  (chr) 
#1 2014  IV  A <S3:survfit> 
#2 2014 III  A <S3:survfit> 
#3 2004  IV  A <S3:survfit> 
#4 2009  IV  A <S3:survfit> 
#5 2007  IV  A <S3:survfit> 

我在purrr封裝中使用zip_n()安排表。該函數的功能是爲每個列表組件創建一個包含元素的列表。例如,您的surv的值爲te。這些值是所有列表中的商店。該函數收集所有值並將它們放入列表中。我認爲運行以下代碼並查看數據結構是瞭解發生的最佳方式。我還更改了一個名爲lower的列表的名稱,以供稍後操作。

foo$te %>% zip_n(.simplify = TRUE) -> whatever 

names(whatever$lower) <- paste("model", seq(1:length(whatever$lower)), ".", sep = "") 

新的列表,whatever看起來是這樣的(顯示它的一部分)

# str(whatever) 
#List of 13 
#$ n  : int [1:68] 7 4 3 12 6 7 2 8 5 5 ... 
#$ time  : num [1:194] 0.189 0.298 0.331 0.715 0.731 ... 
#$ n.risk : num [1:194] 7 6 5 4 3 2 1 4 3 2 ... 
#$ n.event : num [1:194] 1 1 1 1 1 1 1 1 1 0 ... 
#$ n.censor : num [1:194] 0 0 0 0 0 0 0 0 0 1 ... 
#$ surv  : num [1:194] 0.857 0.714 0.571 0.429 0.286 ... 
#$ type  : chr [1:68] "right" "right" "right" "right" ... 
#$ std.err : num [1:194] 0.154 0.239 0.327 0.436 0.598 ... 
#$ upper : num [1:194] 1 1 1 1 0.922 ... 
#$ lower :List of 68 
#..$ model1. : num [1:7] 0.6334 0.4471 0.3008 0.1822 0.0886 ... 
#..$ model2. : num [1:4] 0.426 0.188 0.188 0.188 

我不能確定哪些值是中位數。至少,我認爲upperlower是你想要的兩件事。所以讓我安排一個數據框。

列表lowerwhatever包含數字和邏輯。當你使用as_vector()時,你似乎只有一種類型。鑑於此,我將全部轉換爲數字,然後應用該函數以創建矢量。

lapply(whatever$lower, as.numeric) %>% 
as_vector(.type = "numeric") -> lower 

這就是lower的一部分外觀。

model1.1 model1.2 model1.3 model1.4 model1.5 model1.6 model1.7 model2.1 model2.2 model2.3 
0.63344653 0.44709095 0.30084365 0.18219162 0.08856084 0.02327247   NA 0.42593227 0.18765893 0.18765893 

最後,您將創建一個新的數據框。

newdf <- data.frame(upper = unlist(whatever$upper), 
        lower = lower) 

#    upper  lower 
#model1.1 1.0000000 0.63344653 
#model1.2 1.0000000 0.44709095 
#model1.3 1.0000000 0.30084365 
#model1.4 1.0000000 0.18219162 
#model1.5 0.9217692 0.08856084 
#model1.6 0.8769230 0.02327247