我需要將大型數據集重組爲一個特定的格式以供進一步分析。現在數據是長格式的,每個點都有多個記錄。我需要重新整理數據,以便每個點都有一條記錄,但它會添加許多新的時間特定數據列。我看過以前的類似帖子,但我最終需要將幾個當前變量轉換爲列,而我找不到這樣的例子。有沒有一種方法可以在一次重塑中實現這個目標,或者我需要做幾個步驟,然後將新的列連接起來?在我發佈這個例子之前,另一個缺點是並不是所有的點都在每個時間步採樣,所以我需要這些值顯示爲NA。例如(見下面的數據)SitePoint A1在2012年根本沒有采樣,2012年第一輪沒有采樣SitePoint A10,但是K83全部採樣了9次。重塑缺失值和多個感興趣值的大矩陣
mydatain <- structure(list(SitePoint = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 6L, 6L), .Label = c("A1", "A10", "K145", "K83", "T15",
"T213"), class = "factor"), Year_Rotation = structure(c(1L, 2L,
3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 8L, 9L, 1L, 2L, 4L, 5L,
6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 1L, 7L), .Label = c("2010_1", "2010_2",
"2010_3", "2011_1", "2011_2", "2011_3", "2012_1", "2012_2", "2012_3"
), class = "factor"), MR_Fire = structure(c(5L, 6L, 6L, 2L, 9L,
9L, 5L, 6L, 6L, 2L, 9L, 9L, 7L, 8L, 16L, 17L, 21L, 22L, 23L,
25L, 3L, 4L, 10L, 11L, 12L, 13L, 14L, 15L, 18L, 19L, 20L, 1L,
2L, 2L, 5L, 6L, 6L, 11L, 11L, 12L, 7L, 24L), .Label = c("0",
"1", "10", "11", "12", "13", "14", "15", "2", "23", "24", "25",
"35", "36", "37", "39", "40", "47", "48", "49", "51", "52", "53",
"8", "9"), class = "factor"), fire_seas = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 3L), .Label = c("dry", "fire", "wet"
), class = "factor"), OptTSF = c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L,
0L, 1L, 1L)), .Names = c("SitePoint", "Year_Rotation", "MR_Fire",
"fire_seas", "OptTSF"), row.names = c(31L, 32L, 33L, 34L, 35L,
36L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 10543L, 10544L,
10545L, 10546L, 10547L, 10548L, 10549L, 10550L, 14988L, 14989L,
14990L, 14991L, 14992L, 14993L, 14994L, 14995L, 14996L, 17370L,
17371L, 17372L, 17373L, 17374L, 17375L, 17376L, 17377L, 17378L,
19353L, 19354L), class = "data.frame")
最後,我需要的是這樣的:
myfinal <- structure(list(SitePoint = structure(1:6, .Label = c("A1", "A10",
"K145", "K83", "T15", "T213"), class = "factor"), MR_Fire_2010_1 = c(12L,
12L, 39L, 23L, 0L, 14L), MR_Fire_2010_2 = c(13L, 13L, 40L, 24L,
1L, NA), MR_Fire_2010_3 = c(13L, 13L, NA, 25L, 1L, NA), MR_Fire_2011_1 = c(1L,
1L, 51L, 35L, 12L, NA), MR_Fire_2011_2 = c(2L, 2L, 52L, 36L,
13L, NA), MR_Fire_2011_3 = c(2L, 2L, 53L, 37L, 13L, NA), MR_Fire_2012_1 = c(NA,
NA, 9L, 47L, 24L, 8L), MR_Fire_2012_2 = c(NA, 14L, 10L, 48L,
24L, NA), MR_Fire_2012_3 = c(NA, 15L, 11L, 49L, 25L, NA), season_2010_1 = structure(c(2L,
2L, 1L, 2L, 2L, 1L), .Label = c("dry", "fire"), class = "factor"),
season_2010_2 = structure(c(2L, 2L, 1L, 2L, 2L, NA), .Label = c("dry",
"fire"), class = "factor"), season_2010_3 = structure(c(1L,
1L, NA, 1L, 1L, NA), .Label = "fire", class = "factor"),
season_2011_1 = structure(c(2L, 2L, 1L, 2L, 2L, NA), .Label = c("dry",
"fire"), class = "factor"), season_2011_2 = structure(c(2L,
2L, 1L, 2L, 2L, NA), .Label = c("dry", "fire"), class = "factor"),
season_2011_3 = structure(c(2L, 2L, 1L, 2L, 2L, NA), .Label = c("dry",
"fire"), class = "factor"), season_2012_1 = structure(c(NA,
NA, 2L, 1L, 1L, 2L), .Label = c("fire", "wet"), class = "factor"),
season_2012_2 = structure(c(NA, 1L, 2L, 1L, 1L, NA), .Label = c("fire",
"wet"), class = "factor"), season_2012_3 = structure(c(NA,
1L, 2L, 1L, 1L, NA), .Label = c("fire", "wet"), class = "factor"),
OptTSF_2010_1 = c(1L, 1L, 0L, 1L, 1L, 1L), OptTSF_2010_2 = c(1L,
1L, 0L, 1L, 1L, NA), OptTSF_2010_3 = c(1L, 1L, NA, 1L, 1L,
NA), OptTSF_2011_1 = c(1L, 1L, 0L, 0L, 1L, NA), OptTSF_2011_2 = c(1L,
1L, 0L, 0L, 1L, NA), OptTSF_2011_3 = c(1L, 1L, 0L, 0L, 1L,
NA), OptTSF_2012_1 = c(NA, NA, 1L, 0L, 0L, 1L), OptTSF_2012_2 = c(NA,
1L, 1L, 0L, 0L, NA), OptTSF_2012_3 = c(NA, 1L, 1L, 0L, 0L,
NA)), .Names = c("SitePoint", "MR_Fire_2010_1", "MR_Fire_2010_2",
"MR_Fire_2010_3", "MR_Fire_2011_1", "MR_Fire_2011_2", "MR_Fire_2011_3",
"MR_Fire_2012_1", "MR_Fire_2012_2", "MR_Fire_2012_3", "season_2010_1",
"season_2010_2", "season_2010_3", "season_2011_1", "season_2011_2",
"season_2011_3", "season_2012_1", "season_2012_2", "season_2012_3",
"OptTSF_2010_1", "OptTSF_2010_2", "OptTSF_2010_3", "OptTSF_2011_1",
"OptTSF_2011_2", "OptTSF_2011_3", "OptTSF_2012_1", "OptTSF_2012_2",
"OptTSF_2012_3"), class = "data.frame", row.names = c(NA, -6L
))
實際的數據集大約是23656條記錄X 15個變量,這樣做手工很容易造成重大頭痛和潛在的錯誤。任何幫助或建議表示讚賞。如果這已在其他地方得到解答,請致歉。我找不到任何直接適用的東西;一切似乎都與三列有關,只有一列被提取爲新變量。謝謝。
SP
這看起來就像我希望的那樣。我現在正在下載/更新以嘗試它。非常感謝。 – 2015-03-31 20:27:34
@ A.Birdman感謝您的意見。我希望這個對你有用。 – akrun 2015-03-31 20:32:46
我終於得到了一切更新(無論如何都需要發生),並安裝了data.table的v1.9.5,它的工作原理與我上傳的有限數據集相似。再次感謝你讓我的生活變得更加愉快! – 2015-04-01 15:30:04