2017-04-22 69 views
1

我正在處理一個大約有 100 列、100000 行的數據幀,想把名稱相似的列合併成一列,例如把 C1、C2、C3 合併爲 C。我一直在嘗試使用 dplyr 的 gather 函數,但始終得不到想要的輸出;如果用其他軟件包更容易,我也很樂意使用。下面附上一個簡化的例子。我確信自己漏掉了某個簡單的東西,非常感謝任何幫助。(標題:使用dplyr將多個列合併到一起)

# Key columns: three subjects, each observed on the same three rows.
id <- rep(c(222, 333, 444), each = 3)
timepoint <- rep(c("aa", "aa", "bb"), times = 3)
position <- rep(c(1, 2, 1), times = 3)

# First set of measurement columns.
C1 <- c("aat", "aaf", "bbg", "aag", "aag", "bbg", "aag", "aag", "bbg")
P1 <- c("A", "B", "C", "J", "J", "J", "J", "H", "H")
X1 <- c(21, 22, 23, 33, 35, 33, 41, 43, 45)

# In this simplified example the second and third sets hold the same values
# as the first set.
C2 <- C1
P2 <- P1
X2 <- X1
C3 <- C1
P3 <- P1
X3 <- X1

# Wide-format example: one C/P/X triple per numbered suffix.
df <- data.frame(
  id, timepoint, position,
  C1, P1, X1,
  C2, P2, X2,
  C3, P3, X3
)

我想把數據從下面這種格式:

id timepoint position C1 P1 X1 C2 P2 X2 C3 P3 X3 
222 aa   1   aat A 21 aat A 21 aat A 21 
222 aa   2   aaf B 22 aaf B 22 aaf B 22 
222 bb   1   bbg C 23 bbg C 23 bbg C 23 
333 aa   1   aag J 33 aag J 33 aag J 33 
333 aa   2   aag J 35 aag J 35 aag J 35 
333 bb   1   bbg J 33 bbg J 33 bbg J 33 
444 aa   1   aag J 41 aag J 41 aag J 41 
444 aa   2   aag H 43 aag H 43 aag H 43 
444 bb   1   bbg H 45 bbg H 45 bbg H 45 

轉換成下面這種格式:

id timepoint position C P X 
222 aa   1   aat A 21 
222 aa   2   aaf B 22 
222 bb   1   bbg C 23 
333 aa   1   aag J 33 
333 aa   2   aag J 35 
333 bb   1   bbg J 33 
444 aa   1   aag J 41 
444 aa   2   aag H 43 
444 bb   1   bbg H 45 
222 aa   1   aat A 21 
222 aa   2   aaf B 22 
222 bb   1   bbg C 23 
333 aa   1   aag J 33 
333 aa   2   aag J 35 
333 bb   1   bbg J 33 
444 aa   1   aag J 41 
444 aa   2   aag H 43 
444 bb   1   bbg H 45 
222 aa   1   aat A 21 
222 aa   2   aaf B 22 
222 bb   1   bbg C 23 
333 aa   1   aag J 33 
333 aa   2   aag J 35 
333 bb   1   bbg J 33 
444 aa   1   aag J 41 
444 aa   2   aag H 43 
444 bb   1   bbg H 45 

回答

2

我們可以用 data.table 的 melt 很容易地做到這一點,它可以同時接受多個 measure 列的 patterns:

library(data.table)
# Reshape wide -> long. Each regex passed to patterns() selects one family of
# numbered columns (C1.., P1.., X1..), and each family is stacked into the
# corresponding name in value.name.
# setDT() converts df to a data.table by reference, so df itself is a
# data.table after this call.
# measure.vars is spelled out in full instead of relying on partial argument
# matching of `measure`.
melt(
  setDT(df),
  measure.vars = patterns("^C\\d+", "^P\\d+", "^X\\d+"),
  value.name = c("C", "P", "X")
)[, variable := NULL][]  # drop the index column added by melt; [] prints
#  id timepoint position C P X 
# 1: 222  aa  1 aat A 21 
# 2: 222  aa  2 aaf B 22 
# 3: 222  bb  1 bbg C 23 
# 4: 333  aa  1 aag J 33 
# 5: 333  aa  2 aag J 35 
# 6: 333  bb  1 bbg J 33 
# 7: 444  aa  1 aag J 41 
# 8: 444  aa  2 aag H 43 
# 9: 444  bb  1 bbg H 45 
#10: 222  aa  1 aat A 21 
#11: 222  aa  2 aaf B 22 
#12: 222  bb  1 bbg C 23 
#13: 333  aa  1 aag J 33 
#14: 333  aa  2 aag J 35 
#15: 333  bb  1 bbg J 33 
#16: 444  aa  1 aag J 41 
#17: 444  aa  2 aag H 43 
#18: 444  bb  1 bbg H 45 
#19: 222  aa  1 aat A 21 
#20: 222  aa  2 aaf B 22 
#21: 222  bb  1 bbg C 23 
#22: 333  aa  1 aag J 33 
#23: 333  aa  2 aag J 35 
#24: 333  bb  1 bbg J 33 
#25: 444  aa  1 aag J 41 
#26: 444  aa  2 aag H 43 
#27: 444  bb  1 bbg H 45 
+1

非常感謝,我就知道會是很簡單的方法 –