2017-08-08 100 views
1

我想按列對值進行排名(按列值排名)。

我有以下數據幀:

# Sample data: dput() output describing `test`, a 4-row tibble with a
# character Name column, a numeric Margin column and numeric columns T1..T19.
dput(test) 
structure(list(Name = c("A", "B", "C", "D"), Margin = c(744, 
3196.4722, 0, 394), T1 = c(420, 200, 2150, 70), T2 = c(630, 285, 
2365, 84), T3 = c(630, 335, 2580, 105), T4 = c(666, 410, 2795, 
128), T5 = c(2244, 2961.7931, 3010, 142), T6 = c(2244, 3652.472, 
3440, 151), T7 = c(2244, 3722.472, 3870, 168), T8 = c(2244, 3887.472, 
5160, 187), T9 = c(2244, 4112.472, 6450, 225), T10 = c(2244, 
4337.472, 6450, 225), T11 = c(798, 3567.472, 4300, 112), T12 = c(630, 
3582.472, 4300, 111), T13 = c(702, 3582.472, 4300, 112), T14 = c(3600, 
4637.472, 3440, 78), T15 = c(744, 3067.306, 2580, 274), T16 = c(744, 
2770.5666, 2580, 197), T17 = c(744, 3138.806, 2580, 80), T18 = c(2244, 
3920.0836, 3870, 401), T19 = c(2244, 2789.1117, 1290, 127)), .Names = c("Name", 
"Margin", "T1", "T2", "T3", "T4", "T5", "T6", "T7", "T8", "T9", 
"T10", "T11", "T12", "T13", "T14", "T15", "T16", "T17", "T18", 
"T19"), row.names = c(NA, -4L), class = c("tbl_df", "tbl", "data.frame" 
)) 

每一行在 Name 列中都有唯一的 ID,我想對各列進行排名,以確定哪一列的值等於或至少最接近 Margin 列的值。

理想的輸出將是:

Name Margin Closest_Column 
A  744.000  T15 

出現平手時可以隨機決定。

我嘗試:

# Rank the values within each T column (columns 3 to 21 of `test`) and store
# the ranks in new columns named "rank_T1", ..., "rank_T19". Ties are broken
# by order of appearance (ties.method = "first").
# Base R lapply() is used instead of mutate_all(funs(...)) because funs() has
# been deprecated since dplyr 0.8.0; the resulting columns are the same.
nm1 <- paste("rank", names(test)[3:21], sep = "_")
test[nm1] <- lapply(test[3:21], rank, ties.method = "first")

回答

1

如果我們需要使用 tidyverse,一種方法是使用 rowwise,然後找出「Margin」與其他各列之間絕對差值最小者的索引,以取得對應的列名

# For each row, pick the name of the T column (columns 3 to 21) whose value
# has the smallest absolute difference from Margin (column 2). which.min()
# returns the first minimum, so ties go to the earliest column.
test %>%
  rowwise() %>%
  do(data.frame(
    .[1:2],
    Closest_column = names(.)[3:21][which.min(abs(.[[2]] - unlist(.[3:21])))],
    # Keep Name and Closest_column as character: before R 4.0 data.frame()
    # converted strings to factors by default.
    stringsAsFactors = FALSE
  ))
# A tibble: 4 x 3 
# Name Margin Closest_column 
#* <chr> <dbl>   <chr> 
#1  A 744.000   T15 
#2  B 3196.472   T17 
#3  C 0.000   T19 
#4  D 394.000   T18 

或者另一種選擇是

library(tidyverse)

# Reshape T1..T19 to long format (one row per Name/column pair), then keep,
# for each Name, the single row whose value is closest to Margin.
# which.min() returns the first minimum, so ties go to the earliest column.
gather(test, Closest_column, val, T1:T19) %>%
  group_by(Name) %>%
  slice(which.min(abs(Margin - val))) %>%
  ungroup() %>% # drop the Name grouping so later verbs are not applied per group
  select(-val)
# A tibble: 4 x 3
# Name Margin Closest_column
# <chr> <dbl>   <chr>
#1  A 744.000   T15
#2  B 3196.472   T17
#3  C 0.000   T19
#4  D 394.000   T18

使用 base R 時,一個高效的做法是 max.col

# Base R: negate the absolute differences from Margin so that the closest
# column becomes the row-wise maximum, then let max.col() locate it
# (ties.method = "first" keeps the earliest column on ties).
cbind(
  test[1:2],
  Closest_column = names(test)[3:21][
    max.col(-abs(test[3:21] - test[[2]]), ties.method = "first")
  ]
)
# Name Margin Closest_column 
#1 A 744.000   T15 
#2 B 3196.472   T17 
#3 C 0.000   T19 
#4 D 394.000   T18 
+0

只是想快速追問一下。如果我想將條件改爲 val <= Margin,slice(which.min(abs(val <= Margin))) 不會給我正確的答案。你知道我哪裏做錯了嗎? – Prometheus

+0

@Prometheus 你可以嘗試 `gather(test, Closest_column, val, T1:T19) %>% group_by(Name) %>% slice(which(val <= Margin)[1])` – akrun

2

使用 cbind.data.frame 將前兩列與新的一列合併;新列的內容是「列值減去 Margin 的絕對值」最小的那一列的名稱:

# Bind Name and Margin to a new column holding, for every row, the name of
# the T column whose value is closest (in absolute difference) to Margin.
# apply() walks the rows of test[-1], so element 1 of each row is Margin and
# the remaining elements are T1..T19.
cbind(
  test[1:2],
  Closest_Column = apply(test[-1], 1, function(row_vals) {
    margin <- row_vals[1]
    candidates <- row_vals[-1]
    names(candidates)[which.min(abs(candidates - margin))]
  })
)
    Name Margin Closest_Column 
1 A 744.000   T15 
2 B 3196.472   T17 
3 C 0.000   T19 
4 D 394.000   T18 
3

我會先轉換爲長格式

library(tidyr)
library(dplyr)

# Reshape every column except Name and Margin to long format, then for each
# (Name, Margin) pair keep the name of the column whose value is closest to
# Margin. which.min() returns the first minimum, so ties go to the earliest
# column.
test %>%
  gather(Variable, Value, -(Name:Margin)) %>%
  group_by(Name, Margin) %>%
  summarise(Closest = Variable[which.min(abs(Value - Margin))]) %>%
  ungroup() # summarise() only drops the last grouping level (Margin)

# A tibble: 4 x 3
# Name Margin Closest
# <chr> <dbl> <chr>
# 1  A 744.000  T15
# 2  B 3196.472  T17
# 3  C 0.000  T19
# 4  D 394.000  T18

或者使用data.table

library(data.table)

# Melt to long format with columns 1:2 (Name, Margin) as id columns, then for
# each (Name, Margin) group keep the melted column name whose value is
# closest to Margin. Note: setDT() converts `test` to a data.table by
# reference, so `test` itself is changed by this call.
melt(setDT(test), id.vars = 1:2)[
  ,
  list(Closest = variable[which.min(abs(value - Margin))]),
  by = list(Name, Margin)
]
# Name Margin Closest 
# 1: A 744.000  T15 
# 2: B 3196.472  T17 
# 3: C 0.000  T19 
# 4: D 394.000  T18 
+0

這實際上非常直觀。我應該想到它。 – Prometheus