I have the following data frame in R
n1 n2 n3 n4 n5 n6 n7 n8 n9 n10 n11 n12 n13 n14
3557 10 5 13 3 2 8 1 9 4 7 6 12 11 14
3558 9 10 11 1 12 7 2 14 5 8 13 4 3 6
3559 6 7 2 14 11 10 12 5 13 8 1 9 3 4
3560 8 11 13 1 12 3 4 2 10 9 14 7 5 6
3561 3 9 7 8 12 5 10 2 1 6 14 11 4 13
3562 14 2 3 10 6 12 5 13 8 9 7 4 1 11
3563 13 6 5 9 7 8 10 11 2 3 4 1 12 14
3564 10 11 3 5 12 7 14 4 9 8 2 13 1 6
3565 4 8 11 7 10 3 13 14 12 2 5 9 1 6
3566 12 9 2 11 1 14 6 10 8 3 7 5 4 13
3567 11 12 6 7 8 14 10 13 3 4 9 2 1 5
3568 14 7 5 11 8 12 4 3 1 9 6 13 10 2
3569 1 11 13 2 14 6 9 7 4 12 3 5 8 10
3570 4 5 10 8 13 7 11 3 2 14 9 12 6 1
3571 6 9 13 14 12 2 3 7 10 8 1 5 4 11
3572 2 6 11 5 9 13 8 10 3 4 14 7 1 12
3573 10 13 12 9 5 14 11 6 1 3 4 8 7 2
3574 2 10 4 7 13 5 9 8 12 11 14 3 1 6
and I want to extract the column name with which it has the largest number in that row. i.e.
Choice
3557 n14
3558 n8
3559 n4
3560 n11
3561 n11
3562 n1
3563 n14
3564 n7
3565 n8
3566 n6
3567 n6
3568 n1
3569 n5
3570 n10
3571 n4
3572 n11
3573 n6
3574 n11
Is there any quick and efficient way to do it? Thanks in advance.
CodePudding user response:
An efficient option would be max.col
to find the column index for each row where the value is max
and then use the index to return the column names
out <- data.frame(Choice = names(df1)[max.col(df1, "first")])
row.names(out) <- row.names(df1)
-output
> out
Choice
3557 n14
3558 n8
3559 n4
3560 n11
3561 n11
3562 n1
3563 n14
3564 n7
3565 n8
3566 n6
3567 n6
3568 n1
3569 n5
3570 n10
3571 n4
3572 n11
3573 n6
3574 n11
data
df1 <- structure(list(n1 = c(10L, 9L, 6L, 8L, 3L, 14L, 13L, 10L, 4L,
12L, 11L, 14L, 1L, 4L, 6L, 2L, 10L, 2L), n2 = c(5L, 10L, 7L,
11L, 9L, 2L, 6L, 11L, 8L, 9L, 12L, 7L, 11L, 5L, 9L, 6L, 13L,
10L), n3 = c(13L, 11L, 2L, 13L, 7L, 3L, 5L, 3L, 11L, 2L, 6L,
5L, 13L, 10L, 13L, 11L, 12L, 4L), n4 = c(3L, 1L, 14L, 1L, 8L,
10L, 9L, 5L, 7L, 11L, 7L, 11L, 2L, 8L, 14L, 5L, 9L, 7L), n5 = c(2L,
12L, 11L, 12L, 12L, 6L, 7L, 12L, 10L, 1L, 8L, 8L, 14L, 13L, 12L,
9L, 5L, 13L), n6 = c(8L, 7L, 10L, 3L, 5L, 12L, 8L, 7L, 3L, 14L,
14L, 12L, 6L, 7L, 2L, 13L, 14L, 5L), n7 = c(1L, 2L, 12L, 4L,
10L, 5L, 10L, 14L, 13L, 6L, 10L, 4L, 9L, 11L, 3L, 8L, 11L, 9L
), n8 = c(9L, 14L, 5L, 2L, 2L, 13L, 11L, 4L, 14L, 10L, 13L, 3L,
7L, 3L, 7L, 10L, 6L, 8L), n9 = c(4L, 5L, 13L, 10L, 1L, 8L, 2L,
9L, 12L, 8L, 3L, 1L, 4L, 2L, 10L, 3L, 1L, 12L), n10 = c(7L, 8L,
8L, 9L, 6L, 9L, 3L, 8L, 2L, 3L, 4L, 9L, 12L, 14L, 8L, 4L, 3L,
11L), n11 = c(6L, 13L, 1L, 14L, 14L, 7L, 4L, 2L, 5L, 7L, 9L,
6L, 3L, 9L, 1L, 14L, 4L, 14L), n12 = c(12L, 4L, 9L, 7L, 11L,
4L, 1L, 13L, 9L, 5L, 2L, 13L, 5L, 12L, 5L, 7L, 8L, 3L), n13 = c(11L,
3L, 3L, 5L, 4L, 1L, 12L, 1L, 1L, 4L, 1L, 10L, 8L, 6L, 4L, 1L,
7L, 1L), n14 = c(14L, 6L, 4L, 6L, 13L, 11L, 14L, 6L, 6L, 13L,
5L, 2L, 10L, 1L, 11L, 12L, 2L, 6L)), class = "data.frame", row.names = c("3557",
"3558", "3559", "3560", "3561", "3562", "3563", "3564", "3565",
"3566", "3567", "3568", "3569", "3570", "3571", "3572", "3573",
"3574"))
CodePudding user response:
dplyr
option where we first reshape the data from wide to long to find which column with name has the highest value per id:
df <- read.table(text = " n1 n2 n3 n4 n5 n6 n7 n8 n9 n10 n11 n12 n13 n14
3557 10 5 13 3 2 8 1 9 4 7 6 12 11 14
3558 9 10 11 1 12 7 2 14 5 8 13 4 3 6
3559 6 7 2 14 11 10 12 5 13 8 1 9 3 4
3560 8 11 13 1 12 3 4 2 10 9 14 7 5 6
3561 3 9 7 8 12 5 10 2 1 6 14 11 4 13
3562 14 2 3 10 6 12 5 13 8 9 7 4 1 11
3563 13 6 5 9 7 8 10 11 2 3 4 1 12 14
3564 10 11 3 5 12 7 14 4 9 8 2 13 1 6
3565 4 8 11 7 10 3 13 14 12 2 5 9 1 6
3566 12 9 2 11 1 14 6 10 8 3 7 5 4 13
3567 11 12 6 7 8 14 10 13 3 4 9 2 1 5
3568 14 7 5 11 8 12 4 3 1 9 6 13 10 2
3569 1 11 13 2 14 6 9 7 4 12 3 5 8 10
3570 4 5 10 8 13 7 11 3 2 14 9 12 6 1
3571 6 9 13 14 12 2 3 7 10 8 1 5 4 11
3572 2 6 11 5 9 13 8 10 3 4 14 7 1 12
3573 10 13 12 9 5 14 11 6 1 3 4 8 7 2
3574 2 10 4 7 13 5 9 8 12 11 14 3 1 6", header = TRUE)
library(dplyr)
library(tidyr)
library(tibble)
df %>%
rownames_to_column('id') %>%
gather(Choice, highest, n1:n14) %>%
group_by(id) %>%
slice(which.max(highest)) %>%
column_to_rownames("id") %>%
select(-highest)
#> Choice
#> 3557 n14
#> 3558 n8
#> 3559 n4
#> 3560 n11
#> 3561 n11
#> 3562 n1
#> 3563 n14
#> 3564 n7
#> 3565 n8
#> 3566 n6
#> 3567 n6
#> 3568 n1
#> 3569 n5
#> 3570 n10
#> 3571 n4
#> 3572 n11
#> 3573 n6
#> 3574 n11
Created on 2022-07-25 by the reprex package (v2.0.1)