How can I return the column name and value of row-wise maximum and second-maximum of a data.frame?
Example data:
example_df <- data.frame(
mycol1 =c(101,-108,140,140,-150),
mycol2 = c(111,-105,128,-220,-150),
mycol3 = c(-103,-102,123,-210,-180),
mycol4 =c(106,-102,140,-200,-180),
mycol5 =c(-105,-106,120,-150,-180)
)
Desired data:
desired_df <- data.frame(
firstmax=c("mycol2 111","mycol3 -102","mycol1 140","mycol1 140", "mycol1 -150"),
secondmax=c("mycol4 106", "mycol4 -102", "mycol4 140","mycol5 -150","mycol2 -150"),
stringsAsFactors = F
)
CodePudding user response:
This is probably not the most efficient way to go about it but it gets the job done,
vals <- t(apply(example_df, 1, \(i)sort(i, decreasing = TRUE))[1:2,])
nms <- t(apply(example_df, 1, \(i)names(i)[order(i, decreasing = TRUE)])[1:2,])
mapply(\(x, y) paste(x, y, sep = ' '), data.frame(nms), data.frame(vals))
X1 X2
[1,] "mycol2 111" "mycol4 106"
[2,] "mycol3 -102" "mycol4 -102"
[3,] "mycol1 140" "mycol4 140"
[4,] "mycol1 140" "mycol5 -150"
[5,] "mycol1 -150" "mycol2 -150"
You can tidy up the output the way you want. You can also play around with max.col()
to somewhat vectorize it
CodePudding user response:
Here is one way with max.col
- get the max column index per row, use that index to replace
the values to -Inf
, extract the second index and create a data.frame after paste
ing with the column names extracted
first <- max.col(example_df, 'first')
rn <- seq_len(nrow(example_df))
second <- max.col(replace(example_df, cbind(rn, first), -Inf), "first")
data.frame(firstmax = paste(names(example_df)[first],
example_df[cbind(rn, first)]),
secondmax = paste(names(example_df)[second], example_df[cbind(rn, second)]))
-output
firstmax secondmax
1 mycol2 111 mycol4 106
2 mycol3 -102 mycol4 -102
3 mycol1 140 mycol4 140
4 mycol1 140 mycol5 -150
5 mycol1 -150 mycol2 -150
CodePudding user response:
Here is a data.table approach:
- Melt Long
library(data.table)
df_long = melt(setDT(example_df)[, i:=.I],"i")[
order(-value),.SD[1:2,.(paste(variable,value),c("firstmax", "secondmax"))],i]
- Dcast wide
dcast(df_long, i~V2, value.var="V1")[,c(2,3)]
Output:
firstmax secondmax
1: mycol2 111 mycol4 106
2: mycol3 -102 mycol4 -102
3: mycol1 140 mycol4 140
4: mycol1 140 mycol5 -150
5: mycol1 -150 mycol2 -150
CodePudding user response:
Here is my take on the query using loops
example_list<-list() #stores col name and value of highest and second highest
for (i in 1:nrow(example_df)){
example_list[[i]]<-sort(example_df[i,],decreasing = TRUE)[1:2]}
example_list2<-list() #list with 1st sublevel containing highest colname and value and 2nd sublevel containing 2nd highest colname and value for each level of the list
for (i in 1:length(example_list)){
jnk<-list()
for (j in 1:length(example_list[[i]])){
jnk[[j]]<-c(colnames(example_list[[i]][j]),as.numeric(example_list[[i]][j]))
example_list2[[i]]<-jnk}}
first_max<-list() #list with colname and value of highest
for (i in 1:length(example_list2)){
first_max[[i]]<-example_list2[[i]][[1]]}
second_max<-list() #list with colname and value of 2nd highest
for (i in 1:length(example_list2)){
second_max[[i]]<-example_list2[[i]][[2]]}
first_max2<-as.character() #colname and value of highest pasted together
for (i in 1:length(first_max)){
first_max2[i]<-paste0(first_max[[i]],sep=" ",collapse = "")}
second_max2<-as.character() #colname and value of 2nd highest pasted together
for (i in 1:length(second_max)){
second_max2[i]<-paste0(second_max[[i]],sep=" ",collapse = "")}
final_df<-data.frame(firstmax=first_max2,secondmax=second_max2) #desired df