Home > Software design >  Get max and second max of row including column names in R
Get max and second max of row including column names in R

Time:09-27

How can I return the column name and value of row-wise maximum and second-maximum of a data.frame?

Example data:

example_df <- data.frame(
  mycol1 =c(101,-108,140,140,-150),
  mycol2 = c(111,-105,128,-220,-150),
  mycol3 = c(-103,-102,123,-210,-180), 
  mycol4 =c(106,-102,140,-200,-180),
  mycol5 =c(-105,-106,120,-150,-180)
)

Desired data:


desired_df <- data.frame(
  firstmax=c("mycol2 111","mycol3 -102","mycol1 140","mycol1 140", "mycol1 -150"),
  secondmax=c("mycol4 106", "mycol4 -102", "mycol4 140","mycol5 -150","mycol2 -150"),
  stringsAsFactors = F
)

CodePudding user response:

This is probably not the most efficient way to go about it but it gets the job done,

vals <- t(apply(example_df, 1, \(i)sort(i, decreasing = TRUE))[1:2,])
nms <- t(apply(example_df, 1, \(i)names(i)[order(i, decreasing = TRUE)])[1:2,])

mapply(\(x, y) paste(x, y, sep = ' '), data.frame(nms), data.frame(vals))

     X1            X2           
[1,] "mycol2 111"  "mycol4 106" 
[2,] "mycol3 -102" "mycol4 -102"
[3,] "mycol1 140"  "mycol4 140" 
[4,] "mycol1 140"  "mycol5 -150"
[5,] "mycol1 -150" "mycol2 -150"

You can tidy up the output the way you want. You can also play around with max.col() to somewhat vectorize it

CodePudding user response:

Here is one way with max.col - get the max column index per row, use that index to replace the values to -Inf, extract the second index and create a data.frame after pasteing with the column names extracted

first <- max.col(example_df, 'first')
rn <- seq_len(nrow(example_df))
second <- max.col(replace(example_df, cbind(rn, first), -Inf), "first")
data.frame(firstmax = paste(names(example_df)[first], 
      example_df[cbind(rn, first)]), 
   secondmax = paste(names(example_df)[second], example_df[cbind(rn, second)]))

-output

    firstmax   secondmax
1  mycol2 111  mycol4 106
2 mycol3 -102 mycol4 -102
3  mycol1 140  mycol4 140
4  mycol1 140 mycol5 -150
5 mycol1 -150 mycol2 -150

CodePudding user response:

Here is a data.table approach:

  1. Melt Long
library(data.table)

df_long =   melt(setDT(example_df)[, i:=.I],"i")[
  order(-value),.SD[1:2,.(paste(variable,value),c("firstmax", "secondmax"))],i]
  1. Dcast wide
dcast(df_long, i~V2, value.var="V1")[,c(2,3)]

Output:

      firstmax   secondmax
1:  mycol2 111  mycol4 106
2: mycol3 -102 mycol4 -102
3:  mycol1 140  mycol4 140
4:  mycol1 140 mycol5 -150
5: mycol1 -150 mycol2 -150

CodePudding user response:

Here is my take on the query using loops

example_list<-list()  #stores col name and value of highest and second highest
for (i in 1:nrow(example_df)){
example_list[[i]]<-sort(example_df[i,],decreasing = TRUE)[1:2]}
example_list2<-list() #list with 1st sublevel containing highest colname and value and 2nd sublevel containing 2nd highest colname and value for each level of the list
for (i in 1:length(example_list)){
jnk<-list()
for (j in 1:length(example_list[[i]])){
jnk[[j]]<-c(colnames(example_list[[i]][j]),as.numeric(example_list[[i]][j]))
example_list2[[i]]<-jnk}}
first_max<-list() #list with colname and value of highest 
for (i in 1:length(example_list2)){
first_max[[i]]<-example_list2[[i]][[1]]}
second_max<-list() #list with colname and value of 2nd highest
for (i in 1:length(example_list2)){
second_max[[i]]<-example_list2[[i]][[2]]}
first_max2<-as.character() #colname and value of highest pasted together
for (i in 1:length(first_max)){
first_max2[i]<-paste0(first_max[[i]],sep=" ",collapse = "")}
second_max2<-as.character() #colname and value of 2nd highest pasted together
for (i in 1:length(second_max)){
second_max2[i]<-paste0(second_max[[i]],sep=" ",collapse = "")}
final_df<-data.frame(firstmax=first_max2,secondmax=second_max2) #desired df
  • Related