Home > Software design >  Rearrange the dataframe based on characters in R
Rearrange the dataframe based on characters in R

Time:10-03

I have a data frame like this:

    # Input data: 11 rows (ACD1-ACD11) by 8 columns (H021-H028). Cells hold
    # short strings such as "snv;stop" or "FD;Insert", or the empty string "";
    # columns H024 and H028 contain only NA.
    structure(list(H021 = c("snv;stop", "", "", "", "", "FD;Insert", 
"", "", "", "", ""), H022 = c("", "", "", "", "", "LN;RN", "", 
"FD;Insert", "", "FN;RNV", ""), H023 = c("", "", "", "snv;stop", 
"", "ST: RV", "", "", "", "", ""), H024 = c(NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA), H025 = c("", "", "", "", "FD;Insert", 
"", "", "", "", "snv;stop", ""), H026 = c("", "FD;Insert", "", 
"", "", "", "", "", "", "", ""), H027 = c("", "", "", "", "", 
"", "snv;stop", "", "", "", ""), H028 = c(NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA)), class = "data.frame", row.names = c("ACD1", 
"ACD2", "ACD3", "ACD4", "ACD5", "ACD6", "ACD7", "ACD8", "ACD9", 
"ACD10", "ACD11"))

I want it to rearrange like this:

# Desired result: the same 8 columns (H021-H028) with the rows reordered to
# ACD6, ACD10, ACD1, ACD2, ACD3, ACD4, ACD5, ACD7, ACD8, ACD9, ACD11.
structure(list(H021 = c("FD;Insert", "", "snv;stop", "", "", 
"", "", "", "", "", ""), H022 = c("LN;RN", "FN;RNV", "", "", 
"", "", "", "", "FD;Insert", "", ""), H023 = c("ST: RV", "", 
"", "", "", "snv;stop", "", "", "", "", ""), H024 = c(NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA), H025 = c("", "snv;stop", 
"", "", "", "", "FD;Insert", "", "", "", ""), H026 = c("", "", 
"", "FD;Insert", "", "", "", "", "", "", ""), H027 = c("", "", 
"", "", "", "", "", "snv;stop", "", "", ""), H028 = c(NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA)), class = "data.frame", row.names = c("ACD6", 
"ACD10", "ACD1", "ACD2", "ACD3", "ACD4", "ACD5", "ACD7", "ACD8", 
"ACD9", "ACD11"))

Basically, I want to move the rows having the maximum number of characters in each column to the top. I have tried:

 # Attempted sort: the whole data frame is passed to desc() inside arrange().
 df = data %>%
      arrange(desc(data))

But, I am not getting the desired results.

Thanks in advance!!

CodePudding user response:

I think the approach below is what you are looking for.

We first use across(.fns = nchar) to get the number of characters of each cell. Then we take the rowSums() of this and arrange() descending.

library(dplyr)

# Sort the rows of `dat` by their total character count, largest first.
# across(everything(), nchar) is the explicit form of across(.fns = nchar):
# omitting `.cols` is deprecated since dplyr 1.1.0, so the column selection
# is spelled out. nchar() turns every cell into its character count and
# rowSums() adds the counts per row; na.rm = TRUE keeps NA counts from
# turning a whole row total into NA.
dat %>%
  arrange(
    desc(rowSums(across(everything(), nchar), na.rm = TRUE))
  )

#>            H021      H022     H023 H024      H025      H026     H027 H028
#> ACD6  FD;Insert     LN;RN   ST: RV   NA                                NA
#> ACD10              FN;RNV            NA  snv;stop                      NA
#> ACD2                                 NA           FD;Insert            NA
#> ACD5                                 NA FD;Insert                      NA
#> ACD8            FD;Insert            NA                                NA
#> ACD1   snv;stop                      NA                                NA
#> ACD4                      snv;stop   NA                                NA
#> ACD7                                 NA                     snv;stop   NA
#> ACD3                                 NA                                NA
#> ACD9                                 NA                                NA
#> ACD11                                NA                                NA

The data


# Example data: 11 rows (ACD1-ACD11) by 8 columns (H021-H028). Cells hold
# short strings such as "snv;stop" or "FD;Insert", or the empty string "";
# columns H024 and H028 contain only NA.
dat <- structure(list(H021 = c("snv;stop", "", "", "", "", "FD;Insert", 
"", "", "", "", ""), H022 = c("", "", "", "", "", "LN;RN", "", 
"FD;Insert", "", "FN;RNV", ""), H023 = c("", "", "", "snv;stop", 
"", "ST: RV", "", "", "", "", ""), H024 = c(NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA), H025 = c("", "", "", "", "FD;Insert", 
"", "", "", "", "snv;stop", ""), H026 = c("", "FD;Insert", "", 
"", "", "", "", "", "", "", ""), H027 = c("", "", "", "", "", 
"", "snv;stop", "", "", "", ""), H028 = c(NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA)), class = "data.frame", row.names = c("ACD1", 
"ACD2", "ACD3", "ACD4", "ACD5", "ACD6", "ACD7", "ACD8", "ACD9", 
"ACD10", "ACD11"))

Created on 2022-10-02 by the reprex package (v0.3.0)

CodePudding user response:

Without having to load additional packages you can do this in a one-liner.

# Rows of df1 ordered by total character count, largest first. cbind() over
# lapply() always yields a matrix, so rowSums() also works when df1 has a
# single row (sapply() would collapse that case to a plain vector and fail).
df1[order(-rowSums(do.call(cbind, lapply(df1, nchar)), na.rm = TRUE)), ]
#            H021      H022     H023 H024      H025      H026     H027 H028
# ACD6  FD;Insert     LN;RN   ST: RV   NA                                NA
# ACD10              FN;RNV            NA  snv;stop                      NA
# ACD2                                 NA           FD;Insert            NA
# ACD5                                 NA FD;Insert                      NA
# ACD8            FD;Insert            NA                                NA
# ACD1   snv;stop                      NA                                NA
# ACD4                      snv;stop   NA                                NA
# ACD7                                 NA                     snv;stop   NA
# ACD3                                 NA                                NA
# ACD9                                 NA                                NA
# ACD11                                NA                                NA

Data:

# Example data: 11 rows (ACD1-ACD11) by 8 columns (H021-H028). Cells hold
# short strings such as "snv;stop" or "FD;Insert", or the empty string "";
# columns H024 and H028 contain only NA.
df1 <- structure(list(H021 = c("snv;stop", "", "", "", "", "FD;Insert", 
"", "", "", "", ""), H022 = c("", "", "", "", "", "LN;RN", "", 
"FD;Insert", "", "FN;RNV", ""), H023 = c("", "", "", "snv;stop", 
"", "ST: RV", "", "", "", "", ""), H024 = c(NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA), H025 = c("", "", "", "", "FD;Insert", 
"", "", "", "", "snv;stop", ""), H026 = c("", "FD;Insert", "", 
"", "", "", "", "", "", "", ""), H027 = c("", "", "", "", "", 
"", "snv;stop", "", "", "", ""), H028 = c(NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA)), class = "data.frame", row.names = c("ACD1", 
"ACD2", "ACD3", "ACD4", "ACD5", "ACD6", "ACD7", "ACD8", "ACD9", 
"ACD10", "ACD11"))
  • Related