Home > front end >  Sorting column names by two numbers
Sorting column names by two numbers

Time:07-08

I recently got this amazing answer from JBGruber, to order string columns with double numerical values, which works on both datasets at the bottom of the post:

library(magrittr)
#' Order columns by the sum of the numbers found in their names
#'
#' Words in the column names (e.g. "east", "north") are first replaced by
#' their alphabetical rank so that they take part in the numeric ordering.
#' Every run of digits is then extracted from each name and summed, and the
#' columns are ordered by that sum.
#'
#' Note that a sum cannot distinguish, for example, `[25,250)` (275) from any
#' other name whose numbers add up to the same or a smaller total, so the
#' result is only correct when the sums happen to be monotone in the intended
#' order.
#'
#' @param dat A data frame (or matrix) with column names.
#' @return An integer vector of column positions, suitable for
#'   `dat[, order_cols(dat)]` or `colnames(dat)[order_cols(dat)]`.
order_cols <- function(dat) {
  col_names <- colnames(dat)

  # Distinct words used in the names, alphabetically. Names without letters
  # yield NA, which sort() drops.
  words <- sort(unique(unlist(
    stringi::stri_extract_all_regex(col_names, "[[:alpha:]]+")
  )))

  if (length(words) > 1) {
    # Replace each word with its alphabetical index so it counts as a number.
    # vectorise_all = FALSE applies every pattern to every name in turn.
    col_names <- stringi::stri_replace_all_fixed(
      col_names, words, seq_along(words),
      vectorise_all = FALSE
    )
  }

  # Extract all numbers (including the alphabetical index numbers) and sum
  # them per column name; names without digits give NA and sort last.
  digit_runs <- stringi::stri_extract_all_regex(col_names, "\\d+")
  totals <- vapply(digit_runs, function(x) sum(as.numeric(x)), numeric(1))

  order(totals)
}

However, I noticed that it does not work completely for the following data, because it relies on the assumption that ordering by the sum of the numbers in each name gives the right order of the columns:

# Example one-row data frame whose column names are numeric intervals.
# It contains `[25,250)`, whose bounds sum to 275, which is more than the
# sum for `[50,100)` (150) even though its lower bound is smaller, so
# ordering by the sum of the bounds misplaces it.
dat_I <- structure(list(`[25,250)`=3L, `[0,25)` = 5L, `[100,250)` = 43L, `[100,500)` = 0L, 
    `[1000,1000000]` = 20L, `[1000,1500)` = 0L, `[1500,3000)` = 0L, 
    `[25,100)` = 38L, `[25,50)` = 0L, `[250,500)` = 27L, `[3000,1000000]` = 0L, 
    `[50,100)` = 0L, `[500,1000)` = 44L, `[500,1000000]` = 0L), row.names = "Type_A", class = "data.frame")

# Show the column names in the order computed by order_cols().
colnames(dat_I )[order_cols(dat_I)]

Is there a way to first order by the first element and then order by the second element?

Old Data

# Earlier example data: one row ("Type_A") with interval-named columns only
# (no `[25,250)` column). Note that this reassigns `dat_I`.
dat_I <- structure(list(`[0,25)` = 5L, `[100,250)` = 43L, `[100,500)` = 0L, 
    `[1000,1000000]` = 20L, `[1000,1500)` = 0L, `[1500,3000)` = 0L, 
    `[25,100)` = 38L, `[25,50)` = 0L, `[250,500)` = 27L, `[3000,1000000]` = 0L, 
    `[50,100)` = 0L, `[500,1000)` = 44L, `[500,1000000]` = 0L), row.names = "Type_A", class = "data.frame")

# Earlier example data: two rows ("A", "B"); each column name is an interval
# followed by a direction word (east/north/south/west), so the ordering has
# to take both the numbers and the word into account.
dat_II <- structure(list(`[0,25) east` = c(1269L, 85L), `[0,25) north` = c(364L, 
21L), `[0,25) south` = c(1172L, 97L), `[0,25) west` = c(549L, 
49L), `[100,250) east` = c(441L, 149L), `[100,250) north` = c(224L, 
45L), `[100,250) south` = c(521L, 247L), `[100,250) west` = c(770L, 
124L), `[100,500) east` = c(0L, 0L), `[100,500) north` = c(0L, 
0L), `[100,500) south` = c(0L, 0L), `[100,500) west` = c(0L, 
0L), `[1000,1000000] east` = c(53L, 0L), `[1000,1000000] north` = c(82L, 
0L), `[1000,1000000] south` = c(23L, 0L), `[1000,1000000] west` = c(63L, 
0L), `[1000,1500) east` = c(0L, 0L), `[1000,1500) north` = c(0L, 
0L), `[1000,1500) south` = c(0L, 0L), `[1000,1500) west` = c(0L, 
0L), `[1500,3000) east` = c(0L, 0L), `[1500,3000) north` = c(0L, 
0L), `[1500,3000) south` = c(0L, 0L), `[1500,3000) west` = c(0L, 
0L), `[25,100) east` = c(579L, 220L), `[25,100) north` = c(406L, 
58L), `[25,100) south` = c(1048L, 316L), `[25,100) west` = c(764L, 
131L), `[25,50) east` = c(0L, 0L), `[25,50) north` = c(0L, 0L
), `[25,50) south` = c(0L, 0L), `[25,50) west` = c(0L, 0L), `[250,500) east` = c(232L, 
172L), `[250,500) north` = c(207L, 40L), `[250,500) south` = c(202L, 
148L), `[250,500) west` = c(457L, 153L), `[3000,1000000] east` = c(0L, 
0L), `[3000,1000000] north` = c(0L, 0L), `[3000,1000000] south` = c(0L, 
0L), `[3000,1000000] west` = c(0L, 0L), `[50,100) east` = c(0L, 
0L), `[50,100) north` = c(0L, 0L), `[50,100) south` = c(0L, 0L
), `[50,100) west` = c(0L, 0L), `[500,1000) east` = c(103L, 0L
), `[500,1000) north` = c(185L, 0L), `[500,1000) south` = c(66L, 
0L), `[500,1000) west` = c(200L, 0L), `[500,1000000] east` = c(0L, 
288L), `[500,1000000] north` = c(0L, 120L), `[500,1000000] south` = c(0L, 
229L), `[500,1000000] west` = c(0L, 175L)), row.names = c("A", 
"B"), class = "data.frame")

CodePudding user response:

I modified the last three lines of the function so that the order is now based on each element successively.

#' Order columns by the numbers in their names, element by element
#'
#' Words in the column names (e.g. "east", "north") are replaced by their
#' alphabetical rank. Every run of digits is then extracted from each name,
#' and the columns are ordered by the first number, ties broken by the second
#' number, then the third, and so on (the word rank comes last when the word
#' follows the interval).
#'
#' @param dat A data frame (or matrix) with column names.
#' @return An integer vector of column positions, suitable for
#'   `dat[, order_cols(dat)]` or `colnames(dat)[order_cols(dat)]`.
order_cols <- function(dat) {
  col_names <- colnames(dat)
  if (length(col_names) == 0L) {
    # Nothing to order; avoids calling order() without any sort keys.
    return(integer(0))
  }

  # Distinct words used in the names, alphabetically. Names without letters
  # yield NA, which sort() drops.
  words <- sort(unique(unlist(
    stringi::stri_extract_all_regex(col_names, "[[:alpha:]]+")
  )))

  if (length(words) > 1) {
    # Replace each word with its alphabetical index so it becomes a sort key.
    # vectorise_all = FALSE applies every pattern to every name in turn.
    col_names <- stringi::stri_replace_all_fixed(
      col_names, words, seq_along(words),
      vectorise_all = FALSE
    )
  }

  # One numeric key vector per column name (including the word indices).
  keys <- lapply(
    stringi::stri_extract_all_regex(col_names, "\\d+"),
    as.numeric
  )

  # Pad shorter keys with NA so rbind() never recycles values when names
  # contain different counts of numbers; NA keys sort last.
  width <- max(lengths(keys))
  keys <- lapply(keys, function(k) c(k, rep(NA_real_, width - length(k))))

  # Rows = column names, columns = successive sort keys. A data frame is a
  # list of its columns, which is what order() expects as arguments.
  key_columns <- as.data.frame(do.call(rbind, keys))
  do.call(order, key_columns)
}
# Reorder the column names of dat_II using the positions from order_cols().
colnames(dat_II)[order_cols(dat_II)]
# [1] "[0,25) east"          "[0,25) north"         "[0,25) south"        
# [4] "[0,25) west"          "[25,50) east"         "[25,50) north"       
# [7] "[25,50) south"        "[25,50) west"         "[25,100) east"       
# [10] "[25,100) north"       "[25,100) south"       "[25,100) west"       
# [13] "[50,100) east"        "[50,100) north"       "[50,100) south"      
# [16] "[50,100) west"        "[100,250) east"       "[100,250) north"     
# [19] "[100,250) south"      "[100,250) west"       "[100,500) east"      
# [22] "[100,500) north"      "[100,500) south"      "[100,500) west"      
# [25] "[250,500) east"       "[250,500) north"      "[250,500) south"     
# [28] "[250,500) west"       "[500,1000) east"      "[500,1000) north"    
# [31] "[500,1000) south"     "[500,1000) west"      "[500,1000000] east"  
# [34] "[500,1000000] north"  "[500,1000000] south"  "[500,1000000] west"  
# [37] "[1000,1500) east"     "[1000,1500) north"    "[1000,1500) south"   
# [40] "[1000,1500) west"     "[1000,1000000] east"  "[1000,1000000] north"
# [43] "[1000,1000000] south" "[1000,1000000] west"  "[1500,3000) east"    
# [46] "[1500,3000) north"    "[1500,3000) south"    "[1500,3000) west"    
# [49] "[3000,1000000] east"  "[3000,1000000] north" "[3000,1000000] south"
# [52] "[3000,1000000] west"
  • Related