Home > Software engineering >  Reorder dataframe by rank in R
Reorder dataframe by rank in R

Time:03-24

I have the following data frame (tibble):

# A tibble: 5 × 11

      ID     V1     V2    V3     V4     V5    R1    R2    R3      R4    R5
    <chr> <chr>   <chr>  <chr>  <chr>  <chr> <chr> <chr> <chr>  <chr>  <chr>
1      A    X1      X2    X3     X4     X5     1     2     3      4     5
2      B    X6      X7    X8     X9     X10    5     4     3      2     1
3      C    X11     X12   X13    X14    X15    2     1     4      3     5
4      D    X16     X17   X18    X19    X20    1     2     3      4     5
5      E    X21     X22   X23    X24    X25    5     4     3      2     1

Now, I want to transform this so that the positions of the values in V1-V5 are rearranged based on their respective rank in R1-R2 as follows:

# A tibble: 5 × 6

      ID     V1     V2    V3     V4     V5   
    <chr> <chr>   <chr>  <chr>  <chr>  <chr> 
1      A    X1      X2    X3     X4     X5   
2      B    X10     X9   X8      X7     X6 
3      C    X12     X11   X14    X13    X15   
4      D    X16     X17   X18    X19    X20  
5      E    X25     X24   X23    X22    X21 
`

CodePudding user response:

We could reshape to 'long' format and then do the reorder and reshape back to 'wide'

library(dplyr)
library(tidyr)
df1 %>% 
  pivot_longer(cols = -ID, names_to = c(".value", "grp"), 
     names_pattern = "(\\D )(\\d )") %>% 
  group_by(ID) %>% 
  mutate(V = replace(V, !is.na(R), V[order(R[!is.na(R)])])) %>%
  ungroup %>%
  select(-R) %>%   
  pivot_wider(names_from = grp, values_from = V, names_prefix = "V")

-output

# A tibble: 5 × 6
  ID    V1    V2    V3    V4    V5   
  <chr> <chr> <chr> <chr> <chr> <chr>
1 A     X1    X2    X3    X4    X5   
2 B     X10   X9    X8    X7    X6   
3 C     X12   X11   X14   X13   X15  
4 D     X16   X17   X18   X19   X20  
5 E     X25   X24   X23   X22   X21  

If we want to remove the '0' or NA elements in 'R'

df1 %>% 
  pivot_longer(cols = -ID, names_to = c(".value", "grp"), 
     names_pattern = "(\\D )(\\d )") %>%
  filter(complete.cases(R), R != 0) %>%
  group_by(ID) %>%
  mutate(V = V[order(R)]) %>%
  ungroup %>%
  select(-R) %>%
  pivot_wider(names_from = grp, values_from = V, names_prefix = "V")

Or using base R

cbind(df1[1], t(apply(df1[-1], 1, \(x) x[1:5][as.numeric(x[6:10])])))

data

df1 <- structure(list(ID = c("A", "B", "C", "D", "E"), V1 = c("X1", 
"X6", "X11", "X16", "X21"), V2 = c("X2", "X7", "X12", "X17", 
"X22"), V3 = c("X3", "X8", "X13", "X18", "X23"), V4 = c("X4", 
"X9", "X14", "X19", "X24"), V5 = c("X5", "X10", "X15", "X20", 
"X25"), R1 = c(1L, 5L, 2L, 1L, 5L), R2 = c(2L, 4L, 1L, 2L, 4L
), R3 = c(3L, 3L, 4L, 3L, 3L), R4 = c(4L, 2L, 3L, 4L, 2L), R5 = c(5L, 
1L, 5L, 5L, 1L)), class = "data.frame", row.names = c("1", "2", 
"3", "4", "5"))

CodePudding user response:

Here is another way using purrr.

library(dplyr)
library(purrr)

ids <- df1$ID
df1_v <- select(df1, V1:V5)
df1_r <- select(df1, R1:R5)

map2_dfr(set_names(transpose(df1_v), ids), transpose(df1_r),
         ~ set_names(unlist(.x[unlist(.y)]), names(df1_v)),
         .id = "ID")
# # A tibble: 5 x 6
#   ID    V1    V2    V3    V4    V5   
#   <chr> <chr> <chr> <chr> <chr> <chr>
# 1 A     X1    X2    X3    X4    X5   
# 2 B     X10   X9    X8    X7    X6   
# 3 C     X12   X11   X14   X13   X15  
# 4 D     X16   X17   X18   X19   X20  
# 5 E     X25   X24   X23   X22   X21 

Here is another way using nested data frames, which can be very helpful.

library(dplyr)
library(purrr)
library(tidyr)

df1 %>% 
  nest(V = starts_with("V"),
       R = starts_with("R")) %>% 
  mutate(V = map2(V, R, ~ set_names((.x)[unlist(.y)], names(.x)))) %>% 
  select(-R) %>% 
  unnest(V)
  •  Tags:  
  • r
  • Related