Home > database >  Arrange/sort grouped dataframe according to the lag(prior) group
Arrange/sort grouped dataframe according to the lag(prior) group

Time:10-04

I would like to arrange/sort a grouped df according to a variable from the lag(=prior) group.

Group 1 should be arranged by b, from a1 to a4

Group 2 a should be arranged according to Group 1 b

Group 3 a should be arranged according to Group 2 b

Example data:

# Example input: three groups whose rows are not yet in the desired order.
dat <- data.frame(
  group = rep(1:3, times = c(4L, 3L, 3L)),
  a = c(rep("new", 4L), "a2", "a1", "a3", "b1", "b3", "b2"),
  b = c("a2", "a4", "a3", "a1", "b1", "b2", "b3", rep("c", 3L)),
  stringsAsFactors = FALSE
)

# Desired result: group 1 sorted by b; each later group has its a values
# in the same order as the previous group's b values.
dat1 <- data.frame(
  group = rep(1:3, times = c(4L, 3L, 3L)),
  a = c(rep("new", 4L), "a1", "a2", "a3", "b2", "b1", "b3"),
  b = c("a1", "a2", "a3", "a4", "b2", "b1", "b3", rep("c", 3L)),
  stringsAsFactors = FALSE
)

Initial data

dat
#>    group   a  b
#> 1      1 new a2
#> 2      1 new a4
#> 3      1 new a3
#> 4      1 new a1
#> 5      2  a2 b1
#> 6      2  a1 b2
#> 7      2  a3 b3
#> 8      3  b1  c
#> 9      3  b3  c
#> 10     3  b2  c

Desired output

dat1
#>    group   a  b
#> 1      1 new a1
#> 2      1 new a2
#> 3      1 new a3
#> 4      1 new a4
#> 5      2  a1 b2
#> 6      2  a2 b1
#> 7      2  a3 b3
#> 8      3  b2  c
#> 9      3  b1  c
#> 10     3  b3  c

A dplyr solution would be preferable. However, I appreciate every hint.

CodePudding user response:

library(tidyverse)
# Example input from the question, written out one group per line.
dat <- data.frame(
  group = c(
    1L, 1L, 1L, 1L,
    2L, 2L, 2L,
    3L, 3L, 3L
  ),
  a = c(
    "new", "new", "new", "new",
    "a2", "a1", "a3",
    "b1", "b3", "b2"
  ),
  b = c(
    "a2", "a4", "a3", "a1",
    "b1", "b2", "b3",
    "c", "c", "c"
  ),
  stringsAsFactors = FALSE
)


#' Sort one group's rows according to the ordering of the previous group.
#'
#' Intended to be called once per group, in group order, e.g. from
#' `dplyr::group_modify()`. The first group is sorted by its own `b`; every
#' later group is sorted so that its `a` values follow the order of the
#' previous group's (already sorted) `b` values.
#'
#' State is carried from one call to the next in a variable named `lev_prev`
#' in the global environment. It is removed again once the last group has
#' been processed.
#'
#' @param data Rows of the current group; must contain columns `a` and `b`.
#' @param group One-row data frame of group keys with a column `group`
#'   (the `.y` argument of `group_modify()`).
#' @param unique_group All group ids, in the order the groups are processed.
#' @return `data` with its rows reordered; column values are left untouched.
fArrange <- function(data, group, unique_group) {
  is_first <- group$group == unique_group[1]
  is_last <- group$group == unique_group[length(unique_group)]

  if (is_first) {
    data <- data %>% arrange(b)
  } else {
    prev_order <- get("lev_prev", envir = globalenv())
    # Order by position in the previous group's b. Unlike a factor round
    # trip, match() tolerates duplicated values in the previous b and never
    # overwrites unmatched `a` values with NA; unmatched rows sort last.
    data <- data %>% arrange(match(a, prev_order))
  }

  if (is_last) {
    # Clean up the carried state. The guard avoids a warning for a
    # single-group input, where lev_prev was never created.
    if (exists("lev_prev", envir = globalenv(), inherits = FALSE)) {
      rm(list = "lev_prev", envir = globalenv())
    }
  } else {
    # Write to the global environment explicitly so the state lives in the
    # same place the clean-up above removes it from.
    assign("lev_prev", data$b, envir = globalenv())
  }

  data
}

# Apply fArrange to each group in turn. The full vector of group ids is
# passed along so fArrange can recognise the first and the last group.
group_modify(
  group_by(as_tibble(dat), group),
  function(rows, key) fArrange(rows, key, unique(dat$group))
)

output

# A tibble: 10 x 3
# Groups:   group [3]
   group a     b    
   <int> <chr> <chr>
 1     1 new   a1   
 2     1 new   a2   
 3     1 new   a3   
 4     1 new   a4   
 5     2 a1    b2   
 6     2 a2    b1   
 7     2 a3    b3   
 8     3 b2    c    
 9     3 b1    c    
10     3 b3    c   

The key here is the function fArrange, which creates a temporary variable called lev_prev in the global environment. It stores the sorted b values of the previous group, which define the order for the next group.

CodePudding user response:

Here is a basic procedure for doing it with dplyr. The logic:

  1. Create a list with groups
  2. apply the conditions to each element in the list (=group df)
  3. bind them together.
library(dplyr)

# Split the data into a list of data frames, one per group.
dat_list <- dat %>%
  group_split(group)

# Group 1: sort by its own b.
dat_list_1 <- dat_list[[1]] %>%
  arrange(b)

# Group 2: put the rows in the order in which their a values appear in the
# sorted b of group 1. match() gives that position for each a.
dat_list_2 <- dat_list[[2]] %>%
  arrange(match(a, dat_list_1$b))

# Group 3: same rule, using the sorted group 2 as the reference. The
# reference must be the previous group's result, not dat_list_3 itself,
# which does not exist yet at this point.
dat_list_3 <- dat_list[[3]] %>%
  arrange(match(a, dat_list_2$b))

# Bind the sorted groups back together into the resulting data frame.
dat1 <- bind_rows(dat_list_1, dat_list_2, dat_list_3)

Output:

   group a     b    
   <int> <chr> <chr>
 1     1 new   a1   
 2     1 new   a2   
 3     1 new   a3   
 4     1 new   a4   
 5     2 a1    b2   
 6     2 a2    b1   
 7     2 a3    b3   
 8     3 b2    c    
 9     3 b1    c    
10     3 b3    c    
  • Related