I would like to arrange/sort a grouped data frame according to a variable from the previous (lag) group.
- Group 1 should be arranged by `b`, from `a1` to `a4`.
- Group 2: `a` should be arranged according to `b` of Group 1.
- Group 3: `a` should be arranged according to `b` of Group 2.
Example data:
# Example input: three groups whose rows are deliberately out of order.
dat <- data.frame(
  group = rep(1:3, times = c(4L, 3L, 3L)),
  a = c(rep("new", 4L), "a2", "a1", "a3", "b1", "b3", "b2"),
  b = c("a2", "a4", "a3", "a1", "b1", "b2", "b3", rep("c", 3L)),
  stringsAsFactors = FALSE
)
# Desired result: group 1 sorted by `b`; in each later group the `a` values
# follow the order of `b` in the group before it.
dat1 <- data.frame(
  group = rep(1:3, times = c(4L, 3L, 3L)),
  a = c(rep("new", 4L), "a1", "a2", "a3", "b2", "b1", "b3"),
  b = c("a1", "a2", "a3", "a4", "b2", "b1", "b3", rep("c", 3L)),
  stringsAsFactors = FALSE
)
Initial data
dat
#> group a b
#> 1 1 new a2
#> 2 1 new a4
#> 3 1 new a3
#> 4 1 new a1
#> 5 2 a2 b1
#> 6 2 a1 b2
#> 7 2 a3 b3
#> 8 3 b1 c
#> 9 3 b3 c
#> 10 3 b2 c
Desired output
dat1
#> group a b
#> 1 1 new a1
#> 2 1 new a2
#> 3 1 new a3
#> 4 1 new a4
#> 5 2 a1 b2
#> 6 2 a2 b1
#> 7 2 a3 b3
#> 8 3 b2 c
#> 9 3 b1 c
#> 10 3 b3 c
A dplyr solution would be preferable. However, I appreciate every hint.
CodePudding user response:
library(tidyverse)
# Example input from the question: three groups with unsorted rows.
dat <- data.frame(
  group = rep(1:3, times = c(4L, 3L, 3L)),
  a = c(rep("new", 4L), "a2", "a1", "a3", "b1", "b3", "b2"),
  b = c("a2", "a4", "a3", "a1", "b1", "b2", "b3", rep("c", 3L)),
  stringsAsFactors = FALSE
)
# Arrange the rows of one group, chaining the order from group to group.
#
# Intended as the callback of dplyr::group_modify(), which calls it once per
# group, in group order:
#   - the first group (key equal to unique_group[1]) is sorted by `b`;
#   - every later group is sorted so that its `a` values follow the order of
#     the `b` values of the previously processed (already sorted) group.
#
# Arguments:
#   data          rows of the current group; needs the columns `a` and `b`.
#   group         one-row data frame of group keys with a column `group`.
#   unique_group  all group keys, in the order the groups are processed.
#
# Returns `data` with its rows reordered; column values are never modified.
fArrange <- local({
  # Private state shared by consecutive calls. Keeping it here rather than in
  # the global environment means no global variable is created or removed.
  state <- new.env(parent = emptyenv())

  function(data, group, unique_group) {
    key <- group$group
    is_first <- key == unique_group[1]
    is_last <- key == unique_group[length(unique_group)]

    if (is_first) {
      data <- data %>% arrange(b)
    } else {
      # match() yields the position of each `a` within the previous group's
      # `b`. Unlike converting `a` to a factor, this leaves values without a
      # match untouched (they sort last instead of turning into NA) and it
      # does not fail when the previous `b` contains duplicates.
      prev_b <- state$lev_prev
      data <- data %>% arrange(match(a, .env$prev_b))
    }

    # Remember this group's `b` order for the next group; after the last
    # group the state is cleared so that a later run starts clean.
    state$lev_prev <- if (is_last) NULL else data$b
    data
  }
})
# Run fArrange() over the groups in order; the result stays grouped by `group`.
group_modify(
  group_by(as_tibble(dat), group),
  function(rows, key) fArrange(rows, key, unique(dat$group))
)
output
# A tibble: 10 x 3
# Groups: group [3]
group a b
<int> <chr> <chr>
1 1 new a1
2 1 new a2
3 1 new a3
4 1 new a4
5 2 a1 b2
6 2 a2 b1
7 2 a3 b3
8 3 b2 c
9 3 b1 c
10 3 b3 c
The key here is the function fArrange, which stores the levels from the previous group in a temporary variable called lev_prev that persists between calls (outside the function's local scope).
CodePudding user response:
Here is a basic procedure for how you can do it with dplyr:
Logic:
- Create a list with groups
- apply the conditions to each element in the list (=group df)
- bind them together.
library(dplyr)
# Split the data into a list with one data frame per group.
dat_list <- dat %>%
  group_split(group)
# Group 1: sort by its own `b`.
dat_list_1 <- dat_list[[1]] %>%
  arrange(b)
# Group 2: order the rows by the position of each `a` value within the
# (already sorted) `b` of group 1; match() returns exactly that position.
dat_list_2 <- dat_list[[2]] %>%
  arrange(match(a, dat_list_1$b))
# Group 3: same rule, relative to the sorted group 2. The key must come from
# dat_list_2 -- dat_list_3 does not exist yet while it is being built.
dat_list_3 <- dat_list[[3]] %>%
  arrange(match(a, dat_list_2$b))
# Bind the sorted groups back together into the resulting data frame.
dat1 <- bind_rows(dat_list_1, dat_list_2, dat_list_3)
Output:
group a b
<int> <chr> <chr>
1 1 new a1
2 1 new a2
3 1 new a3
4 1 new a4
5 2 a1 b2
6 2 a2 b1
7 2 a3 b3
8 3 b2 c
9 3 b1 c
10 3 b3 c