Question
For the new data, there are many more values, because there are separate values for `east`, `north`, `south` and `west`. These values should be concatenated to make one long list of frequencies for each type.
The start can be adapted as follows:
# Reshape the wide table to long format and split every interval-style
# column name into its lower bound, upper bound and trailing label.
part1 <- dat_in_new %>%
  # Keep rn and strata as identifier columns; everything else is pivoted.
  pivot_longer(-c(rn, strata)) %>%
  # `\\d+` (one or more digits) is required here: without the `+` quantifier
  # the pattern demands a digit followed by a literal space and cannot match
  # interval names such as "[0,25) east".
  tidyr::extract(
    name,
    c('lower', 'upper', 'rest'),
    '(\\d+),(\\d+)[\\]\\)]\\s*(\\w*)',
    convert = TRUE
  ) %>%
  # Drop columns selected by is_all_na (a helper defined outside this
  # snippet; presumably TRUE for columns that contain only NA -- confirm).
  select(-where(is_all_na))
But I get stuck in the following part, because I do not really understand the lines after `group_by(rn)`, especially the `.groups = 'drop_last'` part (link). I usually just run the code line by line to see what happens, but for this solution that did not really help:
# Build, for every rn, a list of per-`upper` frequency sums followed by their
# grand total.
# NOTE(review): the pipeline starts from `part2` itself; presumably the long
# table created in the previous step (`part1`) was intended -- confirm.
part2 <- part2 %>%
# Work within each rn.
group_by(rn) %>%
# Keep only rows whose lower and upper bounds both occur in the first element
# of `strata` for this group (assumes strata is a list column -- TODO confirm).
filter(lower%in%strata[[1]] & upper %in% strata[[1]]) %>%
# Add `upper` to the existing grouping, so groups are now (rn, upper).
group_by(upper,.add = TRUE) %>%
# Sum the values per (rn, upper). 'drop_last' removes only the last grouping
# level (upper), so the result is still grouped by rn.
summarise(freq = sum(value), .groups = 'drop_last') %>%
# Per rn group, append one extra row holding the total of all freq values.
group_modify(~add_row(.,freq = sum(.$freq))) %>%
# Collapse the freq values of each rn into a single list element.
summarise(freq = list(freq))
Currently, the code runs, but I end up with the old outcome. How should I adapt this code to add all the values?
CodePudding user response:
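We may use `pivot_longer` with `names_pattern` to split the column names into lower bound, upper bound and direction, and then add `direction` to the grouping: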
library(dplyr)
library(tidyr)
# One frequency list per (rn, direction): per-`upper` sums followed by their
# total.
dat_in_new %>%
  pivot_longer(
    cols = -c(rn, strata, Sum_table_in),
    names_to = c("lower", "upper", "direction"),
    # Three capture groups: lower bound, upper bound, direction. The `+`
    # quantifiers are essential -- without them the pattern would require a
    # digit followed by a space and could never capture a multi-character
    # direction such as "east".
    names_pattern = "\\[(\\d+),(\\d+)[\\)\\]]\\s+(\\S+$)",
    values_drop_na = TRUE
  ) %>%
  # Captured bounds arrive as character; convert columns to their natural
  # types without turning strings into factors.
  type.convert(as.is = TRUE) %>%
  group_by(rn, direction) %>%
  # Keep rows whose bounds both occur in the first strata element of the group.
  filter(lower %in% strata[[1]] & upper %in% strata[[1]]) %>%
  # Groups become (rn, direction, upper).
  group_by(upper, .add = TRUE) %>%
  # 'drop_last' drops only `upper`; the result stays grouped by
  # (rn, direction).
  summarise(freq = sum(value), .groups = 'drop_last') %>%
  # Append the total of each (rn, direction) group as an extra row.
  group_modify(~add_row(., freq = sum(.$freq))) %>%
  # Collapse each group's frequencies into one list element and ungroup.
  summarise(freq = list(freq), .groups = "drop")
-output
# A tibble: 8 × 3
rn direction freq
<chr> <chr> <list>
1 Type_A east <int [7]>
2 Type_A north <int [7]>
3 Type_A south <int [7]>
4 Type_A west <int [7]>
5 Type_B east <int [6]>
6 Type_B north <int [6]>
7 Type_B south <int [6]>
8 Type_B west <int [6]>
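If we want the result only by 'rn' (all directions concatenated into one list per type), regroup by `rn` before the final `summarise`: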
# One frequency list per rn: the per-direction frequency vectors (each ending
# in its own total) are concatenated across all directions.
dat_in_new %>%
  pivot_longer(
    cols = -c(rn, strata, Sum_table_in),
    names_to = c("lower", "upper", "direction"),
    # Three capture groups: lower bound, upper bound, direction. The `+`
    # quantifiers are essential -- without them the pattern would require a
    # digit followed by a space and could never capture a multi-character
    # direction such as "east".
    names_pattern = "\\[(\\d+),(\\d+)[\\)\\]]\\s+(\\S+$)",
    values_drop_na = TRUE
  ) %>%
  # Captured bounds arrive as character; convert columns to their natural
  # types without turning strings into factors.
  type.convert(as.is = TRUE) %>%
  group_by(rn, direction) %>%
  # Keep rows whose bounds both occur in the first strata element of the group.
  filter(lower %in% strata[[1]] & upper %in% strata[[1]]) %>%
  # Groups become (rn, direction, upper).
  group_by(upper, .add = TRUE) %>%
  # 'drop_last' drops only `upper`; the result stays grouped by
  # (rn, direction).
  summarise(freq = sum(value), .groups = 'drop_last') %>%
  # Append the total of each (rn, direction) group as an extra row.
  group_modify(~add_row(., freq = sum(.$freq))) %>%
  # Regroup by rn alone so the final summarise gathers every direction's
  # frequencies into a single list per rn.
  group_by(rn) %>%
  summarise(freq = list(freq), .groups = "drop")
-output
# A tibble: 2 × 2
rn freq
<chr> <list>
1 Type_A <int [28]>
2 Type_B <int [24]>