consider I have this sample data:
ID <- c(1:10)
group <- c("A","A","A","B","B","B","B","B","B","B")
condition_tall <- c(0,1,1,1,1,0,0,0,1,1)
condition_long <- c(1,1,1,1,0,0,0,1,1,1)
condition_wide <- c(1,1,0,0,0,1,1,1,1,0)
check_tall <- c(1,1,1,1,1,1,0,1,0,1)
check_long <- c(1,1,1,1,1,1,0,1,0,1)
check_wide <- c(1,1,0,1,0,1,0,1,0,1)
dat <- data.frame(ID,group,condition_tall,condition_long,condition_wide,check_tall,check_long,check_wide)
dat
What would the most efficient way bet o generate a summary table like this in R? I want the counts and percentages by group, for "condition" and "check." Thank you so much.
Group A | Group B | ||||||||
---|---|---|---|---|---|---|---|---|---|
Variable | Condition (N) | Condition (%) | Check (N) | Check (%) | Condition (N) | Condition (%) | Check (N) | Check (%) | |
tall | |||||||||
long | |||||||||
wide | |||||||||
CodePudding user response:
You could use the tidyverse packages to reshape your data, compute the summaries you want, and then pivot the data back to wide format:
library(tidyverse)
wide_dat <- dat %>%
pivot_longer(-c(ID, group), names_sep = '_', names_to = c('metric', 'variable')) %>%
group_by(group, metric, variable) %>%
summarize(
n = sum(value),
pct = mean(value)
) %>%
pivot_wider(names_from = c(group, metric), values_from = c(n, pct), names_glue = '{group}_{metric}_{.value}', names_vary = 'slowest')
wide_dat
variable A_check_n A_check_pct A_condition_n A_condition_pct B_check_n B_check_pct B_condition_n B_condition_pct
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 long 3 1 3 1 5 0.714 4 0.571
2 tall 3 1 2 0.667 5 0.714 4 0.571
3 wide 2 0.667 2 0.667 4 0.571 4 0.571
CodePudding user response:
dat %>%
group_by(group) %>%
summarise(across(-ID, list(n=sum, pct=mean))) %>%
pivot_longer(-group, c('name', 'var', 'name1'),names_sep = '_') %>%
pivot_wider(var, names_from = c(group, name, name1))
results
# A tibble: 3 x 9
var A_condition_n A_condition_pct A_check_n A_check_pct B_condition_n B_condition_pct B_check_n B_check_pct
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 tall 2 0.667 3 1 4 0.571 5 0.714
2 long 3 1 3 1 4 0.571 5 0.714
3 wide 2 0.667 2 0.667 4 0.571 4 0.571
Another quick way:
fn <- ~list(c(n=sum(.x),pct=mean(.x)))
dat %>%
pivot_longer(-c(ID, group), c('name1', 'var'), names_sep = '_') %>%
pivot_wider(var, names_from = c(group, name1), values_fn = fn) %>%
unnest_wider(-var, names_sep = '_')
Results:
# A tibble: 3 x 9
var A_condition_n A_condition_pct A_check_n A_check_pct B_condition_n B_condition_pct B_check_n B_check_pct
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 tall 2 0.667 3 1 4 0.571 5 0.714
2 long 3 1 3 1 4 0.571 5 0.714
3 wide 2 0.667 2 0.667 4 0.571 4 0.571