Create a summary table by group from a dataset with 30 columns in R-CodePudding

consider I have this sample data:

ID <- c(1:10)
group <- c("A","A","A","B","B","B","B","B","B","B")
condition_tall <- c(0,1,1,1,1,0,0,0,1,1)
condition_long <- c(1,1,1,1,0,0,0,1,1,1)
condition_wide <- c(1,1,0,0,0,1,1,1,1,0)
check_tall <- c(1,1,1,1,1,1,0,1,0,1)
check_long <- c(1,1,1,1,1,1,0,1,0,1)
check_wide <- c(1,1,0,1,0,1,0,1,0,1)

dat <- data.frame(ID,group,condition_tall,condition_long,condition_wide,check_tall,check_long,check_wide)
dat

What would the most efficient way bet o generate a summary table like this in R? I want the counts and percentages by group, for "condition" and "check." Thank you so much.

	Group A				Group B
Variable	Condition (N)	Condition (%)	Check (N)	Check (%)	Condition (N)	Condition (%)	Check (N)	Check (%)
tall
long
wide

CodePudding user response：

You could use the tidyverse packages to reshape your data, compute the summaries you want, and then pivot the data back to wide format:

library(tidyverse)

wide_dat <- dat %>% 
  pivot_longer(-c(ID, group), names_sep = '_', names_to = c('metric', 'variable')) %>% 
  group_by(group, metric, variable) %>% 
  summarize(
    n = sum(value),
    pct = mean(value)
  ) %>% 
  pivot_wider(names_from = c(group, metric), values_from = c(n, pct), names_glue = '{group}_{metric}_{.value}', names_vary = 'slowest')

wide_dat

 variable A_check_n A_check_pct A_condition_n A_condition_pct B_check_n B_check_pct B_condition_n B_condition_pct
  <chr>        <dbl>       <dbl>         <dbl>           <dbl>     <dbl>       <dbl>         <dbl>           <dbl>
1 long             3       1                 3           1             5       0.714             4           0.571
2 tall             3       1                 2           0.667         5       0.714             4           0.571
3 wide             2       0.667             2           0.667         4       0.571             4           0.571

CodePudding user response：

dat %>%
  group_by(group) %>%
  summarise(across(-ID, list(n=sum, pct=mean))) %>%
  pivot_longer(-group, c('name', 'var', 'name1'),names_sep = '_') %>%
  pivot_wider(var, names_from = c(group, name, name1))

results

# A tibble: 3 x 9
  var   A_condition_n A_condition_pct A_check_n A_check_pct B_condition_n B_condition_pct B_check_n B_check_pct
  <chr>         <dbl>           <dbl>     <dbl>       <dbl>         <dbl>           <dbl>     <dbl>       <dbl>
1 tall              2           0.667         3       1                 4           0.571         5       0.714
2 long              3           1             3       1                 4           0.571         5       0.714
3 wide              2           0.667         2       0.667             4           0.571         4       0.571

Another quick way:

fn <- ~list(c(n=sum(.x),pct=mean(.x)))

dat %>%
  pivot_longer(-c(ID, group), c('name1', 'var'), names_sep = '_') %>%
  pivot_wider(var, names_from = c(group, name1), values_fn = fn) %>%
  unnest_wider(-var, names_sep = '_')

Results:

# A tibble: 3 x 9
  var   A_condition_n A_condition_pct A_check_n A_check_pct B_condition_n B_condition_pct B_check_n B_check_pct
  <chr>         <dbl>           <dbl>     <dbl>       <dbl>         <dbl>           <dbl>     <dbl>       <dbl>
1 tall              2           0.667         3       1                 4           0.571         5       0.714
2 long              3           1             3       1                 4           0.571         5       0.714
3 wide              2           0.667         2       0.667             4           0.571         4       0.571