Add the number of specific rows to a summarise() call - CodePudding

I've tried the following reproducible data and code to get my Current_output:

"study n_effect
     3        2
     5        2
     8        2"

However, how can I extend my code to get my Desired_output?

"study n_effect  composite  subscale
     3        2      1      1        # Add the number of rows for reporting
     5        2      1      1        # That are composite and subscale
     8        2      1      1"

library(tidyverse)

# Reproducible example data: one row per effect size (`obs`) within a
# `study`; `reporting` marks each row as "composite" or "subscale".
m <- "
study subscale  reporting  obs include yi   vi         study_type
1        A      subscale   1   yes     1.94 0.33503768 standard
1        A      subscale   2   yes     1.06 0.01076604 standard
2        A      subscale   3   yes     2.41 0.23767389 standard
2        A      subscale   4   yes     2.34 0.37539841 standard
3        A&C    composite  5   yes     3.09 0.31349510 standard
3        A&C     subscale  6   yes     3.99 0.01349510 standard
4        A&B    composite  7   yes     2.90 0.91349510 standard
4        A&B    composite  8   yes     3.01 0.99349510 standard
5        G&H     subscale  9   yes     1.01 0.99910197 alternative
5        G&H    composite  10  yes     2.10 0.97910095 alternative
6        E&G    composite  11  yes     0.11 0.27912095 alternative
6        E&G    composite  12  yes     3.12 0.87910095 alternative
7        E      subscale   13  yes     0.08 0.21670360 alternative
7        G      subscale   14  yes     1.00 0.91597190 alternative
8        F      subscale   15  yes     1.08 0.81670360 alternative
8        E      composite  16  yes     0.99 0.91297170 alternative"
# Spell out `header = TRUE`: `h` relies on partial argument matching and `T`
# is an ordinary variable that can be reassigned.
data <- read.table(text = m, header = TRUE)

# Keep only studies that report both a composite and a subscale row, then
# count the rows per study. `study = study` yields one output row per input
# row, so distinct() collapses the repeats to a single row per study.
data %>%
  group_by(study) %>%
  filter(
    "composite" %in% reporting,
    "subscale" %in% reporting
  ) %>%
  summarise(study = study, n_effect = n()) %>%
  distinct()


# Printed result the asker currently gets (one row per qualifying study).
Current_output <-
"study n_effect
     3        2
     5        2
     8        2"

# Printed result the asker wants: the same rows plus per-study counts of
# composite and subscale rows.
Desired_output <-
"study n_effect  composite  subscale
     3        2      1      1
     5        2      1      1
     8        2      1      1"

CodePudding user response：

Instead of using distinct(), reshape the data to 'wide' format with pivot_wider():

library(dplyr)
library(tidyr)
# Attach the per-study row count to every row, then spread the `reporting`
# levels into columns whose cells hold the number of rows at that level.
data %>%
  group_by(study) %>%
  filter(all(c("composite", "subscale") %in% reporting)) %>%
  mutate(neffect = n()) %>%
  select(study, reporting, neffect) %>%
  ungroup() %>%
  pivot_wider(
    names_from = reporting,
    values_from = reporting,
    values_fn = length
  )

-output

# A tibble: 3 × 4
  study neffect composite subscale
  <int>   <int>     <int>    <int>
1     3       2         1        1
2     5       2         1        1
3     8       2         1        1

If there are only two levels in 'reporting', create a logical vector for each level and take its sum in summarise():

# For each study that has both reporting levels, count all rows and the rows
# at each level: comparing `reporting` against a level gives a logical
# vector, and its sum is the number of matches.
data %>%
  group_by(study) %>%
  filter(
    "composite" %in% reporting,
    "subscale" %in% reporting
  ) %>%
  summarise(
    n_effect = n(),
    composite = sum(reporting == "composite"),
    subscale = sum(reporting == "subscale"),
    .groups = "drop"
  )

-output

# A tibble: 3 × 4
  study n_effect composite subscale
  <int>    <int>     <int>    <int>
1     3        2         1        1
2     5        2         1        1
3     8        2         1        1

NOTE: When the data is grouped, the grouping column is automatically included in the output of summarise(), so it does not need to be listed explicitly.