how to make a table with event count and people count at the same time-CodePudding

I have a df that I would like to get the event count and people count by the result and relation.

The final outcome will have following info:

Currently I calculated each value and then fill them into matrix. I feel I might work in the wrong way amke a simple process too complicated. Could anyone give me some guidance on how to make this simple and neat?

The sample data are:

df<-structure(list(Pass = c("N", "N", "N", "N", "N", "N", "N", "Y", 
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", 
"N", "N", "Y", "N", "Y", "N", "Y", "N", "Y", "N", "N", "N", "N", 
"N", "N", "N", "N", "N", "N", "N", "N", "N", NA, "N", "N", "N", 
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", NA, "N", 
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N"), 
    Relation = c("UNRELATED", "UNRELATED", "UNRELATED", "UNRELATED", 
    "UNRELATED", "UNRELATED", "UNRELATED", "UNRELATED", "UNRELATED", 
    "UNRELATED", "UNRELATED", "RELATED", "RELATED", "RELATED", 
    "RELATED", "UNRELATED", "UNRELATED", "UNRELATED", "UNRELATED", 
    "UNRELATED", "RELATED", "UNRELATED", "UNRELATED", "UNRELATED", 
    "UNRELATED", "UNRELATED", "UNRELATED", "UNRELATED", "UNRELATED", 
    "UNRELATED", "UNRELATED", "UNRELATED", "UNRELATED", "UNRELATED", 
    "UNRELATED", "UNRELATED", "RELATED", "UNRELATED", "UNRELATED", 
    "UNRELATED", "UNRELATED", "UNRELATED", "UNRELATED", "UNRELATED", 
    "UNRELATED", "UNRELATED", "UNRELATED", "UNRELATED", "UNRELATED", 
    "RELATED", "RELATED", "UNRELATED", "UNRELATED", "RELATED", 
    "RELATED", "UNRELATED", "RELATED", "RELATED", "UNRELATED", 
    "RELATED", "UNRELATED", "UNRELATED", "UNRELATED", "RELATED", "RELATED", 
    "RELATED", "RELATED", "RELATED", "RELATED", "RELATED", "RELATED", 
    "RELATED"), ID = c("01-01-101", "01-01-101", "01-01-101", 
    "01-01-101", "01-01-101", "01-01-101", "01-01-101", "01-01-101", 
    "01-02-102", "01-02-102", "01-02-102", "01-02-102", "01-02-102", 
    "01-02-102", "01-02-102", "01-05-202", "01-05-202", "01-06-203", 
    "01-06-203", "01-11-402", "01-11-402", "01-11-402", "01-13-501", 
    "01-13-501", "01-13-501", "01-13-501", "01-13-501", "01-13-501", 
    "02-07-301", "02-07-301", "02-07-301", "02-07-301", "02-07-301", 
    "02-07-301", "02-07-301", "02-07-301", "02-07-301", "02-07-301", 
    "02-07-301", "02-07-301", "02-07-301", "02-07-301", "02-07-301", 
    "02-07-301", "02-07-301", "02-10-401", "02-10-401", "02-10-401", 
    "03-04-201", "03-04-201", "03-04-201", "03-04-201", "03-04-201", 
    "03-04-201", "03-04-201", "03-04-201", "03-09-303", "03-09-303", 
    "03-09-303", "03-09-303", "03-09-303", "03-09-303", "05-08-302", 
    "05-08-302", "05-08-302", "05-08-302", "05-12-403", "05-12-403", 
    "06-03-103", "06-03-103", "06-03-103", "06-03-103")), class = c("spec_tbl_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -72L), spec = structure(list(
    cols = list(Pass = structure(list(), class = c("collector_character", 
    "collector")), Relation = structure(list(), class = c("collector_character", 
    "collector")), ID = structure(list(), class = c("collector_character", 
    "collector"))), default = structure(list(), class = c("collector_guess", 
    "collector")), skip = 1), class = "col_spec"))

My current way to calculate each part are:

## Event count for related &  unrelated
events <- df %>% 
  mutate(Pass = factor(Pass, levels = c("N","Y", NA))) %>%
  group_by(Pass) %>% 
  summarise(count = paste0(n()))

# Event count for related
events_rl <- df %>% 
  filter (Relation=="RELATED" %>%
  mutate(Pass = factor(Pass, levels = c("N","Y", NA))) %>%
  group_by(Pass) %>% 
  summarise(count = paste0(n()))

# People count for related & unrelated
people <- df  %>% 
  distinct(ID,Pass) %>% group_by(Pass) %>% summarise(pcount = n())

CodePudding user response：

Not sure if this is what you are aiming for, using dplyr and tidyr you can do this...

library(dplyr)
library(tidyr)

#dataframe with all the relevant data

df1 <- 
  df %>% 
  mutate(Relation = "Related&NotRelated") %>%
  bind_rows(df) %>% 
  filter(Relation != "UNRELATED")
  
#summary  data for events

df_e <- 
  df1 %>% 
  group_by(Pass, Relation) %>% 
  summarise(count = n()) %>% 
  mutate(id = "e")


#summary  data for people, combine with events and pivot for presentation
  df1 %>% 
  distinct(ID, Pass, Relation) %>% 
  group_by(Pass, Relation) %>% 
  summarise(count = n()) %>%
  mutate(id = "p") %>% 
  bind_rows(df_e) %>% 
  mutate(Relation = paste(id, Relation, sep = "_")) %>% 
  select(-id) %>% 
  pivot_wider(names_from = Relation, values_from = count)


#> # A tibble: 3 x 5
#> # Groups:   Pass [3]
#>   Pass  p_RELATED `p_Related&NotRelated` e_RELATED `e_Related&NotRelated`
#>   <chr>     <int>                  <int>     <int>                  <int>
#> 1 N             8                     13        22                     65
#> 2 Y            NA                      3        NA                      5
#> 3 <NA>         NA                      2        NA                      2

^{Created on 2021-09-24 by the reprex package (v2.0.0)}