I have a df that I would like to get the event count and people count by the result and relation.
The final outcome will have following info:
Currently I calculated each value and then fill them into matrix. I feel I might work in the wrong way amke a simple process too complicated. Could anyone give me some guidance on how to make this simple and neat?
The sample data are:
df<-structure(list(Pass = c("N", "N", "N", "N", "N", "N", "N", "Y",
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N",
"N", "N", "Y", "N", "Y", "N", "Y", "N", "Y", "N", "N", "N", "N",
"N", "N", "N", "N", "N", "N", "N", "N", "N", NA, "N", "N", "N",
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", NA, "N",
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N"),
Relation = c("UNRELATED", "UNRELATED", "UNRELATED", "UNRELATED",
"UNRELATED", "UNRELATED", "UNRELATED", "UNRELATED", "UNRELATED",
"UNRELATED", "UNRELATED", "RELATED", "RELATED", "RELATED",
"RELATED", "UNRELATED", "UNRELATED", "UNRELATED", "UNRELATED",
"UNRELATED", "RELATED", "UNRELATED", "UNRELATED", "UNRELATED",
"UNRELATED", "UNRELATED", "UNRELATED", "UNRELATED", "UNRELATED",
"UNRELATED", "UNRELATED", "UNRELATED", "UNRELATED", "UNRELATED",
"UNRELATED", "UNRELATED", "RELATED", "UNRELATED", "UNRELATED",
"UNRELATED", "UNRELATED", "UNRELATED", "UNRELATED", "UNRELATED",
"UNRELATED", "UNRELATED", "UNRELATED", "UNRELATED", "UNRELATED",
"RELATED", "RELATED", "UNRELATED", "UNRELATED", "RELATED",
"RELATED", "UNRELATED", "RELATED", "RELATED", "UNRELATED",
"RELATED", "UNRELATED", "UNRELATED", "UNRELATED", "RELATED", "RELATED",
"RELATED", "RELATED", "RELATED", "RELATED", "RELATED", "RELATED",
"RELATED"), ID = c("01-01-101", "01-01-101", "01-01-101",
"01-01-101", "01-01-101", "01-01-101", "01-01-101", "01-01-101",
"01-02-102", "01-02-102", "01-02-102", "01-02-102", "01-02-102",
"01-02-102", "01-02-102", "01-05-202", "01-05-202", "01-06-203",
"01-06-203", "01-11-402", "01-11-402", "01-11-402", "01-13-501",
"01-13-501", "01-13-501", "01-13-501", "01-13-501", "01-13-501",
"02-07-301", "02-07-301", "02-07-301", "02-07-301", "02-07-301",
"02-07-301", "02-07-301", "02-07-301", "02-07-301", "02-07-301",
"02-07-301", "02-07-301", "02-07-301", "02-07-301", "02-07-301",
"02-07-301", "02-07-301", "02-10-401", "02-10-401", "02-10-401",
"03-04-201", "03-04-201", "03-04-201", "03-04-201", "03-04-201",
"03-04-201", "03-04-201", "03-04-201", "03-09-303", "03-09-303",
"03-09-303", "03-09-303", "03-09-303", "03-09-303", "05-08-302",
"05-08-302", "05-08-302", "05-08-302", "05-12-403", "05-12-403",
"06-03-103", "06-03-103", "06-03-103", "06-03-103")), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -72L), spec = structure(list(
cols = list(Pass = structure(list(), class = c("collector_character",
"collector")), Relation = structure(list(), class = c("collector_character",
"collector")), ID = structure(list(), class = c("collector_character",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1), class = "col_spec"))
My current way to calculate each part are:
## Event count for related & unrelated
events <- df %>%
mutate(Pass = factor(Pass, levels = c("N","Y", NA))) %>%
group_by(Pass) %>%
summarise(count = paste0(n()))
# Event count for related
events_rl <- df %>%
filter (Relation=="RELATED" %>%
mutate(Pass = factor(Pass, levels = c("N","Y", NA))) %>%
group_by(Pass) %>%
summarise(count = paste0(n()))
# People count for related & unrelated
people <- df %>%
distinct(ID,Pass) %>% group_by(Pass) %>% summarise(pcount = n())
CodePudding user response:
Not sure if this is what you are aiming for, using dplyr
and tidyr
you can do this...
library(dplyr)
library(tidyr)
#dataframe with all the relevant data
df1 <-
df %>%
mutate(Relation = "Related&NotRelated") %>%
bind_rows(df) %>%
filter(Relation != "UNRELATED")
#summary data for events
df_e <-
df1 %>%
group_by(Pass, Relation) %>%
summarise(count = n()) %>%
mutate(id = "e")
#summary data for people, combine with events and pivot for presentation
df1 %>%
distinct(ID, Pass, Relation) %>%
group_by(Pass, Relation) %>%
summarise(count = n()) %>%
mutate(id = "p") %>%
bind_rows(df_e) %>%
mutate(Relation = paste(id, Relation, sep = "_")) %>%
select(-id) %>%
pivot_wider(names_from = Relation, values_from = count)
#> # A tibble: 3 x 5
#> # Groups: Pass [3]
#> Pass p_RELATED `p_Related&NotRelated` e_RELATED `e_Related&NotRelated`
#> <chr> <int> <int> <int> <int>
#> 1 N 8 13 22 65
#> 2 Y NA 3 NA 5
#> 3 <NA> NA 2 NA 2
Created on 2021-09-24 by the reprex package (v2.0.0)