HAVE=data.frame("TEACHER"=c(1,1,1,1,1,2,2,2,2),
"STUDENT"=c(1,1,1,2,2,3,3,3,4),
"TRIMESTER"=c(1,2,3,2,3,3,4,5,4))
WANT=data.frame("TRIMESTER"=c(1,2,3,4,5),
"NEWSTUDENTS"=c(1,1,1,1,0),
"TOTALSTUDENTS"=c(1,2,3,4,4),
"NEWTEACHER"=c(1,0,1,0,0),
"TOTALTEACHER"=c(1,1,2,2,2))
I wish to convert HAVE to WANT and I wish to do it by taking the count of NEWSTUDENTS and TOTALSTUDENTS, NEWSTUDENTS is when a STUDENT appears first in the TRIMESTER and do the same for TEACHER
CodePudding user response:
We may loop across
the columns, create 'NEW' columns using duplicated
and then do a group by sum
library(dplyr)
library(stringr)
HAVE %>%
# order by TRIMESTER
arrange(TRIMESTER) %>%
# loop across TEACHER, STUDENT, create logical columns
# by modifying the .names
mutate(across(c(TEACHER, STUDENT), ~ !duplicated(.x),
.names = "NEW{.col}")) %>%
# grouped by TRIMESTER
group_by(TRIMESTER) %>%
# get the sum of 'NEW' columns
summarise(across(starts_with("NEW"), sum), .groups = 'drop') %>%
# loop over the 'NEW' columns, get the cumulative sum
# create new columns by modifying the .names
mutate(across(starts_with('NEW'), cumsum,
.names = "{str_replace(.col, 'NEW', 'TOTAL')}"))
-output
# A tibble: 5 × 5
TRIMESTER NEWTEACHER NEWSTUDENT TOTALTEACHER TOTALSTUDENT
<dbl> <int> <int> <int> <int>
1 1 1 1 1 1
2 2 0 1 1 2
3 3 1 1 2 3
4 4 0 1 2 4
5 5 0 0 2 4
CodePudding user response:
This is an extension of this other answer of mine.
suppressPackageStartupMessages(library(dplyr))
HAVE <- data.frame("TEACHER"=c(1,1,1,1,1,2,2,2,2),
"STUDENT"=c(1,1,1,2,2,3,3,3,4),
"TRIMESTER"=c(1,2,3,2,3,3,4,5,4))
WANT <- data.frame("TRIMESTER"=c(1,2,3,4,5),
"NEWSTUDENTS"=c(1,1,1,1,0),
"TOTALSTUDENTS"=c(1,2,3,4,4),
"NEWTEACHER"=c(1,0,1,0,0),
"TOTALTEACHER"=c(1,1,2,2,2))
HAVE %>%
mutate(NEWSTUDENTS = !duplicated(STUDENT),
NEWTEACHER = !duplicated(TEACHER)) %>%
group_by(TRIMESTER) %>%
summarise(NEWSTUDENTS = sum(NEWSTUDENTS),
NEWTEACHER = sum(NEWTEACHER)) %>%
ungroup() %>%
mutate(TOTALSTUDENTS = cumsum(NEWSTUDENTS),
TOTALTEACHER = cumsum(NEWTEACHER)) %>%
relocate(TOTALSTUDENTS, .before = NEWTEACHER)
#> # A tibble: 5 × 5
#> TRIMESTER NEWSTUDENTS TOTALSTUDENTS NEWTEACHER TOTALTEACHER
#> <dbl> <int> <int> <int> <int>
#> 1 1 1 1 1 1
#> 2 2 1 2 0 1
#> 3 3 1 3 1 2
#> 4 4 1 4 0 2
#> 5 5 0 4 0 2
Created on 2022-08-18 by the reprex package (v2.0.1)