HAVE = data.frame( STUDENT =c( 1,1,2,2,2,3,3 ),
TEST =c( 'A','B','A','B','C','A','C' ))
WANT = data.frame(STUDENT=c(1,2,3),
A=c(1,1,1),
B=c(1,1,0),
C=c(0,1,1),
TOT=c(2,3,2),
TOT.NOT.A=c(1,2,1))
I have a vertical data and wish to convert to a horizontal data as shown above. I can do
WANT = HAVE %>% group_by(STUDENT) %>% mutate(TOT = n_distinct (TEST))
to get 'TOT' but I do not know how to get 'A' 'B' 'C' or 'TOT.NOT.A'
CodePudding user response:
We could reshape to 'wide' format with pivot_wider
and get the "TOT"al columns
library(dplyr)
library(tidyr)
HAVE %>%
pivot_wider(names_from = TEST, values_from = TEST,
values_fn = length, values_fill = 0) %>%
mutate(TOT = rowSums(across(-STUDENT), na.rm = TRUE),
TOT_NOT_A = rowSums(across(B:C), na.rm = TRUE))
-output
# A tibble: 3 × 6
STUDENT A B C TOT TOT_NOT_A
<dbl> <int> <int> <int> <dbl> <dbl>
1 1 1 1 0 2 1
2 2 1 1 1 3 2
3 3 1 0 1 2 1
Or using base R
out <- addmargins(table(HAVE), 2)
cbind(out, TOT_NOT_A = rowSums(out[, c("B", "C")]))
A B C Sum TOT_NOT_A
1 1 1 0 2 1
2 1 1 1 3 2
3 1 0 1 2 1
CodePudding user response:
Here is an alternative approach combining rowwise
with c_across
and sum
:
library(dplyr)
HAVE %>%
add_count(STUDENT, TEST) %>%
pivot_wider(names_from = TEST, values_from =n, values_fill = 0 ) %>%
rowwise() %>%
mutate(TOT = sum(c_across(A:C), na.rm = TRUE),
TOT_NOT_A = sum(c_across(B:C), na.rm = TRUE))
STUDENT A B C TOT TOT_NOT_A
<dbl> <int> <int> <int> <int> <int>
1 1 1 1 0 2 1
2 2 1 1 1 3 2
3 3 1 0 1 2 1