I need to transform some column values into rows for specific rows.
Here is a sample dataset:
df <- data.frame(
student_id = c(1,1,1,2,2,2),
question_id = c(10,11,12,20,21,22),
score = c(2,3,4,2,1,5),
A = c(NA,NA,1, NA,NA,1),
B = c(NA,NA,2, NA,NA,1),
C = c(NA,NA,1, NA,NA,3)
)
> df
student_id question_id score C E O
1 1 10 2 NA NA NA
2 1 11 3 NA NA NA
3 1 12 4 1 2 1
4 2 20 2 NA NA NA
5 2 21 1 NA NA NA
6 2 22 5 1 1 3
For those rows that have values in A
, B
, and C
columns, I need to grab those values into the score column and add the suffices to define the question_id
. For example, the third row's score is the total of these three columns (A, B, C).
My desired output is below.
> df
student_id question_id score
1 1 10 2
2 1 11 3
3 1 12_C 1
4 1 12_E 2
5 1 12_O 1
6 2 20 2
7 2 21 1
8 2 22_C 1
9 2 22_E 1
10 2 22_O 3
CodePudding user response:
library(dplyr)
library(tidyr)
df %>%
pivot_longer(cols = c(A, B, C)) %>%
mutate(question_id = ifelse(!is.na(value), paste(question_id, name, sep = "_"), question_id),
score = ifelse(!is.na(value), value, score)) %>%
select(-c(name, value)) %>%
distinct()
#> # A tibble: 10 x 3
#> student_id question_id score
#> <dbl> <chr> <dbl>
#> 1 1 10 2
#> 2 1 11 3
#> 3 1 12_A 1
#> 4 1 12_B 2
#> 5 1 12_C 1
#> 6 2 20 2
#> 7 2 21 1
#> 8 2 22_A 1
#> 9 2 22_B 1
#> 10 2 22_C 3
Created on 2021-09-15 by the reprex package (v2.0.0)
data
df <- data.frame(
student_id = c(1,1,1,2,2,2),
question_id = c(10,11,12,20,21,22),
score = c(2,3,4,2,1,5),
A = c(NA,NA,1, NA,NA,1),
B = c(NA,NA,2, NA,NA,1),
C = c(NA,NA,1, NA,NA,3))
CodePudding user response:
Another option is to replace the summed score
(i.e. in question 12 and 22) with NA
. From there you can use pivot_longer
with score:C
and values_drop_na
. Then convert the rows where name
is score
to NA
. Finally unite
the question_id
and name
columns.
df %>%
mutate(score = ifelse(!is.na(A), NA, score)) %>%
pivot_longer('score':'C', values_drop_na = TRUE) %>%
mutate(name = na_if(name, 'score')) %>%
unite('question_id', c(question_id, name), na.rm = T)
#------
# A tibble: 10 x 3
student_id question_id value
<dbl> <chr> <dbl>
1 1 10 2
2 1 11 3
3 1 12_A 1
4 1 12_B 2
5 1 12_C 1
6 2 20 2
7 2 21 1
8 2 22_A 1
9 2 22_B 1
10 2 22_C 3
CodePudding user response:
Data
df <- data.frame(
student_id = c(1,1,1,2,2,2),
question_id = c(10,11,12,20,21,22),
score = c(2,3,4,2,1,5),
C = c(NA,NA,1, NA,NA,1),
E = c(NA,NA,2, NA,NA,1),
O = c(NA,NA,1, NA,NA,3)
)
How to
library(tidyverse)
df %>%
pivot_longer(cols = c(C,E,O)) %>%
filter(!is.na(value)) %>%
mutate(question_id = str_c(question_id,name,sep = "_")) %>%
select(-name,-value)
student_id question_id score
<dbl> <chr> <dbl>
1 1 12_C 4
2 1 12_E 4
3 1 12_O 4
4 2 22_C 5
5 2 22_E 5
6 2 22_O 5
CodePudding user response:
I'm not sure this is any better than just adding distinct()
at the end as in another answer, but I was curious if you could do this without pivoting the rows with NA
values so here's that:
df <- data.frame(
student_id = c(1,1,1,2,2,2),
question_id = c(10,11,12,20,21,22),
score = c(2,3,4,2,1,5),
A = c(NA,NA,1, NA,NA,1),
B = c(NA,NA,2, NA,NA,1),
C = c(NA,NA,1, NA,NA,3)
)
library(dplyr, warn.conflicts = FALSE)
library(tidyr)
df %>%
mutate(make_long = !do.call(pmax, across(c(A, B, C), is.na))) %>%
group_by(ml = make_long) %>%
group_modify(~ {
if (first(.x$make_long))
pivot_longer(.x, c(A, B, C)) %>%
transmute(
student_id,
question_id = paste(question_id, name, sep = '_'),
score = value)
else
transmute(.x,
student_id,
question_id = as.character(question_id),
score)
}) %>%
ungroup() %>%
select(-ml)
#> # A tibble: 10 × 3
#> student_id question_id score
#> <dbl> <chr> <dbl>
#> 1 1 10 2
#> 2 1 11 3
#> 3 2 20 2
#> 4 2 21 1
#> 5 1 12_A 1
#> 6 1 12_B 2
#> 7 1 12_C 1
#> 8 2 22_A 1
#> 9 2 22_B 1
#> 10 2 22_C 3
Created on 2021-09-15 by the reprex package (v2.0.1)