Home > Mobile >  Replace NA under condition
Replace NA under condition

Time:12-22

I need your kind tidying data frame. A sample of data is provided below:

> dput(data_1)
structure(list(subject = c("E1", "E1", "E1", "E1", "E1", "E1", 
"E1", "E1"), block = c(3, 3, 4, 4, 5, 5, 6, 6), condition = c("EI", 
"I", "EI", "I", "EI", "I", "EI", "I"), prev_total_RT = c("963", 
"NA", "963", "NA", "963", "NA", "963", "NA"), total_RT = c(271, 
1042, 409, 406, 544, 490, 645, 465), Item_number = c(17, 46, 
17, 46, 17, 46, 17, 46)), row.names = c(NA, -8L), class = c("tbl_df", 
"tbl", "data.frame"))
> data_1
# A tibble: 8 x 6
  subject block condition prev_total_RT total_RT Item_number
  <chr>   <dbl> <chr>     <chr>            <dbl>       <dbl>
1 E1          3 EI        963                271          17
2 E1          3 I         NA                1042          46
3 E1          4 EI        963                409          17
4 E1          4 I         NA                 406          46
5 E1          5 EI        963                544          17
6 E1          5 I         NA                 490          46
7 E1          6 EI        963                645          17
8 E1          6 I         NA                 465          46

While values of "prev_total_RT" for the condition "EI" are provided, it is not provided for the condition "I". I need a code that would generate the values of "prev_total_RT" for the condition "I".

The values of "prev_total_RT" for the condition "I" should be the sum of "total_RT" for condition "I" in "block" = 3, 4, and 5. This should be conditioned by each "subject" and "Item_number". For example, for the subject "E1" and Item_number "46" in condition "I", the value of "prev_total_RT" should be the sum of "total_RT" values in "block" 3, 4, 5 : 1042 406 490 = 1938.

The desired output is provided below:

> dput(data_2)
structure(list(subject = c("E1", "E1", "E1", "E1", "E1", "E1", 
"E1", "E1"), block = c(3, 3, 4, 4, 5, 5, 6, 6), condition = c("EI", 
"I", "EI", "I", "EI", "I", "EI", "I"), prev_total_RT = c(963, 
1938, 963, 1938, 963, 1938, 963, 1938), total_RT = c(271, 1042, 
409, 406, 544, 490, 645, 465), Item_number = c(17, 46, 17, 46, 
17, 46, 17, 46)), row.names = c(NA, -8L), class = c("tbl_df", 
"tbl", "data.frame"))

> data_2
# A tibble: 8 x 6
  subject block condition prev_total_RT total_RT Item_number
  <chr>   <dbl> <chr>             <dbl>    <dbl>       <dbl>
1 E1          3 EI                  963      271          17
2 E1          3 I                  1938     1042          46
3 E1          4 EI                  963      409          17
4 E1          4 I                  1938      406          46
5 E1          5 EI                  963      544          17
6 E1          5 I                  1938      490          46
7 E1          6 EI                  963      645          17
8 E1          6 I                  1938      465          46

Any help with this would be greatly appreciated.

CodePudding user response:

A straight forward method,

library(dplyr)

df %>% 
 group_by(subject, Item_number) %>% 
 mutate(prev_total_RT = replace(prev_total_RT, condition == 'I', sum(total_RT[block %in% c(3, 4, 5)])))

# subject block condition prev_total_RT total_RT Item_number
#  <chr>   <dbl> <chr>     <chr>            <dbl>       <dbl>
#1 E1          3 EI        963                271          17
#2 E1          3 I         1938              1042          46
#3 E1          4 EI        963                409          17
#4 E1          4 I         1938               406          46
#5 E1          5 EI        963                544          17
#6 E1          5 I         1938               490          46
#7 E1          6 EI        963                645          17
#8 E1          6 I         1938               465          46
  • Related