Synchronizing NA's in R-CodePudding

I have this data ("m"):

# Data frame "m" (this looks like dput() output): four rows identified by `id`,
# with paired value columns A1..A5 and B1..B5. Later columns contain NA for
# rows that have fewer observations.
# NOTE(review): the "reshapeWide" attribute is presumably left over from a
# long-to-wide stats::reshape() call -- confirm; it is not needed below.
structure(list(id = 1:4, A1 = c(20, 20, 20, 20), B1 = c(20, 20, 
20, 20), A2 = c(10.0873038130365, 4.24227746311085, 4.15920316251515, 
14.466663533707), B2 = c(8.02412449161373, 1.94874394931141, 
12.9319354292045, 18.1870020286129), A3 = c(-2.52545701169281, 
3.91930463167899, -3.22801555234644, 12.175898045939), B3 = c(6.72839637238315, 
0.216884504971863, 9.43932210811731, 10.8221145438518), A4 = c(NA, 
-2.99467608949688, NA, 6.81498054505025), B4 = c(NA, -10.1318519125029, 
NA, 1.91945144708921), A5 = c(NA, NA, NA, -2.53105562138148), 
    B5 = c(NA, NA, NA, -4.39906344008031)), row.names = c(1L, 
4L, 8L, 11L), class = "data.frame", reshapeWide = list(v.names = NULL, 
    timevar = "time", idvar = "id", times = 1:5, varying = structure(c("A1", 
    "B1", "A2", "B2", "A3", "B3", "A4", "B4", "A5", "B5"), .Dim = c(2L, 
5L))))

   id A1 B1        A2        B2        A3         B3        A4         B4        A5        B5
1   1 20 20 10.087304  8.024124 -2.525457  6.7283964        NA         NA        NA        NA
4   2 20 20  4.242277  1.948744  3.919305  0.2168845 -2.994676 -10.131852        NA        NA
8   3 20 20  4.159203 12.931935 -3.228016  9.4393221        NA         NA        NA        NA
11  4 20 20 14.466664 18.187002 12.175898 10.8221145  6.814981   1.919451 -2.531056 -4.399063

For each row (up to the first NA in that row), I would like to create random "turn" columns (e.g. the turns in a game). I created them manually like this:

# Options each player can draw from.
A_options <- c("red", "blue", "green", "yellow")
B_options <- c("grey", "black", "white", "pink")

# Draw one option per row of `m` (4 rows), each option equally likely.
# The explicit `prob` is kept so the draws follow the weighted-sampling path.
draw_turn <- function(options) {
  sample(options, size = 4, replace = TRUE, prob = rep(0.25, 4))
}

# Player A, turns 1 to 5.
A_turn_1 <- draw_turn(A_options)
A_turn_2 <- draw_turn(A_options)
A_turn_3 <- draw_turn(A_options)
A_turn_4 <- draw_turn(A_options)
A_turn_5 <- draw_turn(A_options)

# Player B, turns 1 to 5.
B_turn_1 <- draw_turn(B_options)
B_turn_2 <- draw_turn(B_options)
B_turn_3 <- draw_turn(B_options)
B_turn_4 <- draw_turn(B_options)
B_turn_5 <- draw_turn(B_options)

# Append the ten turn columns to the data; cbind() names each new column
# after the variable that was passed in.
new <- cbind(
  m,
  A_turn_1, A_turn_2, A_turn_3, A_turn_4, A_turn_5,
  B_turn_1, B_turn_2, B_turn_3, B_turn_4, B_turn_5
)

 new
   id A1 B1        A2        B2        A3         B3        A4         B4        A5        B5 A_turn_1 A_turn_2 A_turn_3 A_turn_4 A_turn_5 B_turn_1 B_turn_2 B_turn_3 B_turn_4 B_turn_5
1   1 20 20 10.087304  8.024124 -2.525457  6.7283964        NA         NA        NA        NA   yellow    green     blue    green   yellow     grey    black    black     pink     grey
4   2 20 20  4.242277  1.948744  3.919305  0.2168845 -2.994676 -10.131852        NA        NA      red      red   yellow    green      red     pink    black    white    black    black
8   3 20 20  4.159203 12.931935 -3.228016  9.4393221        NA         NA        NA        NA     blue      red   yellow     blue     blue     pink     grey    black    white     pink
11  4 20 20 14.466664 18.187002 12.175898 10.8221145  6.814981   1.919451 -2.531056 -4.399063    green   yellow      red      red     blue     pink    white     grey    white     grey

For each row, I would like the turn columns to stop where the values stop: every turn whose corresponding A/B value is NA should itself be NA. In this case:

Row 1: A_turn_4, A_turn_5, B_turn_4, B_turn_5 would be NA
Row 2: A_turn_5, B_turn_5 would be NA
Row 3: A_turn_4, A_turn_5, B_turn_4, B_turn_5 would be NA
Row 4: No element would be NA

Is there some way in R to automatically replace these entries with NA?

Thank you!

CodePudding user response：

In Base R you could simply do:

# Set each turn column to NA wherever its matching value column is NA.
# `is.na(x) <- mask` assigns NA at the TRUE positions of `mask`; here `mask`
# is the logical matrix for the value columns (A1..A5 / B1..B5), which line up
# position-by-position with the turn columns (A_turn_1..A_turn_5 / B_turn_*).
# Both sides must refer to the same data frame, `new`.
nms <- names(new)
is.na(new[grep("^A_turn_", nms)]) <- is.na(new[grep("^A\\d+$", nms)])
is.na(new[grep("^B_turn_", nms)]) <- is.na(new[grep("^B\\d+$", nms)])

Note that your code is repetitive. You could change it into:

A_options <- c("red", "blue", "green", "yellow")
B_options <- c("grey", "black", "white", "pink")

size <- 5          # number of turns per player
n_rows <- nrow(m)  # one draw per row of the data, not per available option

# Build an n_rows x size character matrix of random draws, one column per turn.
# Note that you do not have to specify the probability since they are all equal.
# matrix() is used so the result keeps its shape even when `m` has one row.
A <- matrix(
  sample(A_options, n_rows * size, replace = TRUE),
  nrow = n_rows,
  dimnames = list(NULL, paste0("A_turn_", seq_len(size)))
)

B <- matrix(
  sample(B_options, n_rows * size, replace = TRUE),
  nrow = n_rows,
  dimnames = list(NULL, paste0("B_turn_", seq_len(size)))
)

# cbind() on a data frame turns each matrix column into its own column.
new <- cbind(m, A, B)

CodePudding user response：

Why don't you simply do something like this?

# Manual version: for each value column, blank the matching turn column in
# exactly those rows where the value is missing.

# Player A
new$A_turn_1[is.na(new$A1)] <- NA
new$A_turn_2[is.na(new$A2)] <- NA
new$A_turn_3[is.na(new$A3)] <- NA
new$A_turn_4[is.na(new$A4)] <- NA
new$A_turn_5[is.na(new$A5)] <- NA

# Player B
new$B_turn_1[is.na(new$B1)] <- NA
new$B_turn_2[is.na(new$B2)] <- NA
new$B_turn_3[is.na(new$B3)] <- NA
new$B_turn_4[is.na(new$B4)] <- NA
new$B_turn_5[is.na(new$B5)] <- NA

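You could also automate this by looping over the columns. For this you can use the trick from "How to use a string variable to select a data frame column using $ notation": when the column name is stored in a variable, index with the string (e.g. new[, name] or new[[name]]) instead of using $.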

Here is how I would do that:

# Paired column names: col1 holds the value columns (A1..A5, B1..B5) and col2
# the matching turn columns (A_turn_1..A_turn_5, B_turn_1..B_turn_5).
# paste0() recycles 1:5 across the ten prefixes, so no explicit rep() of the
# turn numbers is needed.
col1 <- paste0(rep(c("A", "B"), each = 5), 1:5)
col2 <- paste0(rep(c("A_turn_", "B_turn_"), each = 5), 1:5)

# For each pair, set the turn to NA in the rows where the value is NA.
# seq_along() is safe even if the vectors are empty (1:length(x) is not), and
# `[[` extracts a column by a name held in a variable.
for (i in seq_along(col1)) {
  new[is.na(new[[col1[i]]]), col2[i]] <- NA
}

CodePudding user response：

With dplyr, you could automatically create random "turn" columns and replace certain positions with NA all at once:

library(dplyr)
set.seed(1234)  # fix the RNG so the sampled turns are reproducible

A_options <- c("red", "blue", "green", "yellow")
B_options <- c("grey", "black", "white", "pink")

# For every value column (A1..A5, B1..B5) add a "<col>_turn" column: draw one
# random option per row, then replace the draw with NA wherever the value
# column itself is NA. The data frame from the question is called `m`, and
# length(.x) is used so the number of draws always equals the number of rows.
m %>%
  mutate(
    across(
      matches("^A\\d+$"),
      ~ replace(sample(A_options, length(.x), replace = TRUE), is.na(.x), NA),
      .names = "{.col}_turn"
    ),
    across(
      matches("^B\\d+$"),
      ~ replace(sample(B_options, length(.x), replace = TRUE), is.na(.x), NA),
      .names = "{.col}_turn"
    )
  )

#    id A1 B1        A2        B2        A3         B3        A4         B4        A5        B5 A1_turn A2_turn A3_turn A4_turn A5_turn B1_turn B2_turn B3_turn B4_turn B5_turn
# 1   1 20 20 10.087304  8.024124 -2.525457  6.7283964        NA         NA        NA        NA  yellow     red     red    <NA>    <NA>   black    pink    pink    <NA>    <NA>
# 4   2 20 20  4.242277  1.948744  3.919305  0.2168845 -2.994676 -10.131852        NA        NA  yellow  yellow    blue   green    <NA>   black    pink    pink   white    <NA>
# 8   3 20 20  4.159203 12.931935 -3.228016  9.4393221        NA         NA        NA        NA    blue   green  yellow    <NA>    <NA>    pink    grey   white    <NA>    <NA>
# 11  4 20 20 14.466664 18.187002 12.175898 10.8221145  6.814981   1.919451 -2.531056 -4.399063    blue     red  yellow    blue  yellow   black    pink    pink   black   white