I have this data ("m"):
# Example data in wide format: one row per id, with value columns A1..A5 and
# B1..B5 that become NA once a row has ended. Bound to `m`, the name the
# code further down refers to.
m <- structure(list(id = 1:4, A1 = c(20, 20, 20, 20), B1 = c(20, 20,
20, 20), A2 = c(10.0873038130365, 4.24227746311085, 4.15920316251515,
14.466663533707), B2 = c(8.02412449161373, 1.94874394931141,
12.9319354292045, 18.1870020286129), A3 = c(-2.52545701169281,
3.91930463167899, -3.22801555234644, 12.175898045939), B3 = c(6.72839637238315,
0.216884504971863, 9.43932210811731, 10.8221145438518), A4 = c(NA,
-2.99467608949688, NA, 6.81498054505025), B4 = c(NA, -10.1318519125029,
NA, 1.91945144708921), A5 = c(NA, NA, NA, -2.53105562138148),
B5 = c(NA, NA, NA, -4.39906344008031)), row.names = c(1L,
4L, 8L, 11L), class = "data.frame", reshapeWide = list(v.names = NULL,
timevar = "time", idvar = "id", times = 1:5, varying = structure(c("A1",
"B1", "A2", "B2", "A3", "B3", "A4", "B4", "A5", "B5"), .Dim = c(2L,
5L))))
id A1 B1 A2 B2 A3 B3 A4 B4 A5 B5
1 1 20 20 10.087304 8.024124 -2.525457 6.7283964 NA NA NA NA
4 2 20 20 4.242277 1.948744 3.919305 0.2168845 -2.994676 -10.131852 NA NA
8 3 20 20 4.159203 12.931935 -3.228016 9.4393221 NA NA NA NA
11 4 20 20 14.466664 18.187002 12.175898 10.8221145 6.814981 1.919451 -2.531056 -4.399063
For each row (until the first NA in that row), I would like to create random "turn" columns (e.g. turns in a game). I replicated this manually:
# Value pools for the A and B turn columns.
A_options <- c("red", "blue", "green", "yellow")
B_options <- c("grey", "black", "white", "pink")

# Each of the four options is drawn with the same probability.
equal_prob <- rep(0.25, 4)

# A turns: four draws (one per row of `m`) for each of the five turns.
A_turn_1 <- sample(A_options, size = 4, replace = TRUE, prob = equal_prob)
A_turn_2 <- sample(A_options, size = 4, replace = TRUE, prob = equal_prob)
A_turn_3 <- sample(A_options, size = 4, replace = TRUE, prob = equal_prob)
A_turn_4 <- sample(A_options, size = 4, replace = TRUE, prob = equal_prob)
A_turn_5 <- sample(A_options, size = 4, replace = TRUE, prob = equal_prob)

# B turns: same scheme, drawn after all A turns.
B_turn_1 <- sample(B_options, size = 4, replace = TRUE, prob = equal_prob)
B_turn_2 <- sample(B_options, size = 4, replace = TRUE, prob = equal_prob)
B_turn_3 <- sample(B_options, size = 4, replace = TRUE, prob = equal_prob)
B_turn_4 <- sample(B_options, size = 4, replace = TRUE, prob = equal_prob)
B_turn_5 <- sample(B_options, size = 4, replace = TRUE, prob = equal_prob)

# Append the turn columns to the original data and print the result.
new <- cbind(
  m,
  A_turn_1, A_turn_2, A_turn_3, A_turn_4, A_turn_5,
  B_turn_1, B_turn_2, B_turn_3, B_turn_4, B_turn_5
)
new
id A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A_turn_1 A_turn_2 A_turn_3 A_turn_4 A_turn_5 B_turn_1 B_turn_2 B_turn_3 B_turn_4 B_turn_5
1 1 20 20 10.087304 8.024124 -2.525457 6.7283964 NA NA NA NA yellow green blue green yellow grey black black pink grey
4 2 20 20 4.242277 1.948744 3.919305 0.2168845 -2.994676 -10.131852 NA NA red red yellow green red pink black white black black
8 3 20 20 4.159203 12.931935 -3.228016 9.4393221 NA NA NA NA blue red yellow blue blue pink grey black white pink
11 4 20 20 14.466664 18.187002 12.175898 10.8221145 6.814981 1.919451 -2.531056 -4.399063 green yellow red red blue pink white grey white grey
For each row, I would like the last turn to be synchronized with the first NA. In this case:
- Row 1: A_turn_4, A_turn_5, B_turn_4, B_turn_5 would be NA
- Row 2: A_turn_5, B_turn_5 would be NA
- Row 3: A_turn_4, A_turn_5, B_turn_4, B_turn_5 would be NA
- Row 4: No element would be NA
Is there some way in R to automatically replace these entries with NA?
Thank you!
CodePudding user response:
In Base R you could simply do:
nms <- names(new)
# Set each turn column to NA wherever the matching value column is NA.
# 'A\\d' / 'B\\d' select A1..A5 / B1..B5 (not the *_turn_* columns), so the
# value columns and turn columns must appear in the same order in `new`.
is.na(new[grep('A_turn', nms)]) <- is.na(new[grep('A\\d', nms)])
is.na(new[grep('B_turn', nms)]) <- is.na(new[grep('B\\d', nms)])
Note that your code is repetitive. You could change that into:
A_options <- c("red", "blue", "green", "yellow")
B_options <- c("grey", "black", "white", "pink")
size <- 5  # number of turns
# Draw one value per row of `m`; without an explicit size, sample() would
# draw length(A_options) values, which only happens to equal nrow(m) here.
n_rows <- nrow(m)
A <- replicate(size, sample(A_options, n_rows, replace = TRUE))
# Note that you do not have to specify the probability since they are all equal.
colnames(A) <- paste0("A_turn_", seq_len(size))
B <- replicate(size, sample(B_options, n_rows, replace = TRUE))
colnames(B) <- paste0("B_turn_", seq_len(size))
new <- cbind(m, A, B)
CodePudding user response:
Why don't you simply do something like this?
# For every turn, blank out the turn entry in rows whose value column is NA.
new$A_turn_1[is.na(new$A1)] <- NA
new$A_turn_2[is.na(new$A2)] <- NA
new$A_turn_3[is.na(new$A3)] <- NA
new$A_turn_4[is.na(new$A4)] <- NA
new$A_turn_5[is.na(new$A5)] <- NA
new$B_turn_1[is.na(new$B1)] <- NA
new$B_turn_2[is.na(new$B2)] <- NA
new$B_turn_3[is.na(new$B3)] <- NA
new$B_turn_4[is.na(new$B4)] <- NA
new$B_turn_5[is.na(new$B5)] <- NA
You could also make it even more automatic by looping over the columns. For this you can use the trick described in "How to use a string variable to select a data frame column using $ notation".
Here is how I would do that:
# Get col values: value columns (A1..A5, B1..B5) and, in the same order,
# the turn columns they control (A_turn_1..A_turn_5, B_turn_1..B_turn_5).
col1 <- paste0(rep(c("A", "B"), each = 5), rep(as.character(1:5), 2))
col2 <- paste0(rep(c("A_turn_", "B_turn_"), each = 5), rep(as.character(1:5), 2))
# Loop over those pairs: where the value column is NA, set the turn to NA.
# seq_along() is safe even if the vectors are empty; [[ ]] extracts the
# column by its (dynamic) name as a plain vector.
for (i in seq_along(col1)) {
  new[is.na(new[[col1[i]]]), col2[i]] <- NA
}
CodePudding user response:
With dplyr, you could automatically create random "turn" columns and replace certain positions with NA all at once:
library(dplyr)
set.seed(1234)
A_options <- c("red", "blue", "green", "yellow")
B_options <- c("grey", "black", "white", "pink")
# Start from `m`, the data frame from the question. For every value column
# (A1, A2, ... / B1, B2, ...) draw one random option per row, then set the
# draw to NA wherever the value column itself is NA. The new columns are
# named "<value column>_turn", e.g. A1_turn.
m %>%
  mutate(
    across(
      matches("^A\\d+$"),
      ~ replace(sample(A_options, length(.x), replace = TRUE), is.na(.x), NA),
      .names = "{.col}_turn"
    ),
    across(
      matches("^B\\d+$"),
      ~ replace(sample(B_options, length(.x), replace = TRUE), is.na(.x), NA),
      .names = "{.col}_turn"
    )
  )
# id A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A1_turn A2_turn A3_turn A4_turn A5_turn B1_turn B2_turn B3_turn B4_turn B5_turn
# 1 1 20 20 10.087304 8.024124 -2.525457 6.7283964 NA NA NA NA yellow red red <NA> <NA> black pink pink <NA> <NA>
# 4 2 20 20 4.242277 1.948744 3.919305 0.2168845 -2.994676 -10.131852 NA NA yellow yellow blue green <NA> black pink pink white <NA>
# 8 3 20 20 4.159203 12.931935 -3.228016 9.4393221 NA NA NA NA blue green yellow <NA> <NA> pink grey white <NA> <NA>
# 11 4 20 20 14.466664 18.187002 12.175898 10.8221145 6.814981 1.919451 -2.531056 -4.399063 blue red yellow blue yellow black pink pink black white