In the following data frame...
Case <- c("Case1", "Case1", "Case1", "Case2", "Case2", "Case2", "Case3", "Case3", "Case3", "Case4", "Case4", "Case4", "Case5", "Case5", "Case5")
From <- c(1920, 1946, 1951, 1912, 1962, 1973, 1933, 1960, 1986, 1940, 1946, 1952, 1926, 1938, 1964)
To <- c(1945, 1950, 1951, 1961, 1972, 1973, 1959, 1985, 1986, 1945, 1951, 1952, 1937, 1963, 1970)
A <- c(0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1)
B <- c(0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0)
C <- c(0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1)
D <- c(1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0)
Outcome <- c(0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0)
Data <- data.frame(Case, From, To, A, B, C, D, Outcome)
... I create a new column (FLC) with a series of transformations based on specified elements of a vector (conditions):
conditions <- c("A", "B", "C", "D")
all_one_by_row <- function(data, cols) {
if(missing(cols)) as.integer(rowSums(data) == ncol(data))
else as.integer(rowSums(data[cols]) == ncol(data[cols]))
}
new.df <- Data
tmp <- sapply(new.df[conditions], function(x) x == Lag(x, -1))
tmp[is.na(tmp)] <- FALSE
new.df$FLC <- 1 - all_one_by_row(tmp)
new.df$FLC[new.df$Outcome > 0.5] <- 1
new.df$FLC[new.df$Case != Lag(new.df$Case, -1)] <- 1
new.df
I obtain:
> new.df
Case From To A B C D Outcome FLC
1 Case1 1920 1945 0 0 0 1 0 1
2 Case1 1946 1950 1 1 0 0 0 0
3 Case1 1951 1951 1 1 0 0 1 1
4 Case2 1912 1961 0 1 1 1 0 1
5 Case2 1962 1972 1 0 1 0 0 0
6 Case2 1973 1973 1 0 1 0 1 1
7 Case3 1933 1959 0 0 1 1 0 1
8 Case3 1960 1985 0 1 1 0 0 1
9 Case3 1986 1986 1 1 1 0 1 1
10 Case4 1940 1945 1 0 1 0 0 1
11 Case4 1946 1951 1 0 1 1 0 0
12 Case4 1952 1952 1 0 1 1 1 1
13 Case5 1926 1937 0 0 0 0 0 1
14 Case5 1938 1963 1 0 1 1 0 1
15 Case5 1964 1970 1 0 1 0 0 1
Now, I would like to carry out the same operation based on all elements of a list (combinations.list):
combinations <- unlist(lapply(seq_along(conditions), \(i) if(i > 1) combn(conditions, i, FUN = paste, collapse = "") else conditions))
combinations.list <- strsplit(combinations, "")
combinations.list
> combinations.list
[[1]]
[1] "A"
[[2]]
[1] "B"
[[3]]
[1] "C"
[[4]]
[1] "D"
[[5]]
[1] "A" "B"
[[6]]
[1] "A" "C"
[[7]]
[1] "A" "D"
[[8]]
[1] "B" "C"
[[9]]
[1] "B" "D"
[[10]]
[1] "C" "D"
[[11]]
[1] "A" "B" "C"
[[12]]
[1] "A" "B" "D"
[[13]]
[1] "A" "C" "D"
[[14]]
[1] "B" "C" "D"
[[15]]
[1] "A" "B" "C" "D"
I.e. I would like to obtain a data frame, where a new column (FLC.x) is created for every line of this list based on all elements in the line, such that:
CASES FROM TO A B C D OUTCOME FLC.A FLC.B FLC.C
1 Case1 1920 1945 0 0 0 1 0 1 1 0
2 Case1 1946 1950 1 1 0 0 0 0 0 0
3 Case1 1951 1951 1 1 0 0 1 1 1 1
4 Case2 1912 1961 0 1 1 1 0 1 1 0
5 Case2 1962 1972 1 0 1 0 0 0 0 0
6 Case2 1973 1973 1 0 1 0 1 1 1 1
7 Case3 1933 1959 0 0 1 1 0 0 1 0
8 Case3 1960 1985 0 1 1 0 0 1 0 0
9 Case3 1986 1986 1 1 1 0 1 1 1 1
10 Case4 1940 1945 1 0 1 0 0 0 0 0
11 Case4 1946 1951 1 0 1 1 0 0 0 0
12 Case4 1952 1952 1 0 1 1 1 1 1 1
13 Case5 1926 1937 0 0 0 0 0 1 0 1
14 Case5 1938 1963 1 0 1 1 0 0 0 0
15 Case5 1964 1970 1 0 1 0 0 1 1 1
FLC.D FLC.AB FLC.AC FLC.AD FLC.BC FLC.BD FLC.CD
1 1 1 1 1 1 1 1
2 0 0 0 0 0 0 0
3 1 1 1 1 1 1 1
4 1 1 1 1 1 1 1
5 0 0 0 0 0 0 0
6 1 1 1 1 1 1 1
7 1 1 0 1 1 1 1
8 0 1 1 1 0 0 0
9 1 1 1 1 1 1 1
10 1 0 0 1 0 1 1
11 0 0 0 0 0 0 0
12 1 1 1 1 1 1 1
13 1 1 1 1 1 1 1
14 1 0 0 1 0 1 1
15 1 1 1 1 1 1 1
FLC.ABC FLC.ABD FLC.ACD FLC.BCD FLC.ABCD
1 1 1 1 1 1
2 0 0 0 0 0
3 1 1 1 1 1
4 1 1 1 1 1
5 0 0 0 0 0
6 1 1 1 1 1
7 1 1 1 1 1
8 1 1 1 0 1
9 1 1 1 1 1
10 0 1 1 1 1
11 0 0 0 0 0
12 1 1 1 1 1
13 1 1 1 1 1
14 0 1 1 1 1
15 1 1 1 1 1
>
Could someone help me with a way to do this?
CodePudding user response:
Thanks to r2evans indications, I managed to solve it like this:
conditions <- c("A", "B", "C", "D")
combinations <- unlist(lapply(seq_along(conditions), \(i) if(i > 1) combn(conditions, i, FUN = paste, collapse = "") else conditions))
combinations
flcs <- paste("FLC.", combinations, sep="")
flcs
combinations.list <- strsplit(combinations, "")
combinations.list
all_one_by_row <- function(data, cols) {
if(missing(cols)) as.integer(rowSums(data) == ncol(data))
else as.integer(rowSums(data[cols]) == ncol(data[cols]))
}
new.df <- Data
new.df <- lapply(combinations.list, function(conditions){
tmp <- sapply(new.df[conditions], function(x) x == Lag(x, -1))
tmp[is.na(tmp)] <- FALSE
new.df$FLC <- 1 - all_one_by_row(tmp)
new.df$FLC[new.df$Outcome > 0.5] <- 1
new.df$FLC[new.df$Case != Lag(new.df$Case, -1)] <- 1
new.df
})
new.df <- lapply(new.df, function(x) x[(names(x) %in% c("FLC"))])
new.df <- bind_cols(new.df)
colnames(new.df) <- flcs
new.df <- cbind(Data, new.df)
new.df