How can I create a new data set that keeps, for each unique ID, the row with the maximum time ≤ 4 years, together with the status and the cancer variable recorded at that maximum time?
I have data like the following example (`data` below).
I want to extract a data set like the one shown in `data1` below.
# Example input: one row per (ID, follow-up year).
# The original dput-style literal was malformed: its first column held 19
# values while every other column (and row.names) described 18 rows, and the
# factor labels contained duplicated levels. Building the frame with
# data.frame()/factor() guarantees equal column lengths and valid factors.
# Rows per ID: ID 1 -> years 0-5, ID 2 -> years 0-3, ID 3 -> years 0-7.
rows_per_id <- c(6L, 4L, 8L)
data <- data.frame(
  ID = factor(rep(c("1", "2", "3"), times = rows_per_id)),
  # Kept as a factor, as in the original; levels sort as "0", "1", ..., "7".
  timeYears = factor(c(0:5, 0:3, 0:7)),
  status = c(
    0L, 0L, 0L, 0L, 1L, 1L,
    0L, 0L, 0L, 0L,
    0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L
  ),
  cancer = factor(rep(c("1", "2", "1"), times = rows_per_id))
)
# Desired result: one row per ID, taken at that ID's largest time <= 4 years.
# The original literal used duplicated factor labels (e.g. "4", "3", "4"),
# which is not a valid factor; factor() builds proper levels instead.
data1 <- data.frame(
  ID = factor(c("1", "2", "3")),
  timeYears = factor(c("4", "3", "4")),
  status = c(1L, 0L, 0L),
  cancer = factor(c("1", "2", "1"))
)
CodePudding user response:
dplyr
library(dplyr)
# Keep, for every ID, the single row with the largest time that is <= 4 years.
# timeYears is a factor, so it is converted to its numeric value before both
# the comparison and the ranking; ranking the factor directly would order by
# level codes, which need not match the numeric order of the labels.
data %>%
  dplyr::filter(as.integer(as.character(timeYears)) <= 4L) %>%
  group_by(ID) %>%
  slice_max(
    as.integer(as.character(timeYears)),
    n = 1,
    with_ties = FALSE # exactly one row per ID even if a time is duplicated
  ) %>%
  ungroup()
# # A tibble: 3 × 4
# ID timeYears status cancer
# <fct> <fct> <int> <fct>
# 1 1 4 1 1
# 2 2 3 0 2
# 3 3 4 0 1
base R
# Base R: keep, for every ID, the row(s) at the largest time that is <= 4.
# timeYears is a factor, so convert via character to get the numeric value.
time_years <- as.integer(as.character(data$timeYears))
# ave() applies FUN within each ID and returns a vector aligned with the rows;
# FUN yields 1L for rows at the group's largest eligible time, 0L otherwise.
is_max_time <- ave(
  time_years, data$ID,
  FUN = function(z) {
    eligible <- z[!is.na(z) & z <= 4L]
    if (length(eligible) == 0L) {
      # No observation within 4 years: select nothing, and avoid the
      # max() warning / -Inf result on an empty vector.
      return(integer(length(z)))
    }
    # %in% never returns NA, so missing times cannot produce NA row indices.
    as.integer(z %in% max(eligible))
  }
) == 1L
data[is_max_time, ]
# ID timeYears status cancer
# 5 1 4 1 1
# 10 2 3 0 2
# 15 3 4 0 1