I have grouped data and I am trying to delete all groups where the first row of the group has a "yes" in the column Caffeinefactor.
I have the following code, but do not seem to get it right:
library(dplyr)
# Drop every patient (group) whose first row has Caffeinefactor == "yes".
#
# "First row" means the first row of each PATIENT.ID in the current row order
# of Data2; sort beforehand (e.g. by PATIENT.TREATMENT.NUMBER) if the data is
# not already in the intended order.
df2 <- Data2 %>%
  group_by(PATIENT.ID) %>%
  # first() yields one value per group, so the condition keeps or drops the
  # whole group. %in% never returns NA, so a group whose first value is NA is
  # kept rather than silently dropped.
  filter(!(first(Caffeinefactor) %in% "yes")) %>%
  ungroup()
For the following data (just a small sample of the full data set), I would, for example, like to delete the patient "220909", as it has a "yes" in the first row.
# Sample data: one row per treatment, 60 rows across five patients.
# The `.internal.selfref = <pointer: ...>` attribute that dput() prints for a
# data.table is not valid R syntax and has been dropped so this can be pasted
# and run; call data.table::setDT(DF) afterwards if data.table semantics are
# needed.
DF <- structure(list(PATIENT.ID = c(210625L, 210625L, 210625L, 210625L,
210625L, 210625L, 210625L, 210625L, 210625L, 210625L, 210625L,
210625L, 210625L, 210625L, 210625L, 210625L, 210625L, 220909L,
220909L, 220909L, 220909L, 220909L, 220909L, 220909L, 220909L,
220909L, 220909L, 221179L, 221179L, 221179L, 221179L, 221179L,
221179L, 221179L, 221179L, 221179L, 221179L, 221179L, 221179L,
221179L, 221179L, 301705L, 301705L, 301705L, 301705L, 301705L,
301705L, 301705L, 301705L, 301705L, 301705L, 301705L, 301705L,
301705L, 301705L, 301705L, 303926L, 303926L, 303926L, 303926L
), PATIENT.TREATMENT.NUMBER = c(1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 17L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 12L, 13L, 14L, 15L, 16L, 1L, 2L, 3L, 4L), Caffeinefactor = c("no",
"no", "no", "no", "yes", "yes", "yes", "no", "yes", "yes", "yes",
"yes", "yes", "no", "no", "yes", "yes", "yes", "yes", "yes",
"yes", "yes", "yes", "yes", "yes", "yes", "no", "no", "no", "no",
"no", "no", "no", "no", "no", "no", "yes", "yes", "yes", "yes",
"yes", "no", "no", "no", "no", "no", "no", "yes", "no", "yes",
"yes", "yes", "yes", "yes", "yes", "yes", "no", "no", "no", "no"
)), row.names = c(NA, -60L), class = c("data.table", "data.frame"
))
CodePudding user response:
# Keep only patients whose first row is not "yes" AND who have at least one
# "yes" somewhere in their rows.
#
# The piped `setDT(Data2)` steps were removed: `Data2 %>% setDT(Data2)` expands
# to `setDT(Data2, Data2)`, which passes the data a second time as an argument
# and modifies Data2 by reference. The dplyr verbs below work on a plain data
# frame or data.table without that conversion.

# Data grouped by patient; "first row" below means the first row of each
# PATIENT.ID in the current row order of Data2.
groups <- Data2 %>%
  dplyr::group_by(PATIENT.ID)

# One row per patient whose first row has Caffeinefactor == "yes".
groups_we_want_removed <- groups %>%
  dplyr::filter(dplyr::row_number() == 1 & Caffeinefactor == "yes")

# All "yes" rows. Despite the name, this holds the patients to KEEP in the
# second filter: anyone absent from it has no "yes" row at all.
remove_people_with_only_no_caffeinefactor <- groups %>%
  dplyr::filter(Caffeinefactor == "yes")

# NOTE: the second filter also drops patients with only "no" values, which goes
# beyond "remove groups whose first row is yes"; delete that step if those
# patients should stay. The result is still grouped by PATIENT.ID; call
# dplyr::ungroup() if the grouping is no longer needed.
desired_result <- groups %>%
  dplyr::filter(!PATIENT.ID %in% groups_we_want_removed$PATIENT.ID) %>%
  dplyr::filter(
    PATIENT.ID %in% remove_people_with_only_no_caffeinefactor$PATIENT.ID
  )