Home > Mobile >  Delete group from grouped data with a specific criteria in specific row
Delete group from grouped data with a specific criteria in specific row

Time:03-23

I have grouped data and I am trying to delete every group in which the first row of the group has a "yes" in the column Caffeinefactor.

I have the following code, but do not seem to get it right:

# Asker's attempt (does not work as intended): tries to flag, per PATIENT.ID
# group, a "yes" in Caffeinefactor on the first row and then filter on it.
library(dplyr)
df2 <- Data2 %>%
  # NOTE(review): the pipe already supplies Data2 as the first argument, so
  # this call is effectively setDT(Data2, Data2). setDT() is presumably
  # data.table's and add_column() presumably tibble's; neither package is
  # attached in this snippet -- confirm they are loaded elsewhere.
  setDT(Data2) %>%
  group_by(PATIENT.ID) %>%
  add_column(TrueFalse = 
               # NOTE(review): the parenthesis after row_number(1) closes
               # ifelse(), so TRUE and FALSE are passed to add_column() rather
               # than to ifelse(). Data2$Caffeinefactor is the whole column,
               # not the rows of the current group.
               ifelse(Data2$Caffeinefactor == "yes" & row_number(1)), TRUE, FALSE)
  # NOTE(review): the previous line does not end in %>%, so this filter() is a
  # separate statement and is not applied to df2. The %in% test compares
  # against the literal strings "row_number(1)" and "yes"; it does not look at
  # row positions.
  filter(Caffeinefactor %in% c("row_number(1)", "yes"))

For the following data (just a small sample of the full data set), I would, for example, like to delete the patient "220909", as it has a "yes" in the first row.

# Sample data: 60 rows, one per treatment, for five patients.
# Columns: PATIENT.ID (integer), PATIENT.TREATMENT.NUMBER (integer),
# Caffeinefactor ("yes"/"no").
# The `.internal.selfref = <pointer: ...>` attribute that dput() prints for a
# data.table is not valid R syntax and has been dropped so that this snippet
# can be parsed. If data.table is in use, calling setDT(DF) afterwards should
# re-establish the internal self-reference.
DF <- structure(list(PATIENT.ID = c(210625L, 210625L, 210625L, 210625L,
210625L, 210625L, 210625L, 210625L, 210625L, 210625L, 210625L,
210625L, 210625L, 210625L, 210625L, 210625L, 210625L, 220909L,
220909L, 220909L, 220909L, 220909L, 220909L, 220909L, 220909L,
220909L, 220909L, 221179L, 221179L, 221179L, 221179L, 221179L,
221179L, 221179L, 221179L, 221179L, 221179L, 221179L, 221179L,
221179L, 221179L, 301705L, 301705L, 301705L, 301705L, 301705L,
301705L, 301705L, 301705L, 301705L, 301705L, 301705L, 301705L,
301705L, 301705L, 301705L, 303926L, 303926L, 303926L, 303926L
), PATIENT.TREATMENT.NUMBER = c(1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 17L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 12L, 13L, 14L, 15L, 16L, 1L, 2L, 3L, 4L), Caffeinefactor = c("no",
"no", "no", "no", "yes", "yes", "yes", "no", "yes", "yes", "yes",
"yes", "yes", "no", "no", "yes", "yes", "yes", "yes", "yes",
"yes", "yes", "yes", "yes", "yes", "yes", "no", "no", "no", "no",
"no", "no", "no", "no", "no", "no", "yes", "yes", "yes", "yes",
"yes", "no", "no", "no", "no", "no", "no", "yes", "no", "yes",
"yes", "yes", "yes", "yes", "yes", "yes", "no", "no", "no", "no"
)), row.names = c(NA, -60L), class = c("data.table", "data.frame"
))

CodePudding user response:

# Drop every patient whose first row has Caffeinefactor == "yes".
#
# Input:  Data2, with columns PATIENT.ID and Caffeinefactor. Rows are assumed
#         to already be in the order that defines each patient's "first"
#         row -- TODO confirm.
# Output: desired_result, the remaining rows of Data2, grouped by PATIENT.ID.

# Convert once, by reference. Do not pipe Data2 into setDT(Data2): the pipe
# already supplies Data2 as the first argument, so the explicit one would be
# passed a second time and land in setDT()'s `keep.rownames` argument.
data.table::setDT(Data2)

groups <- Data2 %>%
  dplyr::group_by(PATIENT.ID)

# The first row of each patient, kept only when that row is a "yes".
groups_we_want_removed <- groups %>%
  dplyr::mutate(rownum = dplyr::row_number()) %>%
  dplyr::filter(rownum == 1L & Caffeinefactor == "yes")

# Every "yes" row; its PATIENT.ID values identify the patients that have at
# least one "yes".
remove_people_with_only_no_caffeinefactor <- groups %>%
  dplyr::filter(Caffeinefactor == "yes")

desired_result <- groups %>%
  dplyr::filter(!PATIENT.ID %in% groups_we_want_removed$PATIENT.ID) %>%
  # NOTE(review): this second filter additionally drops patients who never
  # have a "yes", which the question did not ask for. Remove it if only the
  # first-row rule is wanted.
  dplyr::filter(
    PATIENT.ID %in% remove_people_with_only_no_caffeinefactor$PATIENT.ID
  )

  •  Tags:  
  • r
  • Related