I have the following tibble:
> dput(y)
structure(list(line = c("786O_C9", "786O_C9", "786O_C9", "786O_C9",
"786O_C9", "786O_C9", "786O_C9", "786O_C9", "C2BBe1_C9", "C2BBe1_C9",
"C2BBe1_C9", "C2BBe1_C9", "C2BBe1_C9", "C2BBe1_C9", "C2BBe1_C9",
"C2BBe1_C9", "C2BBe1_C9", "C2BBe1_C9", "786O_C9", "786O_C9",
"786O_C9"), rep = c(2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 2L), attempt = c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L), omit = c(FALSE, FALSE, FALSE, FALSE, FALSE, TRUE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE), date = c("07/09/2022",
"07/09/2022", "07/09/2022", "07/09/2022", "07/09/2022", "07/09/2022",
"07/09/2022", "07/09/2022", "21/09/2022", "21/09/2022", "21/09/2022",
"21/09/2022", "21/09/2022", "21/09/2022", "21/09/2022", "21/09/2022",
"21/09/2022", "21/09/2022", "22/09/2022", "22/09/2022", "22/09/2022"
), conc = c(379.35, 363.45, 355.3, 349.4, 364.25, 919.3, 172.25,
172.45, 293.1, 383.5, 436.9, 446.8, 391.2, 382.4, 392.5, 384.5,
409.8, 402, 434, 419.6, 418.9), a260_280 = c(1.842, 1.84, 1.84,
1.835, 1.836, 1.877, 1.809, 1.807, 1.764, 1.756, 1.733, 1.734,
1.879, 1.877, 1.762, 1.763, 1.858, 1.869, 1.828, 1.833, 1.847
), a260_230 = c(2.015, 2.016, 2.014, 2.014, 2.02, 1.977, 1.652,
1.669, 1.471, 1.475, 1.441, 1.438, 1.606, 1.592, 1.515, 1.619,
1.6, 1.605, 1.866, 2.004, 2.02)), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -21L))
> data.frame(y)
line rep attempt omit date conc a260_280 a260_230
1 786O_C9 2 1 FALSE 07/09/2022 379.35 1.842 2.015
2 786O_C9 2 1 FALSE 07/09/2022 363.45 1.840 2.016
3 786O_C9 2 1 FALSE 07/09/2022 355.30 1.840 2.014
4 786O_C9 2 1 FALSE 07/09/2022 349.40 1.835 2.014
5 786O_C9 2 1 FALSE 07/09/2022 364.25 1.836 2.020
6 786O_C9 2 1 TRUE 07/09/2022 919.30 1.877 1.977
7 786O_C9 3 1 FALSE 07/09/2022 172.25 1.809 1.652
8 786O_C9 3 1 FALSE 07/09/2022 172.45 1.807 1.669
9 C2BBe1_C9 1 1 FALSE 21/09/2022 293.10 1.764 1.471
10 C2BBe1_C9 1 1 FALSE 21/09/2022 383.50 1.756 1.475
11 C2BBe1_C9 1 1 FALSE 21/09/2022 436.90 1.733 1.441
12 C2BBe1_C9 1 1 FALSE 21/09/2022 446.80 1.734 1.438
13 C2BBe1_C9 2 1 FALSE 21/09/2022 391.20 1.879 1.606
14 C2BBe1_C9 2 1 FALSE 21/09/2022 382.40 1.877 1.592
15 C2BBe1_C9 2 1 FALSE 21/09/2022 392.50 1.762 1.515
16 C2BBe1_C9 2 1 FALSE 21/09/2022 384.50 1.763 1.619
17 C2BBe1_C9 3 1 FALSE 21/09/2022 409.80 1.858 1.600
18 C2BBe1_C9 3 1 FALSE 21/09/2022 402.00 1.869 1.605
19 786O_C9 2 2 FALSE 22/09/2022 434.00 1.828 1.866
20 786O_C9 2 2 FALSE 22/09/2022 419.60 1.833 2.004
21 786O_C9 2 2 FALSE 22/09/2022 418.90 1.847 2.020
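For each combination of line and rep that has more than one attempt (i.e. both attempt 1 and attempt 2 are present), I would like to remove the rows where attempt is 1, keeping only the later attempt. Combinations with a single attempt should be left unchanged. Is this possible?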
Ideal output would be:
> y %>% filter(!(line == '786O_C9' & rep == 2 & attempt == 1))
# A tibble: 15 × 8
line rep attempt omit date conc a260_280 a260_230
<chr> <int> <int> <lgl> <chr> <dbl> <dbl> <dbl>
1 786O_C9 3 1 FALSE 07/09/2022 172. 1.81 1.65
2 786O_C9 3 1 FALSE 07/09/2022 172. 1.81 1.67
3 C2BBe1_C9 1 1 FALSE 21/09/2022 293. 1.76 1.47
4 C2BBe1_C9 1 1 FALSE 21/09/2022 384. 1.76 1.48
5 C2BBe1_C9 1 1 FALSE 21/09/2022 437. 1.73 1.44
6 C2BBe1_C9 1 1 FALSE 21/09/2022 447. 1.73 1.44
7 C2BBe1_C9 2 1 FALSE 21/09/2022 391. 1.88 1.61
8 C2BBe1_C9 2 1 FALSE 21/09/2022 382. 1.88 1.59
9 C2BBe1_C9 2 1 FALSE 21/09/2022 392. 1.76 1.52
10 C2BBe1_C9 2 1 FALSE 21/09/2022 384. 1.76 1.62
11 C2BBe1_C9 3 1 FALSE 21/09/2022 410. 1.86 1.6
12 C2BBe1_C9 3 1 FALSE 21/09/2022 402 1.87 1.60
13 786O_C9 2 2 FALSE 22/09/2022 434 1.83 1.87
14 786O_C9 2 2 FALSE 22/09/2022 420. 1.83 2.00
15 786O_C9 2 2 FALSE 22/09/2022 419. 1.85 2.02
Thanks.
CodePudding user response:
Is this what you are asking?
# For every (line, rep) combination that has more than one distinct attempt,
# drop the attempt-1 rows; combinations with a single attempt are kept as-is.
# Grouping is what detects "a later attempt exists for this line/rep";
# row order of `y` is preserved by filter().
y %>%
  dplyr::group_by(line, rep) %>%
  dplyr::filter(!(attempt == 1 & dplyr::n_distinct(attempt) > 1)) %>%
  dplyr::ungroup()