My data looks like this:
dput(head(VI_v2_KRR05,28))
structure(list(ID = c("AUR", "AUR", "AUR", "AUR", "AUR", "AUR",
"AUR", "AUR", "AUR", "AUR", "AUR", "LAM", "LAM", "LAM", "LAM",
"LAM", "LAM", "LAM", "LAM", "LAM", "LAM", "P0", "P0", "P0", "P0",
"P0", "P01", "P01"), EVI_SOS = structure(c(16440, 16805, 17124,
17421, 17599, 17851, 18216, 18403, NA, NA, NA, 16272, 16406,
16637, 16771, 17148, 17516, 17725, 18022, 18210, NA, 16692, 16845,
17058, 17212, NA, 16717, 17077), class = "Date"), NDVI_SOS = structure(c(16436,
16801, 17110, 17420, 17607, 17841, 18196, 18402, NA, NA, NA,
16270, 16380, 16635, 16745, 17139, 17274, 17522, 17731, 18027,
18198, 16480, 16683, 17055, NA, NA, 16712, 17076), class = "Date"),
NIRv_SOS = structure(c(16424, 16557, 16789, 16922, 17221,
17379, 17607, 17821, 17931, 18214, 18400, 16274, 16404, 16639,
16769, 17145, 17519, 17727, 18028, 18208, NA, 16695, 16848,
17061, 17218, NA, 16720, 17084), class = "Date"), kNDVI_SOS = structure(c(16542,
16799, 17212, 17431, 17607, 17854, 18003, 18219, 18401, NA,
NA, 16282, 16647, 17139, 17516, 17733, 18026, 18205, NA,
NA, NA, 16474, 16706, 16847, 17073, 17209, 16712, 17075), class = "Date"),
EVI_EOS = structure(c(16766, 17084, 17356, 17577, 17812,
18098, 18385, 18585, NA, NA, NA, 16375, 16618, 16740, 17099,
17452, 17707, 17841, 18183, 18549, NA, 16825, 17012, 17193,
17396, NA, 17022, 17398), class = "Date"), NDVI_EOS = structure(c(16768,
17075, 17375, 17590, 17812, 18095, 18385, 18568, NA, NA,
NA, 16359, 16616, 16724, 16998, 17248, 17490, 17714, 17961,
18177, 18460, 16648, 17020, 17393, NA, NA, 17028, 17392), class = "Date"),
NIRv_EOS = structure(c(16533, 16768, 16899, 17191, 17358,
17590, 17798, 17920, 18104, 18379, 18547, 16373, 16619, 16738,
17086, 17441, 17709, 17839, 18182, 18471, NA, 16828, 17008,
17198, 17387, NA, 17028, 17398), class = "Date"), kNDVI_EOS = structure(c(16764,
17184, 17356, 17592, 17805, 17993, 18088, 18381, 18562, NA,
NA, 16620, 17007, 17439, 17715, 17845, 18180, 18457, NA,
NA, NA, 16625, 16833, 16991, 17192, 17366, 17026, 17391), class = "Date")), row.names = c(NA,
-28L), class = c("tbl_df", "tbl", "data.frame"))
I want to return NA the dates lower than 2015-09-04 for the ID
AUR, 2015-09-04 for the ID
LAM, 2016-01-02 for the ID
P0 and 2016-01-09 for the ID
P01.
Any help will be much appreciatted.
CodePudding user response:
We can use case_when
if there are only few IDs i.e. create the condition for each ID separately to return only the values that are greater than the threshold date or else, the default case in case_when
returns NA.
library(dplyr)
library(collapse)
out <- df1 %>%
mutate(across(where(is_date),
~ case_when(.x > as.Date("2015-09-04") & ID == 'AUR' ~ .x,
ID == "LAM" & .x > as.Date("2015-09-04 ") ~ .x,
ID == "P0" & .x > as.Date("2016-01-02 ") ~ .x,
ID == "P01" & .x > as.Date("2016-01-09") ~ .x)))
-checking
> df1 %>% filter(ID == "LAM")
# A tibble: 10 × 9
ID EVI_SOS NDVI_SOS NIRv_SOS kNDVI_SOS EVI_EOS NDVI_EOS NIRv_EOS kNDVI_EOS
<chr> <date> <date> <date> <date> <date> <date> <date> <date>
1 LAM 2014-07-21 2014-07-19 2014-07-23 2014-07-31 2014-11-01 2014-10-16 2014-10-30 2015-07-04
2 LAM 2014-12-02 2014-11-06 2014-11-30 2015-07-31 2015-07-02 2015-06-30 2015-07-03 2016-07-25
3 LAM 2015-07-21 2015-07-19 2015-07-23 2016-12-04 2015-11-01 2015-10-16 2015-10-30 2017-09-30
4 LAM 2015-12-02 2015-11-06 2015-11-30 2017-12-16 2016-10-25 2016-07-16 2016-10-12 2018-07-03
5 LAM 2016-12-13 2016-12-04 2016-12-10 2018-07-21 2017-10-13 2017-03-23 2017-10-02 2018-11-10
6 LAM 2017-12-16 2017-04-18 2017-12-19 2019-05-10 2018-06-25 2017-11-20 2018-06-27 2019-10-11
7 LAM 2018-07-13 2017-12-22 2018-07-15 2019-11-05 2018-11-06 2018-07-02 2018-11-04 2020-07-14
8 LAM 2019-05-06 2018-07-19 2019-05-12 NA 2019-10-14 2019-03-06 2019-10-13 NA
9 LAM 2019-11-10 2019-05-11 2019-11-08 NA 2020-10-14 2019-10-08 2020-07-28 NA
10 LAM NA 2019-10-29 NA NA NA 2020-07-17 NA NA
> out %>% filter(ID == "LAM")
# A tibble: 10 × 9
ID EVI_SOS NDVI_SOS NIRv_SOS kNDVI_SOS EVI_EOS NDVI_EOS NIRv_EOS kNDVI_EOS
<chr> <date> <date> <date> <date> <date> <date> <date> <date>
1 LAM NA NA NA NA NA NA NA NA
2 LAM NA NA NA NA NA NA NA 2016-07-25
3 LAM NA NA NA 2016-12-04 2015-11-01 2015-10-16 2015-10-30 2017-09-30
4 LAM 2015-12-02 2015-11-06 2015-11-30 2017-12-16 2016-10-25 2016-07-16 2016-10-12 2018-07-03
5 LAM 2016-12-13 2016-12-04 2016-12-10 2018-07-21 2017-10-13 2017-03-23 2017-10-02 2018-11-10
6 LAM 2017-12-16 2017-04-18 2017-12-19 2019-05-10 2018-06-25 2017-11-20 2018-06-27 2019-10-11
7 LAM 2018-07-13 2017-12-22 2018-07-15 2019-11-05 2018-11-06 2018-07-02 2018-11-04 2020-07-14
8 LAM 2019-05-06 2018-07-19 2019-05-12 NA 2019-10-14 2019-03-06 2019-10-13 NA
9 LAM 2019-11-10 2019-05-11 2019-11-08 NA 2020-10-14 2019-10-08 2020-07-28 NA
10 LAM NA 2019-10-29 NA NA NA 2020-07-17 NA NA
A general approach would be to a key/value data with 'key' being the unique 'ID' and then do a join with the original data and loop across the columns to convert to NA by comparing with the 'dates' column
library(data.table)
df2 <- data.table(ID = c("AUR", "LAM", "PO", "P01"),
dates = as.Date(c("2015-09-04", "2015-09-04", "2016-01-02", "2016-01-09")))
out1 <- copy(df1)
nm1 <- grep("_SOS$", names(out1), value = TRUE)
setDT(out1)[df2, (nm1) := lapply(.SD, \(x)
fcase(x > dates, x)), on = .(ID), .SDcols = nm1]
Or the same method in dplyr
df1 %>%
left_join(df2) %>%
mutate(across(c(where(is_date), -dates),
~ case_when(.x > dates ~ .x)), dates = NULL)
# A tibble: 28 × 9
ID EVI_SOS NDVI_SOS NIRv_SOS kNDVI_SOS EVI_EOS NDVI_EOS NIRv_EOS kNDVI_EOS
<chr> <date> <date> <date> <date> <date> <date> <date> <date>
1 AUR NA NA NA NA 2015-11-27 2015-11-29 NA 2015-11-25
2 AUR 2016-01-05 2016-01-01 NA 2015-12-30 2016-10-10 2016-10-01 2015-11-29 2017-01-18
3 AUR 2016-11-19 2016-11-05 2015-12-20 2017-02-15 2017-07-09 2017-07-28 2016-04-08 2017-07-09
4 AUR 2017-09-12 2017-09-11 2016-05-01 2017-09-22 2018-02-15 2018-02-28 2017-01-25 2018-03-02
5 AUR 2018-03-09 2018-03-17 2017-02-24 2018-03-17 2018-10-08 2018-10-08 2017-07-11 2018-10-01
6 AUR 2018-11-16 2018-11-06 2017-08-01 2018-11-19 2019-07-21 2019-07-18 2018-02-28 2019-04-07
7 AUR 2019-11-16 2019-10-27 2018-03-17 2019-04-17 2020-05-03 2020-05-03 2018-09-24 2019-07-11
8 AUR 2020-05-21 2020-05-20 2018-10-17 2019-11-19 2020-11-19 2020-11-02 2019-01-24 2020-04-29
9 AUR NA NA 2019-02-04 2020-05-19 NA NA 2019-07-27 2020-10-27
10 AUR NA NA 2019-11-14 NA NA NA 2020-04-27 NA