I have the following data:
structure(list(n_historia = c(6815084L, 6815084L, 6815084L, 6815084L,
6815084L, 6815084L, 6832636L, 6832636L, 6832636L, 6832636L),
from = structure(c(1550579100, 1550579160, 1550919120, 1553805900,
1555284240, 1555797180, 1554968160, 1558981560, 1560778200,
1564667100), tzone = "UTC", class = c("POSIXct", "POSIXt"
)), to = structure(c(1550579160, 1550919120, 1553805900,
1553805900, 1555284240, 1555797180, 1558981560, 1558981560,
1561056480, 1564704240), tzone = "UTC", class = c("POSIXct",
"POSIXt")), cama = c("UCN17", "UCN07", "UCN18", "NEN02",
"UCN16", "NEN03", "UCN17", "NEN10", "NEN27", "NEN09"), box = c("UCN-BOX-3",
"UCN-BOX-2", "UCN-BOX-3", "NEN-BOX-1", "UCN-BOX-3", "NEN-BOX-1",
"UCN-BOX-3", "NEN-BOX-1", "NEN-BOX-3", "NEN-BOX-1"), fecha_primer_pos = structure(c(17935,
17935, 17935, 17935, 17935, 17935, 18100, 18100, 18100, 18100
), class = "Date"), fecha_ultimo_pos = structure(c(17981,
17981, 17981, 17981, 17981, 17981, 18100, 18100, 18100, 18100
), class = "Date")), row.names = c(NA, -10L), class = "data.frame")
And I wonder if there is a function to keep the first value in grouped data and set NA in the rest of them, to get something like this:
structure(list(n_historia = c(6815084L, 6815084L, 6815084L, 6815084L,
6815084L, 6815084L, 6832636L, 6832636L, 6832636L, 6832636L),
from = structure(c(1550579100, 1550579160, 1550919120, 1553805900,
1555284240, 1555797180, 1554968160, 1558981560, 1560778200,
1564667100), tzone = "UTC", class = c("POSIXct", "POSIXt"
)), to = structure(c(1550579160, 1550919120, 1553805900,
1553805900, 1555284240, 1555797180, 1558981560, 1558981560,
1561056480, 1564704240), tzone = "UTC", class = c("POSIXct",
"POSIXt")), cama = c("UCN17", "UCN07", "UCN18", "NEN02",
"UCN16", "NEN03", "UCN17", "NEN10", "NEN27", "NEN09"), box = c("UCN-BOX-3",
"UCN-BOX-2", "UCN-BOX-3", "NEN-BOX-1", "UCN-BOX-3", "NEN-BOX-1",
"UCN-BOX-3", "NEN-BOX-1", "NEN-BOX-3", "NEN-BOX-1"), fecha_primer_pos = structure(c(17935,
NA, NA, NA, NA, NA, 18100, NA, NA, NA
), class = "Date"), fecha_ultimo_pos = structure(c(17981,
NA, NA, NA, NA, NA, 18100, NA, NA, NA
), class = "Date")), row.names = c(NA, -10L), class = "data.frame")
CodePudding user response:
Using if_else, check if it is the first row, then update the columns:
library(dplyr)
dd %>%
group_by(n_historia) %>%
mutate(fecha_primer_pos_new = if_else(row_number() == 1, fecha_primer_pos, as.Date(NA)),
fecha_ultimo_pos_new = if_else(row_number() == 1, fecha_ultimo_pos, as.Date(NA))) %>%
#selecting relevant columns for this Q&A
select(n_historia, fecha_primer_pos_new, fecha_ultimo_pos_new)
# # A tibble: 10 x 3
# # Groups: n_historia [2]
# n_historia fecha_primer_pos_new fecha_ultimo_pos_n~
# <int> <date> <date>
# 1 6815084 2019-02-08 2019-03-26
# 2 6815084 NA NA
# 3 6815084 NA NA
# 4 6815084 NA NA
# 5 6815084 NA NA
# 6 6815084 NA NA
# 7 6832636 2019-07-23 2019-07-23
# 8 6832636 NA NA
# 9 6832636 NA NA
# 10 6832636 NA NA
CodePudding user response:
You can use duplicated
-
df[duplicated(df$n_historia), c('fecha_primer_pos', 'fecha_ultimo_pos')] <- NA
df
# n_historia from to cama box fecha_primer_pos fecha_ultimo_pos
#1 6815084 2019-02-19 12:25:00 2019-02-19 12:26:00 UCN17 UCN-BOX-3 2019-02-08 2019-03-26
#2 6815084 2019-02-19 12:26:00 2019-02-23 10:52:00 UCN07 UCN-BOX-2 <NA> <NA>
#3 6815084 2019-02-23 10:52:00 2019-03-28 20:45:00 UCN18 UCN-BOX-3 <NA> <NA>
#4 6815084 2019-03-28 20:45:00 2019-03-28 20:45:00 NEN02 NEN-BOX-1 <NA> <NA>
#5 6815084 2019-04-14 23:24:00 2019-04-14 23:24:00 UCN16 UCN-BOX-3 <NA> <NA>
#6 6815084 2019-04-20 21:53:00 2019-04-20 21:53:00 NEN03 NEN-BOX-1 <NA> <NA>
#7 6832636 2019-04-11 07:36:00 2019-05-27 18:26:00 UCN17 UCN-BOX-3 2019-07-23 2019-07-23
#8 6832636 2019-05-27 18:26:00 2019-05-27 18:26:00 NEN10 NEN-BOX-1 <NA> <NA>
#9 6832636 2019-06-17 13:30:00 2019-06-20 18:48:00 NEN27 NEN-BOX-3 <NA> <NA>
#10 6832636 2019-08-01 13:45:00 2019-08-02 00:04:00 NEN09 NEN-BOX-1 <NA> <NA>
In dplyr
you can take help of replace
-
library(dplyr)
df %>%
group_by(n_historia) %>%
mutate(across(starts_with('fecha'), replace, -1, NA)) %>%
ungroup