Create columns based on day difference and add value from another column-CodePudding

I have a data in following format:

ID  time0       obs_num recorded_dt 
1   2009-01-01  A       2009-01-01  
1   2009-01-01  D       2009-01-31  
1   2009-01-01  B       2009-01-05  
2   2005-02-02  B       2005-02-03

I want to calculate columns based on days difference for up to a month in reference to time0 and add respective values from the obs_num column. In the end, the data should look like this:

ID  time0       obs_num recorded_dt day0    day1    day2    day3    day4    day5    ... day31
1   2009-01-01  A       2009-01-01  A       NULL    NULL    NULL    NULL    NULL    ... NULL
1   2009-01-01  D       2009-01-31  NULL    NULL    NULL    NULL    NULL    NULL    ... D
1   2009-01-01  B       2009-01-05  NULL    NULL    NULL    NULL    NULL    B       ... NULL
2   2005-02-02  B       2005-02-03  NULL    B       NULL    NULL    NULL    NULL    ... NULL

CodePudding user response：

I don't know why do you want every value in a different column. That is inefficient. You can just use lubridate to work with dates:

example = data.frame(ID = c(1,1,1,2),
                     time0 = c('2009-01-01','2009-01-01','2009-01-01','2005-02-02'),
                     obs_num = c('A','D','B','B'),
                     recorded_dt = c('2009-01-01 ','2009-01-31','2009-01-05','2005-02-03')
                     )

library(tidyverse)
library(lubridate)

example$time0 = ymd(example$time0)
example$recorded_dt = ymd(example$recorded_dt)

example %>% 
  mutate(difs_days = floor(difftime(recorded_dt, time0, units="days")))

Output:

  ID      time0 obs_num recorded_dt difs_days
1  1 2009-01-01       A  2009-01-01    0 days
2  1 2009-01-01       D  2009-01-31   30 days
3  1 2009-01-01       B  2009-01-05    4 days
4  2 2005-02-02       B  2005-02-03    1 days

However, to get the result you asked for, we can use pivot_wider() and the names_prefix= property as follows:

example %>% 
  mutate(difs_days = floor(difftime(recorded_dt, time0, units="days"))) %>% 
  arrange(difs_days) %>% 
  pivot_wider(names_from = difs_days, values_from = obs_num, names_prefix = 'day') %>% 
  arrange(ID, recorded_dt)

Output:

# A tibble: 4 x 7
     ID time0      recorded_dt day0  day1  day4  day30
  <dbl> <date>     <date>      <chr> <chr> <chr> <chr>
1     1 2009-01-01 2009-01-01  A     NA    NA    NA   
2     1 2009-01-01 2009-01-05  NA    NA    B     NA   
3     1 2009-01-01 2009-01-31  NA    NA    NA    D    
4     2 2005-02-02 2005-02-03  NA    B     NA    NA

Note if you are pretending to create columns for everyday in a large dataframe, your RAM memory will be highly wasted.

CodePudding user response：

library(dplyr)
dat %>%
  mutate(
    day = recorded_dt - time0,
    day = if_else(day > 30, NA_character_, as.character(day))
  ) %>%
  bind_cols(model.matrix(~ day - 1, .)) %>%
  select(-day) %>%
  mutate(across(starts_with("day"), ~ if_else(. > 0, obs_num, obs_num[NA])))
#   ID      time0 obs_num recorded_dt day0 day1 day30 day4
# 1  1 2009-01-01       A  2009-01-01    A <NA>  <NA> <NA>
# 2  1 2009-01-01       D  2009-01-31 <NA> <NA>     D <NA>
# 3  1 2009-01-01       B  2009-01-05 <NA> <NA>  <NA>    B
# 4  2 2005-02-02       B  2005-02-03 <NA>    B  <NA> <NA>

Data, dates already converted to Date-class.

dat <- structure(list(ID = c(1L, 1L, 1L, 2L), time0 = structure(c(14245, 14245, 14245, 12816), class = "Date"), obs_num = c("A", "D", "B", "B"), recorded_dt = structure(c(14245, 14275, 14249, 12817), class = "Date")), row.names = c(NA, -4L), class = "data.frame")

CodePudding user response：

You can use pivot_wider() from tidyr.

library(dplyr)
library(tidyr)

df %>%
  mutate(diff = recorded_dt - time0) %>%
  pivot_wider(names_from = diff, names_prefix = "day", names_sort = TRUE,
              values_from = obs_num)

# # A tibble: 4 × 7
#      ID time0      recorded_dt day0  day1  day4  day30
#   <int> <date>     <date>      <chr> <chr> <chr> <chr>
# 1     1 2009-01-01 2009-01-01  A     NA    NA    NA   
# 2     1 2009-01-01 2009-01-31  NA    NA    NA    D    
# 3     1 2009-01-01 2009-01-05  NA    NA    B     NA   
# 4     2 2005-02-02 2005-02-03  NA    B     NA    NA

If you want to show all day differences, use complete() before pivot_wider().

df %>%
  mutate(diff = as.numeric(recorded_dt - time0)) %>%
  complete(nesting(ID, time0, recorded_dt), diff = 0:30) %>%
  pivot_wider(names_from = diff, names_prefix = "day", names_sort = TRUE,
              values_from = obs_num)

# # A tibble: 4 × 34
#      ID time0      recorded_dt day0  day1  day2  day3  day4  day5  ...... day30
#   <int> <date>     <date>      <chr> <chr> <chr> <chr> <chr> <chr> ...... <chr>
# 1     1 2009-01-01 2009-01-01  A     NA    NA    NA    NA    NA    ...... NA
# 2     1 2009-01-01 2009-01-05  NA    NA    NA    NA    B     NA    ...... D
# 3     1 2009-01-01 2009-01-31  NA    NA    NA    NA    NA    NA    ...... NA
# 4     2 2005-02-02 2005-02-03  NA    B     NA    NA    NA    NA    ...... NA

Data

df <- structure(list(ID = c(1L, 1L, 1L, 2L), time0 = structure(c(14245, 
14245, 14245, 12816), class = "Date"), obs_num = c("A", "D", 
"B", "B"), recorded_dt = structure(c(14245, 14275, 14249, 12817),
class = "Date")), row.names = c(NA, -4L), class = "data.frame")