Home > OS >  Finding minimums in specified intervals for time series data
Finding minimums in specified intervals for time series data

Time:02-15

I have a set of lab values I want to trend over time in relation to a date of hospital admission. Each patient has a variable number of entries for this lab and a variable follow-up time. My goal is to identify the minimum value of this lab in various time intervals after their admission (Date_One in the df), i.e. day 0-30, day 31-90, 1-2 years, 2-3, 3-4, etc., until their last follow-up, in order to help me identify outliers that are a certain threshold above their baseline. As this lab value can change naturally over time, I want to find these minimums to establish new baselines. As each patient has variable follow-up, some up to 20 years, I was having trouble finding a way to get these local minimums without using filter and mutate to make a new column for every interval I wanted. My dput output is below; if this is incorrect formatting, please let me know!

structure(list(lab_date = structure(c(10006, 10007, 10008, 10009, 
10010, 10011, 10012, 10013, 10014, 10015, 10016, 10018, 10019, 
10020, 10021, 10022, 10023, 10024, 10025, 10026, 10099, 10225, 
10242, 10361, 10575, 10729, 10785, 10849, 10856, 10857, 10858, 
10859, 10872, 10975, 11071, 11151, 11179, 11197, 11198, 11199, 
11201, 11202, 11203, 11204, 11206, 11207, 11208, 11210, 11226, 
11228, 11229, 11230, 11254, 11256, 11257, 11258, 11270, 11281, 
11282, 11282, 11309, 11310, 11338, 11339, 11372, 11373, 11401, 
11499, 11536, 11564, 11582, 11597, 11598, 11625, 11660, 11663, 
11664, 11665, 11666, 11667, 11668, 11695, 11696, 11697, 11698, 
11699, 11700, 11701, 11723, 11729, 11730, 11731, 11732, 11733, 
11734, 11735, 11736, 11737, 11765, 11828), class = "Date"), lab_value = c(1.1, 
1, 1.1, 1.8, 2.3, 2.4, 1.3, 1.3, 1.2, 1.2, 1.2, 1.5, 1.3, 1.1, 
1.1, 1.1, 1, 1, 1, 1, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.3, 1.2, 
1.2, 1.7, 1.7, 1.7, 1.8, 1.8, 1.7, 1.8, 1.9, 1.7, 1.6, 1.7, 2.1, 
2.1, 2.5, 2.6, 2.7, 2.6, 2.3, 2, 2, 1.8, 1.9, 2, 1.6, 1.8, 2, 
2.1, 1.9, 1.8, 1.7, 1.8, 1.9, 1.8, 2.1, 1.9, 1.9, 1.9, 2.1, 2.1, 
2, 1.9, 2.1, 2, 2, 2, 2.1, 2, 1.8, 1.8, 2, 2.2, 2.4, 2.2, 2.2, 
2.1, 1.9, 2.1, 2.2, 2.4, 2.4, 2.3, 2.3, 2.5, 2.6, 3.1, 3.2, 3.4, 
3.6, 3.3, 3.1, 3), ID = c(182, 182, 182, 182, 182, 182, 182, 
182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 
182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 
182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 
182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 
182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 
182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 
182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 
182, 182), Date_One = structure(c(10856, 10856, 10856, 10856, 
10856, 10856, 10856, 10856, 10856, 10856, 10856, 10856, 10856, 
10856, 10856, 10856, 10856, 10856, 10856, 10856, 10856, 10856, 
10856, 10856, 10856, 10856, 10856, 10856, 10856, 10856, 10856, 
10856, 10856, 10856, 10856, 10856, 10856, 10856, 10856, 10856, 
10856, 10856, 10856, 10856, 10856, 10856, 10856, 10856, 10856, 
10856, 10856, 10856, 10856, 10856, 10856, 10856, 10856, 10856, 
10856, 10856, 10856, 10856, 10856, 10856, 10856, 10856, 10856, 
10856, 10856, 10856, 10856, 10856, 10856, 10856, 10856, 10856, 
10856, 10856, 10856, 10856, 10856, 10856, 10856, 10856, 10856, 
10856, 10856, 10856, 10856, 10856, 10856, 10856, 10856, 10856, 
10856, 10856, 10856, 10856, 10856, 10856), class = "Date")), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -100L), groups = structure(list(
    ID = 182, .rows = structure(list(1:100), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), row.names = c(NA, -1L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE))

CodePudding user response:

How about something like this? You specify the segment breaks in days (this can easily be converted to months or another unit, but other code will then have to change). For each segment, the function isolates the rows whose dates fall within that segment's range and finds the minimum of their values. If a segment contains no values, it returns NA for that segment. This should work for the data you provided; if you want to apply it to a data frame with multiple IDs, let me know, as that should only need one small extra loop.

#Build the example data directly as a plain data frame
# (dates are stored as day counts since 1970-01-01, as in the dput output)
Data <- data.frame(
  lab_date = structure(
    c(
      10006, 10007, 10008, 10009, 10010, 10011, 10012, 10013, 10014, 10015,
      10016, 10018, 10019, 10020, 10021, 10022, 10023, 10024, 10025, 10026,
      10099, 10225, 10242, 10361, 10575, 10729, 10785, 10849, 10856, 10857,
      10858, 10859, 10872, 10975, 11071, 11151, 11179, 11197, 11198, 11199,
      11201, 11202, 11203, 11204, 11206, 11207, 11208, 11210, 11226, 11228,
      11229, 11230, 11254, 11256, 11257, 11258, 11270, 11281, 11282, 11282,
      11309, 11310, 11338, 11339, 11372, 11373, 11401, 11499, 11536, 11564,
      11582, 11597, 11598, 11625, 11660, 11663, 11664, 11665, 11666, 11667,
      11668, 11695, 11696, 11697, 11698, 11699, 11700, 11701, 11723, 11729,
      11730, 11731, 11732, 11733, 11734, 11735, 11736, 11737, 11765, 11828
    ),
    class = "Date"
  ),
  lab_value = c(
    1.1, 1, 1.1, 1.8, 2.3, 2.4, 1.3, 1.3, 1.2, 1.2,
    1.2, 1.5, 1.3, 1.1, 1.1, 1.1, 1, 1, 1, 1,
    1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.3, 1.2, 1.2, 1.7,
    1.7, 1.7, 1.8, 1.8, 1.7, 1.8, 1.9, 1.7, 1.6, 1.7,
    2.1, 2.1, 2.5, 2.6, 2.7, 2.6, 2.3, 2, 2, 1.8,
    1.9, 2, 1.6, 1.8, 2, 2.1, 1.9, 1.8, 1.7, 1.8,
    1.9, 1.8, 2.1, 1.9, 1.9, 1.9, 2.1, 2.1, 2, 1.9,
    2.1, 2, 2, 2, 2.1, 2, 1.8, 1.8, 2, 2.2,
    2.4, 2.2, 2.2, 2.1, 1.9, 2.1, 2.2, 2.4, 2.4, 2.3,
    2.3, 2.5, 2.6, 3.1, 3.2, 3.4, 3.6, 3.3, 3.1, 3
  ),
  # Single patient: the same ID and admission date on every row
  ID = rep(182, 100),
  Date_One = structure(rep(10856, 100), class = "Date")
)
#Start of each segment, expressed as a day offset
SegmentBreaks <- c(0, 30, 90, 365, 730)

#Function for finding the minimum lab value within each time segment
#
# Data          : data frame with a Date column `lab_date` and a column
#                 `lab_value` holding the measurements.
# SegmentBreaks : numeric vector of segment start offsets, in days counted
#                 from `Origin` (assumed to be in increasing order).
# Origin        : date the offsets are counted from. Defaults to the earliest
#                 lab date; pass e.g. Data$Date_One[1] to anchor the segments
#                 to the admission date instead.
#
# Returns a vector with one entry per element of SegmentBreaks: the minimum
# lab_value among rows whose lab_date lies in
# [start of segment, start of next segment). The last segment ends at (and
# excludes) today's date. A segment with no rows yields NA.
MinAtSegments <- function(Data, SegmentBreaks, Origin = min(Data$lab_date)) {
  # Segment boundaries as dates; Sys.Date() closes the final segment
  DateBreaks <- c(Origin + SegmentBreaks, Sys.Date())

  Output <- rep(NA, length(SegmentBreaks))

  # seq_along() keeps the loop from running when SegmentBreaks is empty
  for (i in seq_along(SegmentBreaks)) {
    InSegment <- Data$lab_date >= DateBreaks[i] &
      Data$lab_date < DateBreaks[i + 1]
    LabVals <- Data$lab_value[InSegment]
    # Only take the minimum when the segment has data; otherwise leave NA
    if (length(LabVals) > 0) {
      Output[i] <- min(LabVals)
    }
  }

  Output
}

#Compute the per-segment minimums for the example data
MinAtSegments(Data = Data, SegmentBreaks = SegmentBreaks)

CodePudding user response:

Here is one possible option with tidyverse (but I'm not sure what format you want the output to be):

library(tidyverse)

# Minimum lab value per patient within each follow-up interval after
# admission (Date_One).
df %>%
  # Years from admission to the lab draw. difftime(a, b) is a - b, so the
  # value is positive for labs taken after admission.
  mutate(
    years = as.numeric(difftime(lab_date, Date_One, units = "days")) / 365
  ) %>%
  # Keep only labs drawn on or after the admission date
  filter(years >= 0) %>%
  # Intervals: day 0-30, day 31-90, day 91-365, then yearly up to 20 years.
  # Values beyond the last break fall outside every interval and get NA.
  group_by(
    gr = cut(years, breaks = c(-Inf, 30 / 365, 90 / 365, seq(1, 20, by = 1))),
    ID
  ) %>%
  # Drop the grouping so the result is a plain, ungrouped tibble
  summarise(lab_value = min(lab_value), .groups = "drop")

# Output for the example data (the day 31-90 interval has no labs, so it
# does not appear):
#>   gr               ID lab_value
#>   <fct>         <dbl>     <dbl>
#> 1 (-Inf,0.0822]   182       1.2
#> 2 (0.247,1]       182       1.6
#> 3 (1,2]           182       1.6
#> 4 (2,3]           182       1.8
  • Related