Home > front end >  How to parse data-variables as characters into dplyr filter
How to parse data-variables as characters into dplyr filter

Time:01-27

I have two dataframes, one summary dataframe with time periods and another dataframe with timestamps of sleep or wake states. For each time period of the first dataframe (each row of dfSum), I would like to extract the timestamp of the last "Sleep" state from the second dataframe (dfSlp).

library(tidyverse)
library(lubridate)

## example datasets:
dfSum <- data.frame(
  In.Bed.Date = c("6/4/2021", "7/4/2021", "8/4/2021"),
  In.Bed.Time = c("02:23", "01:19", "03:00"),
  Out.Bed.Date = c("6/4/2021", "7/4/2021", "8/4/2021"),
  Out.Bed.Time = c("09:15", "09:35", "08:59")
)

## Make In Bed and Out Bed Date and Time as DateTime object
dfSum <- dfSum %>% mutate(
  InBed = dmy_hm(paste0(In.Bed.Date, " ", In.Bed.Time), tz = "Asia/Singapore"),
  OutBed = dmy_hm(paste0(Out.Bed.Date, " ", Out.Bed.Time), tz = "Asia/Singapore")
)

dfSlp <- data.frame(
  Date = c("6/4/21","6/4/21","6/4/21","6/4/21","6/4/21","6/4/21",
           "7/4/21","7/4/21","7/4/21","7/4/21","7/4/21","7/4/21",
           "8/4/21","8/4/21","8/4/21","8/4/21","8/4/21","8/4/21"),
  Time = c("02:20","02:21","06:44", "08:59", "09:08", "09:09",
           "01:18","03:49","05:08", "06:50", "09:24", "09:25",
           "03:00","04:05","06:50", "07:40", "07:53", "07:58"),
  SleeporAwake = c("S","W","S","S","W","W",
                   "S","W","S","S","S","W",
                   "S","W","S","S","W","S")
)

## Make Date and Time as DateTime object
dfSlp <- dfSlp %>% mutate(
  DateTime = dmy_hm(paste0(Date, " ", Time), tz = "Asia/Singapore")
)

I was trying to use a function in dplyr mutate, but it doesn't work

######################################## Not working: functions in Dplyr ########################################

## Function to extract Date of last Sleep State
extractOffsetDate <- function(InBed, OutBed) {
  dfSlpTmp <- dfSlp %>% filter(DateTime >= {{ InBed }} & DateTime <= {{ OutBed }})
  dfSlpTmp <- dfSlpTmp %>% filter(SleeporAwake == "S")
  Offset.Date <- tail(dfSlpTmp$Date,1)
  return(Offset.Date)
}

## Function to extract Time of last Sleep State
extractOffsetTime <- function(InBed, OutBed) {
  dfSlpTmp <- dfSlp %>% filter(DateTime >= {{ InBed }} & DateTime <= {{ OutBed }})
  dfSlpTmp <- dfSlpTmp %>% filter(SleeporAwake == "S")
  Offset.Time <- tail(dfSlpTmp$Time,1)
  return(Offset.Time)
}

## Dplyr mutate, filter, to extract the last timestamp of "Sleep" state for each time period (row)
dfSumNew <- dfSum %>% mutate(
  Offset.Date = extractOffsetDate(InBed, OutBed),
  Offset.Time = extractOffsetTime(InBed, OutBed)
)

When I used the same function in Base R, it works, but I have hundreds of participants and many sleep periods to process.

######################################## Workaround 1 - functions in Base R ######################################## 
## Functions not using Dplyr
## Function to extract Date of last Sleep State
BASEextractOffsetDate <- function(InBed, OutBed) {
  dfSlpTmp <- dfSlp %>% filter(DateTime >= InBed & DateTime <= OutBed)
  dfSlpTmp <- dfSlpTmp %>% filter(SleeporAwake == "S")
  Offset.Date <- tail(dfSlpTmp$Date,1)
  return(Offset.Date)
}

## Function to extract Time of last Sleep State
BASEextractOffsetTime <- function(InBed, OutBed) {
  dfSlpTmp <- dfSlp %>% filter(DateTime >= InBed & DateTime <= OutBed)
  dfSlpTmp <- dfSlpTmp %>% filter(SleeporAwake == "S")
  Offset.Time <- tail(dfSlpTmp$Time,1)
  return(Offset.Time)
}

BASEextractOffsetDate(dfSum$InBed[1],dfSum$OutBed[1])
BASEextractOffsetTime(dfSum$InBed[1],dfSum$OutBed[1])
BASEextractOffsetDate(dfSum$InBed[2],dfSum$OutBed[2])
BASEextractOffsetTime(dfSum$InBed[2],dfSum$OutBed[2])
BASEextractOffsetDate(dfSum$InBed[3],dfSum$OutBed[3])
BASEextractOffsetTime(dfSum$InBed[3],dfSum$OutBed[3])

I have also tried using lapply, it works, but just wondering if there is a more straightforward way using dplyr.

######################################## Workaround 2 - lapply ########################################

## Create a list to feed into lapply
dfrange <- dfSum %>% select(InBed,OutBed)
dfranget <- t(dfrange)
dfrangel <- as.list(as.data.frame(dfranget))

## For each time period in this list, extract the time stamp of the last "S" state
vecOffset <- lapply(dfrangel, function(x) {
  
  fstart <- ymd_hms(x[1], tz = "Asia/Singapore")
  fend <- ymd_hms(x[2], tz = "Asia/Singapore")
  
  dfSlpTmp <- dfSlp %>% filter(DateTime >= fstart & DateTime <= fend)
  dfSlpTmp <- dfSlpTmp %>% filter(SleeporAwake == "S")
  offset.date <- tail(dfSlpTmp$Date,1)
  offset.time <- tail(dfSlpTmp$Time,1)
  tt <- c(x[1], x[2], offset.date, offset.time)
})

dfOffset <- data.frame(matrix(unlist(vecOffset), nrow=length(vecOffset), byrow=TRUE))
dfOffset <- dfOffset %>% rename(
  InBed = X1,
  OutBed = X2,
  Offset.Date = X3,
  Offset.Time = X4
) %>% mutate(
  InBed = ymd_hms(InBed, tz = "Asia/Singapore"),
  OutBed = ymd_hms(OutBed, tz = "Asia/Singapore")
)

## Merge the offsets with original summary dataset
dfSumNew <- dfSum %>% left_join(dfOffset, by = c("InBed", "OutBed"))
dfSumNew <- dfSumNew %>% select(-c(InBed,OutBed))

Thanks a lot!

CodePudding user response:

It seems you need rowwise here

dfSum %>% 
    rowwise() %>% 
    mutate(
        Offset.Date = extractOffsetDate(InBed, OutBed),
        Offset.Time = extractOffsetTime(InBed, OutBed)
    ) 
  •  Tags:  
  • Related