I would like to write a function to convert a character vector in the "time_spent" column to a numeric vector of minutes.
#Import libraries
library(tidyr)
library(dplyr)
library(lubridate)
#Create dataframe
df <- data.frame(
  country = c("Russia", "China", "USA"),
  time_spent = c("0:10", "1:20", "2:36")
)
# Convert "time_spent" from factor to character. as.character() is
# vectorised and returns a plain character vector; lapply() would instead
# replace the column with a list of length-one strings.
df$time_spent <- as.character(df$time_spent)
I can do this with the following code, but I want to learn how to write it as a function.
# Pad "H:M" with a zero seconds field so hms() can parse it, then turn the
# parsed period into a total number of minutes (hours * 60 + minutes).
df %>%
  mutate(
    minutes = paste(time_spent, "00", sep = ":"),
    minutes = hms(minutes),
    minutes = hour(minutes) * 60 + minute(minutes)
  )
I know I've butchered this function and its application, but I cannot figure out how to get it to work.
#Function to convert "time_spent" vector into minutes
# NOTE(review): this is the asker's non-working attempt, kept as posted.
# Problems visible in the code:
#   - every step assigns to df$x (a column named "x" on df, which is not an
#     argument of the function) instead of transforming the argument x and
#     returning the result;
#   - the first two steps end in %>%, which pipes one assignment into the
#     next assignment;
#   - the last step has no operator between hour(x)*60 and minute(x).
convert_to_minutes <- function(x) {
df$x = paste(x, "00", sep = ":") %>%
df$x = hms(x) %>%
df$x = hour(x)*60 minute(x)
}
#Apply function
# NOTE(review): the pipe passes df itself as the first argument (x), so
# df$time_spent arrives as a second argument, which the one-parameter
# function defined above does not accept.
df %>%
convert_to_minutes(df$time_spent)
Related question 2
How do I use `map` to apply my `convert_to_minutes` function to multiple `time_spent` vectors, as in the following data frame?
# Example data: one row per country and three columns of "H:MM" strings.
df2 <- data.frame(
  country = c("Russia", "China", "USA"),
  time_spent = c("0:10", "1:20", "2:36"),
  time_spent2 = c("0:15", "0:12", "1:47"),
  time_spent3 = c("0:25", "3:45", "0:18")
)
CodePudding user response:
A function to convert to minutes, in base R only, could be the following.
#' Convert "H:M" time strings to total minutes (base R only)
#'
#' @param x A character vector (or anything `as.character()` can coerce,
#'   e.g. a factor) whose elements look like "hours:minutes".
#' @return A numeric vector the same length as `x` holding
#'   `hours * 60 + minutes`. Elements that are `NA`, or that do not split
#'   into exactly two pieces on ":", yield `NA`.
convert_to_minutes <- function(x) {
  pieces <- strsplit(as.character(x), ":", fixed = TRUE)
  # vapply() pins the result to one double per element, so empty input
  # gives numeric(0) rather than the list() that sapply() would return.
  vapply(
    pieces,
    function(p) {
      # A missing or malformed entry cannot be weighted as hours/minutes.
      if (length(p) != 2L || anyNA(p)) {
        return(NA_real_)
      }
      sum(as.integer(p) * c(60, 1))
    },
    numeric(1)
  )
}
y <- c("0:10", "1:20", "2:36")
convert_to_minutes(y)
#[1] 10 80 156
To convert a data.frame column, use the `dplyr` function `mutate`.
library(dplyr)
# Add a "minutes" column computed from the "time_spent" strings.
mutate(df, minutes = convert_to_minutes(time_spent))
# country time_spent minutes
#1 Russia 0:10 10
#2 China 1:20 80
#3 USA 2:36 156
To convert multiple columns, use `mutate(across(...))`.
# Convert every column whose name starts with "time", writing each result
# to a new column named "minutes_<source column>".
df2 %>%
  mutate(
    across(
      starts_with("time"),
      convert_to_minutes,
      .names = "minutes_{col}"
    )
  )
# country time_spent time_spent2 time_spent3 minutes_time_spent minutes_time_spent2 minutes_time_spent3
#1 Russia 0:10 0:15 0:25 10 15 25
#2 China 1:20 0:12 3:45 80 12 225
#3 USA 2:36 1:47 0:18 156 107 18
CodePudding user response:
Since you are already using lubridate, you can make more efficient use of it. Besides `lubridate::hms`, it also provides `lubridate::hm`, which parses hours and minutes directly. Another handy function is `lubridate::as.duration`, which lets you express the result in any unit, which in your case is minutes. Then just convert the duration to numeric and you are done.
# Parse "H:M" strings as periods, convert them to durations, and read the
# durations out as a number of minutes.
x <- c("0:10", "1:20", "2:36")
as.numeric(
  as.duration(hm(x)),
  units = "minutes"
)
# [1] 10 80 156
Full answer to convert df2
df2 <- data.frame(
  country = c("Russia", "China", "USA"),
  time_spent = c("0:10", "1:20", "2:36"),
  time_spent2 = c("0:15", "0:12", "1:47"),
  time_spent3 = c("0:25", "3:45", "0:18")
)
library(lubridate)
library(data.table)
# Convert df2 to a data.table by reference so := can update it in place.
setDT(df2)
# Pick the time columns by name rather than by position: a positional
# 2:n sequence counts backwards when there is only one column and silently
# selects the wrong columns if their order changes.
cols <- grep("^time_spent", names(df2), value = TRUE)
# Overwrite each selected column with its value in minutes.
df2[
  ,
  (cols) := lapply(
    .SD,
    function(x) as.numeric(as.duration(hm(x)), "minutes")
  ),
  .SDcols = cols
]
CodePudding user response:
There are a few typos and syntax errors in your function. You can use the following instead:
library(lubridate)
library(dplyr)
#' Convert "H:M" time strings to total minutes using lubridate
#'
#' @param x A character vector of times written as "hours:minutes".
#' @return A numeric vector the same length as `x` holding
#'   `hours * 60 + minutes`.
convert_to_minutes <- function(x) {
  # hms() expects "H:M:S", so pad the input with a zero seconds field.
  period <- lubridate::hms(paste(x, "00", sep = ":"))
  # Read the components through the accessor functions instead of the S4
  # slots, and add (not juxtapose) the two terms.
  lubridate::hour(period) * 60 + lubridate::minute(period)
}
convert_to_minutes(df$time_spent)
#[1] 10 80 156
To apply it to multiple columns, you can use `across` in the tidyverse (or `lapply` in base R).
# Replace every column whose name contains "time_spent" with its value in
# minutes.
mutate(df2, across(contains("time_spent"), convert_to_minutes))
# country time_spent time_spent2 time_spent3
#1 Russia 10 15 25
#2 China 80 12 225
#3 USA 156 107 18