I'm trying to group a dataset and get the first and highest values based on two separate measures of time and speed. So I need the time and speed for the earliest record in each group and then the time and speed for the fastest record in each group. I've got this far but need some help...
library(tidyverse)
group <- c(1,1,1,1,1,2,2,3,3,4,4,4,4,4,4)
time <- c(1,6,4,5,7,12,10,2,3,8,9,11,13,14,15)
speed <- c(17,6, 99, 34, 12, 5, 67, 43, 23, 12, 15, 78, 61, 78, 20)
data = data.frame(group, time, speed)
summary = data %>%
group_by(group) %>%
summarise(
firstTime = # lowest time
HighestSpeedTime = , # time for highest speed
firstSpeed = , #speed for lowest time
highestSpeed = max(speed), # highest speed
)
CodePudding user response:
Does this work?
library(tidyverse)
group <- c(1,1,1,1,1,2,2,3,3,4,4,4,4,4,4)
time <- c(1,6,4,5,7,12,10,2,3,8,9,11,13,14,15)
speed <- c(17,6, 99, 34, 12, 5, 67, 43, 23, 12, 15, 78, 61, 78, 20)
data = data.frame(group, time, speed)
summary <- data |>
arrange(group, time) |>
group_by(group) |>
summarise(
firsttime = min(time),
highest_speed = max(speed)
) |>
left_join(data, by = c("group", "highest_speed" = "speed")) |>
group_by(group) |>
slice(1) |>
rename(highest_speed_time = time) |>
left_join(data, by = c("group", "firsttime" = "time")) |>
rename(first_speed = speed)
summary
# group firsttime highest_speed highest_speed_time first_speed
# <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 99 4 17
# 2 10 67 10 67
# 3 2 43 2 43
# 4 8 78 11 12
CodePudding user response:
Another solution, with a chained inner_join
:
library(tidyverse)
data %>%
group_by(group) %>%
summarise(firstTime = min(time)) %>%
inner_join(data,by=c("group", "firstTime"="time")) %>%
rename(firstSpeed=speed) %>%
inner_join(
data %>%
group_by(group) %>%
summarise(highestSpeed = max(speed)) %>%
inner_join(data,by=c("group", "highestSpeed"="speed"))
) %>%
relocate(highestTime=time, .before="highestSpeed")
#> Joining, by = "group"
#> # A tibble: 5 × 5
#> group firstTime firstSpeed highestTime highestSpeed
#> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 1 17 4 99
#> 2 2 10 67 10 67
#> 3 3 2 43 2 43
#> 4 4 8 12 11 78
#> 5 4 8 12 14 78
CodePudding user response:
here is a data.table approach
library(data.table)
setDT(data)
temp <- data[data[, .I[speed == max(speed)], by = .(group)]$V1]
setnames(temp, new = c("group", "maxSpeedTime", "maxSpeed"))
# join together
data[, .(firstTime = time[1],
firstSpeed = speed[1]),
by = .(group)][temp, on = .(group)]
# group firstTime firstSpeed maxSpeedTime maxSpeed
# 1: 1 1 17 4 99
# 2: 2 12 5 10 67
# 3: 3 2 43 2 43
# 4: 4 8 12 11 78
# 5: 4 8 12 14 78
CodePudding user response:
Update: This should work: In group 4 we have ties therefore 2 rows:(we have at two time points the highest speed)!
library(dplyr)
data %>%
group_by(group) %>%
summarise(
firstTime = min(time), # lowest time
HighestSpeedTime = time[which(speed==max(speed))], # time for highest speed
firstSpeed = speed[which(time==min(time))],#speed for lowest time
highestSpeed = max(speed) # highest speed
)
output:
group firstTime HighestSpeedTime firstSpeed highestSpeed
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 4 17 99
2 2 10 10 67 67
3 3 2 2 43 43
4 4 8 11 12 78
5 4 8 14 12 78