I have a data frame with multiple columns, a small version of the data frame looks like this:
wind<- c(0.84, 1.77, 3.5, 6.44, 7.55)
ROS<- c(0.01,0.03,0.05, 0.07, 0.1)
T_0.1_1 <- c(1110, 350, 250, 300, 311)
T_0.2_1 <- c(560, 200, 364, 258, 159)
T_0.3_1 <- c(258, 147, 369, 123, 624)
T_180.1_1 <- c(554, 226, 547, 842, 366)
T_180.2_1 <- c(258, 147, 369, 123, 624)
T_180.3_1 <- c(110, 350, 250, 300, 311)
df<-data.frame(wind,ROS,T_0.1_1,T_0.2_1,T_0.3_1,
T_180.1_1,T_180.2_1, T_180.3_1)
I want to create another data frame with the variables "wind" and "ROS" and for each wind-ROS I want to select the variable that start with T_0. and T_180 with the max value and create a new variable with the name of the variable. The new data frame should look like this one:
wind<- c(0.84, 1.77, 3.5, 6.44, 7.55)
ROS<- c(0.01,0.03,0.05, 0.07, 0.1)
new_T0<- c('T_0.1_1', 'T_0.1_1', 'T_0.3_1', 'T_0.1_1', 'T_0.3_1')
new_T180<- c('T_180.1_1', 'T_180.3_1', 'T_180.1_1', 'T_180.1_1', 'T_180.2_1')
df_new<-data.frame(ROS, wind,new_T0, new_T180)
Any help how I can do that is very much appreciated! Thank you!
CodePudding user response:
df %>%
pivot_longer(!1:2)%>%
group_by(wind, ROS, id2=str_remove( name, '[.].*'))%>%
slice_max(value)%>%
pivot_wider(1:2, names_from = 'id2', values_from = 'name', names_prefix = 'new_')
# A tibble: 5 × 4
wind ROS new_T_0 new_T_180
<dbl> <dbl> <chr> <chr>
1 0.84 0.01 T_0.1_1 T_180.1_1
2 1.77 0.03 T_0.1_1 T_180.3_1
3 3.5 0.05 T_0.3_1 T_180.1_1
4 6.44 0.07 T_0.1_1 T_180.1_1
5 7.55 0.1 T_0.3_1 T_180.2_1
CodePudding user response:
Might be a better way of doing it but this gets you what you want:
library(dplyr)
library(tidyr)
df2 <- df %>%
tibble::rowid_to_column("id") %>%
gather("key", "value",-c(id, wind, ROS)) %>%
mutate(new_grp = ifelse(grepl("T_0", key), "new_T0", "New_T180")) %>%
group_by(id, new_grp) %>%
filter(value == max(value)) %>%
ungroup() %>%
select(id, wind, ROS, new_grp, key) %>%
spread("new_grp", "key") %>%
select(-id)
CodePudding user response:
Below follows also a base R solution.
df<-data.frame(wind,ROS,T_0.1_1,T_0.2_1,T_0.3_1,
T_180.1_1,T_180.2_1, T_180.3_1)
new_df <- data.frame(wind, ROS)
new_df$new_T0 <- apply( df[, 3:8], MARGIN = 1, FUN = function(x) names(x)[which.max(x)])
new_df$new_T180 <- apply( df[, 3:8], MARGIN = 1, FUN = function(x) names(x)[which.min(x)])
CodePudding user response:
Using base R
cbind(df[1:2], lapply(split.default(df[-(1:2)],
sub("\\..*", "", names(df)[-(1:2)])), \(x) names(x)[max.col(x, 'first')]))
-output
wind ROS T_0 T_180
1 0.84 0.01 T_0.1_1 T_180.1_1
2 1.77 0.03 T_0.1_1 T_180.3_1
3 3.50 0.05 T_0.3_1 T_180.1_1
4 6.44 0.07 T_0.1_1 T_180.1_1
5 7.55 0.10 T_0.3_1 T_180.2_1
Or with tidyverse
library(dplyr)
library(purrr)
library(stringr)
df %>%
select(starts_with("T_")) %>%
split.default(str_remove(names(.), "\\..*")) %>%
map_dfc(~ names(.x)[max.col(.x, "first")]) %>%
bind_cols(df %>%
select(-starts_with("T_")), .)
-output
wind ROS T_0 T_180
1 0.84 0.01 T_0.1_1 T_180.1_1
2 1.77 0.03 T_0.1_1 T_180.3_1
3 3.50 0.05 T_0.3_1 T_180.1_1
4 6.44 0.07 T_0.1_1 T_180.1_1
5 7.55 0.10 T_0.3_1 T_180.2_1