Home > Back-end >  create a new data frame with the variables names
create a new data frame with the variables names

Time:01-24

I have a data frame with multiple columns, a small version of the data frame looks like this:

wind<- c(0.84, 1.77, 3.5, 6.44, 7.55) 
ROS<- c(0.01,0.03,0.05, 0.07, 0.1) 
T_0.1_1 <- c(1110, 350, 250, 300, 311)
T_0.2_1 <- c(560, 200, 364, 258, 159)
T_0.3_1 <- c(258, 147, 369, 123, 624)
T_180.1_1 <- c(554, 226, 547, 842, 366) 
T_180.2_1 <- c(258, 147, 369, 123, 624)
T_180.3_1 <- c(110, 350, 250, 300, 311)

df<-data.frame(wind,ROS,T_0.1_1,T_0.2_1,T_0.3_1, 
              T_180.1_1,T_180.2_1, T_180.3_1)

I want to create another data frame with the variables "wind" and "ROS" and for each wind-ROS I want to select the variable that start with T_0. and T_180 with the max value and create a new variable with the name of the variable. The new data frame should look like this one:

wind<- c(0.84, 1.77, 3.5, 6.44, 7.55) 
ROS<- c(0.01,0.03,0.05, 0.07, 0.1)
new_T0<- c('T_0.1_1', 'T_0.1_1', 'T_0.3_1', 'T_0.1_1', 'T_0.3_1')
new_T180<- c('T_180.1_1', 'T_180.3_1', 'T_180.1_1', 'T_180.1_1', 'T_180.2_1')

df_new<-data.frame(ROS, wind,new_T0, new_T180)

Any help how I can do that is very much appreciated! Thank you!

CodePudding user response:

df %>%
  pivot_longer(!1:2)%>%
  group_by(wind, ROS, id2=str_remove( name, '[.].*'))%>%
  slice_max(value)%>%
  pivot_wider(1:2, names_from = 'id2', values_from = 'name', names_prefix = 'new_')

# A tibble: 5 × 4
   wind   ROS new_T_0 new_T_180
  <dbl> <dbl> <chr>   <chr>    
1  0.84  0.01 T_0.1_1 T_180.1_1
2  1.77  0.03 T_0.1_1 T_180.3_1
3  3.5   0.05 T_0.3_1 T_180.1_1
4  6.44  0.07 T_0.1_1 T_180.1_1
5  7.55  0.1  T_0.3_1 T_180.2_1

CodePudding user response:

Might be a better way of doing it but this gets you what you want:

library(dplyr)
library(tidyr)

df2 <- df %>%
  tibble::rowid_to_column("id") %>%
  gather("key", "value",-c(id, wind, ROS)) %>%
  mutate(new_grp = ifelse(grepl("T_0", key), "new_T0", "New_T180")) %>%
  group_by(id, new_grp) %>%
  filter(value == max(value)) %>%
  ungroup() %>%
  select(id, wind, ROS, new_grp, key) %>%
  spread("new_grp", "key") %>%
  select(-id)

CodePudding user response:

Below follows also a base R solution.

df<-data.frame(wind,ROS,T_0.1_1,T_0.2_1,T_0.3_1, 
               T_180.1_1,T_180.2_1, T_180.3_1)

new_df <- data.frame(wind, ROS)
new_df$new_T0 <- apply( df[, 3:8], MARGIN = 1, FUN = function(x) names(x)[which.max(x)])
new_df$new_T180 <- apply( df[, 3:8], MARGIN = 1, FUN = function(x) names(x)[which.min(x)])

CodePudding user response:

Using base R

cbind(df[1:2], lapply(split.default(df[-(1:2)],
  sub("\\..*", "", names(df)[-(1:2)])), \(x) names(x)[max.col(x, 'first')]))

-output

 wind  ROS     T_0     T_180
1 0.84 0.01 T_0.1_1 T_180.1_1
2 1.77 0.03 T_0.1_1 T_180.3_1
3 3.50 0.05 T_0.3_1 T_180.1_1
4 6.44 0.07 T_0.1_1 T_180.1_1
5 7.55 0.10 T_0.3_1 T_180.2_1

Or with tidyverse

library(dplyr)
library(purrr)
library(stringr)
df %>% 
   select(starts_with("T_")) %>% 
   split.default(str_remove(names(.), "\\..*")) %>% 
   map_dfc(~ names(.x)[max.col(.x, "first")]) %>%
   bind_cols(df %>%
     select(-starts_with("T_")), .)

-output

wind  ROS     T_0     T_180
1 0.84 0.01 T_0.1_1 T_180.1_1
2 1.77 0.03 T_0.1_1 T_180.3_1
3 3.50 0.05 T_0.3_1 T_180.1_1
4 6.44 0.07 T_0.1_1 T_180.1_1
5 7.55 0.10 T_0.3_1 T_180.2_1
  • Related