Home > other >  Using pivot_longer with 2 groups of columns
Using pivot_longer with 2 groups of columns

Time:10-30

This is my code

# Example data: five evenly spaced y grids (1000 points each, starting at 0)
# and one sine/cosine "flower" curve per grid. Columns are paired by their
# numeric suffix (y_1 <-> flower_1, ...), and most curves are shifted
# vertically by a constant offset.
first_df <- tibble(
  y_1 = seq(0, 1 * 3.14, length.out = 1000),
  y_2 = seq(0, 2 * 3.14, length.out = 1000),
  y_3 = seq(0, 3 * 3.14, length.out = 1000),
  y_4 = seq(0, 0.2 * 3.14, length.out = 1000),
  y_5 = seq(0, 1 * 3.14, length.out = 1000),
  flower_1 = sin(y_1) - 2.5,
  flower_2 = cos(y_2),
  flower_3 = sin(y_3) + 2.5,
  flower_4 = cos(y_4) + 5,
  flower_5 = sin(y_5) + 7
)

I want to do a pivot_longer so that my output has 4 columns: the flower name, the y name, the flower values and the y values.

The output should be something like this:

  flowers    y     value...2 value...4
   <chr>    <chr>     <dbl>     <dbl>
 1 flower_1 y_1       -2.5    0      
 2 flower_1 y_1       -2.50   0.00314
 3 flower_1 y_1       -2.49   0.00629
 4 flower_1 y_1       -2.49   0.00943
... ...     ...        ...     ... 

CodePudding user response:

Another solution:

library(tidyverse)

# Pivot both column groups at once. Each column name is split by the regex
# into a prefix ("y_" / "flower_") and a numeric suffix; the ".value" sentinel
# turns every distinct prefix into its own output column, while the suffix
# goes into `flowers`. The `+` quantifiers are required: `[a-z]+_` must match
# the whole prefix, and `\\d+` captures the complete (possibly multi-digit)
# suffix.
first_df %>%
  pivot_longer(
    everything(),
    names_to = c(".value", "flowers"),
    names_pattern = "([a-z]+_)(\\d+)"
  ) %>%
  # Rebuild the original column names from the suffix and give the two value
  # columns (named after the prefixes, `flower_` and `y_`) readable names.
  transmute(
    y = paste0("y_", flowers),
    flowers = paste0("flower_", flowers),
    value_flower = flower_,
    value_y = y_
  ) %>%
  arrange(y, flowers)
#> # A tibble: 5,000 × 4
#>    y     flowers  value_flower value_y
#>    <chr> <chr>           <dbl>   <dbl>
#>  1 y_1   flower_1        -2.5  0      
#>  2 y_1   flower_1        -2.50 0.00314
#>  3 y_1   flower_1        -2.49 0.00629
#>  4 y_1   flower_1        -2.49 0.00943
#>  5 y_1   flower_1        -2.49 0.0126 
#>  6 y_1   flower_1        -2.48 0.0157 
#>  7 y_1   flower_1        -2.48 0.0189 
#>  8 y_1   flower_1        -2.48 0.0220 
#>  9 y_1   flower_1        -2.47 0.0251 
#> 10 y_1   flower_1        -2.47 0.0283 
#> # … with 4,990 more rows

CodePudding user response:

Here's a hacky way:

# Tag every original row with an id, go fully long, then rebuild the y/flower
# pairing inside each row's group of ten long rows. This relies on the column
# order of `first_df`: the five y_* columns come first, so within each group
# rows 1-5 are the y entries and rows 6-10 the flower entries.
first_df %>%
  mutate(temp = row_number()) %>%
  pivot_longer(cols = -temp) %>%
  group_by(temp) %>%
  mutate(
    # Keep the name only on flower rows; y rows become NA and are dropped below.
    flowers = ifelse(startsWith(name, "flower"), name, NA_character_),
    # Repeat the five y names/values so rows 6-10 line up with their flower.
    y = rep(name[seq_len(5)], times = 2),
    value2 = value,
    value4 = rep(value[seq_len(5)], times = 2)
  ) %>%
  drop_na() %>%
  ungroup() %>%
  select(flowers, y, value2, value4) %>%
  arrange(flowers, y)

This gives us:

# A tibble: 5,000 x 4
   flowers  y     value2  value4
   <chr>    <chr>  <dbl>   <dbl>
 1 flower_1 y_1    -2.5  0      
 2 flower_1 y_1    -2.50 0.00314
 3 flower_1 y_1    -2.49 0.00629
 4 flower_1 y_1    -2.49 0.00943
 5 flower_1 y_1    -2.49 0.0126 
 6 flower_1 y_1    -2.48 0.0157 
 7 flower_1 y_1    -2.48 0.0189 
 8 flower_1 y_1    -2.48 0.0220 
 9 flower_1 y_1    -2.47 0.0251 
10 flower_1 y_1    -2.47 0.0283 
# ... with 4,990 more rows

CodePudding user response:

Here is an alternative solution:

  1. Select only the y columns of the data frame.
  2. Pivot them to long format, arrange the result, and store it as data frame `a`.
  3. Start again from the original data frame and keep only the flower columns.
  4. Pivot those to long format as well.
  5. Bind the columns of `a` onto the result.
library(dplyr)
library(tidyr)
# Long table of the y columns, sorted by column name. Columns are selected by
# their name prefix rather than by position, so the code does not depend on
# the column order of `first_df`.
a <- first_df %>%
  select(starts_with("y_")) %>%
  pivot_longer(
    cols = everything(),
    names_to = "y",
    values_to = "value...4"
  ) %>%
  arrange(y)

# Long table of the flower columns, sorted the same way, with the y table
# glued on. bind_cols() matches rows purely by position, so this only pairs
# flower_k with y_k because both tables are sorted by name and have the same
# number of rows per name.
first_df %>%
  select(starts_with("flower_")) %>%
  pivot_longer(
    cols = everything(),
    names_to = "flowers",
    values_to = "value...2"
  ) %>%
  arrange(flowers) %>%
  bind_cols(a)
  flowers  value...2 y     value...4
   <chr>        <dbl> <chr>     <dbl>
 1 flower_1     -2.5  y_1     0      
 2 flower_1     -2.50 y_1     0.00314
 3 flower_1     -2.49 y_1     0.00629
 4 flower_1     -2.49 y_1     0.00943
 5 flower_1     -2.49 y_1     0.0126 
 6 flower_1     -2.48 y_1     0.0157 
 7 flower_1     -2.48 y_1     0.0189 
 8 flower_1     -2.48 y_1     0.0220 
 9 flower_1     -2.47 y_1     0.0251 
10 flower_1     -2.47 y_1     0.0283 
# ... with 4,990 more rows

CodePudding user response:

You could use pivot_longer combined with a join function:

library(dplyr)
library(tidyr)

# Long format with one row per (original row, column). Each column name is
# split at the underscore into a category (`cat`: "y" or "flower") and its
# suffix (`rn2`); `rn` remembers which original row the value came from.
temp_df <- first_df %>%
  mutate(rn = row_number()) %>%
  pivot_longer(
    cols = -rn,
    names_to = c("cat", "rn2"),
    names_pattern = "(.*)_(.*)"
  )

# Pair every y row with the non-y row that shares its original row and suffix.
# After the join, the `.x` columns come from the y side and the `.y` columns
# from the other side.
temp_df %>%
  filter(cat == "y") %>%
  left_join(
    filter(temp_df, cat != "y"),
    by = c("rn", "rn2")
  ) %>%
  transmute(
    y = paste0(cat.x, "_", rn2),
    flowers = paste0(cat.y, "_", rn2),
    value_flower = value.y,
    value_y = value.x
  ) %>%
  arrange(y, flowers)

This returns

# A tibble: 5,000 x 4
   y     flowers  value_flower value_y
   <chr> <chr>           <dbl>   <dbl>
 1 y_1   flower_1        -2.5  0      
 2 y_1   flower_1        -2.50 0.00314
 3 y_1   flower_1        -2.49 0.00629
 4 y_1   flower_1        -2.49 0.00943
 5 y_1   flower_1        -2.49 0.0126 
 6 y_1   flower_1        -2.48 0.0157 
 7 y_1   flower_1        -2.48 0.0189 
 8 y_1   flower_1        -2.48 0.0220 
 9 y_1   flower_1        -2.47 0.0251 
10 y_1   flower_1        -2.47 0.0283 
# ... with 4,990 more rows

It's not a fast solution, but it can be done in one pipe:

# Same pairing as a join, done in a single pipe: go long, extract the suffix
# after the underscore, then within each original row join the y entries to
# the remaining entries that carry the same suffix.
first_df %>%
  mutate(rn = row_number()) %>%
  pivot_longer(cols = -rn) %>%
  mutate(rn2 = gsub(".*_", "", name)) %>%
  group_by(rn) %>%
  group_map(function(rows, key) {
    # `rows` holds the long rows of one original row; `key` is unused.
    y_part <- filter(rows, grepl("y_", name))
    other_part <- filter(rows, !grepl("y_", name))
    left_join(y_part, other_part, by = "rn2")
  }) %>%
  bind_rows() %>%
  select(
    y = name.x,
    flowers = name.y,
    value_y = value.x,
    value_flower = value.y
  )
  • Related