Home > Back-end >  Reshaping data on multiple criteria for loop or custome function
Reshaping data on multiple criteria for loop or custome function

Time:01-25

I'm trying to find out most efficient way to transform my data from table1 to table2 below. since i have large amount of data in the table and for loop is not ideal to use in this case. therefore needs expert opinion to accomplish this in faster way.

route_id <- c('FR00020604','FR00020604','FR00020604','FR00020604','FR00026117','FR00026117','FR00026117',
'FR00026117','FR00026117')
 
flow_type <- c('STD','STD','NSTD','NSTD','STD','STD','STD','STD','STD')
    
prod <- c('654495','654495','654495','654495','732919','732919','732921','732921','732921')
    
route_seg_num <- c('01','02','01','02','01','02','01','02','03')
    
ship_from <- c('S22491IE02','S22521DE09','S22491IE02','S22521DE09','S00745BR01','S01480PA01',
'S00745BR01','S01480PA01','S100142300')
    
ship_to <- c('S22521DE09','S85879GB03','S22521DE09','S85879GB03','S01480PA01','S100142300',
'S01480PA01','S100142300','S100153252')

table1

enter image description here

table2

enter image description here

CodePudding user response:

An approach using tidyrs pivot_wider

library(dplyr)
library(tidyr)

pivot_wider(df, c(route_id, flow_type, prod), 
    names_from=route_seg_num, values_from=c(ship_to, ship_from)) %>% 
  rename(origin = ship_from_01) %>% 
  select(route_id:origin) %>% 
  rename_with(function(x) 
    sub(".*_(\\d) $", "dest_\\1", x), starts_with("ship")) %>%
  relocate(origin, .after=prod)
# A tibble: 4 × 7
  route_id   flow_type prod   origin     dest_1     dest_2     dest_3    
  <chr>      <chr>     <chr>  <chr>      <chr>      <chr>      <chr>     
1 FR00020604 STD       654495 S22491IE02 S22521DE09 S85879GB03 NA        
2 FR00020604 NSTD      654495 S22491IE02 S22521DE09 S85879GB03 NA        
3 FR00026117 STD       732919 S00745BR01 S01480PA01 S100142300 NA        
4 FR00026117 STD       732921 S00745BR01 S01480PA01 S100142300 S100153252

Data

df <- structure(list(route_id = c("FR00020604", "FR00020604", "FR00020604", 
"FR00020604", "FR00026117", "FR00026117", "FR00026117", "FR00026117", 
"FR00026117"), flow_type = c("STD", "STD", "NSTD", "NSTD", "STD", 
"STD", "STD", "STD", "STD"), prod = c("654495", "654495", "654495", 
"654495", "732919", "732919", "732921", "732921", "732921"), 
    route_seg_num = c("01", "02", "01", "02", "01", "02", "01", 
    "02", "03"), ship_from = c("S22491IE02", "S22521DE09", "S22491IE02", 
    "S22521DE09", "S00745BR01", "S01480PA01", "S00745BR01", "S01480PA01", 
    "S100142300"), ship_to = c("S22521DE09", "S85879GB03", "S22521DE09", 
    "S85879GB03", "S01480PA01", "S100142300", "S01480PA01", "S100142300", 
    "S100153252")), class = "data.frame", row.names = c(NA, -9L
))

CodePudding user response:

df %>%
  group_by(route_id, flow_type) %>%
  mutate(origin = first(ship_from))%>%
  pivot_wider(-ship_from,names_from = route_seg_num, values_from = ship_to,
              names_prefix = 'dest_')



 route_id   flow_type prod   origin     dest_01    dest_02    dest_03   
  <chr>      <chr>     <chr>  <chr>      <chr>      <chr>      <chr>     
1 FR00020604 STD       654495 S22491IE02 S22521DE09 S85879GB03 NA        
2 FR00020604 NSTD      654495 S22491IE02 S22521DE09 S85879GB03 NA        
3 FR00026117 STD       732919 S00745BR01 S01480PA01 S100142300 NA        
4 FR00026117 STD       732921 S00745BR01 S01480PA01 S100142300 S100153252

CodePudding user response:

Here is a solution based on tidyverse.


library(tidyverse)

df |>
  pivot_wider(names_from = route_seg_num, values_from = c(ship_from, ship_to)) |>
  select(-ship_from_03, -ship_to_01) |> 
  rename("origin" = "ship_from_01", 
         "dest1" = "ship_from_02", 
         "dest2" = "ship_to_02", 
         "dest3" = "ship_to_03")

#> # A tibble: 4 × 7
#> # Groups:   route_id, flow_type [3]
#>   route_id   flow_type prod   origin     dest1      dest2      dest3     
#>   <chr>      <chr>     <chr>  <chr>      <chr>      <chr>      <chr>     
#> 1 FR00020604 STD       654495 S22491IE02 S22521DE09 S85879GB03 <NA>      
#> 2 FR00020604 NSTD      654495 S22491IE02 S22521DE09 S85879GB03 <NA>      
#> 3 FR00026117 STD       732919 S00745BR01 S01480PA01 S100142300 <NA>      
#> 4 FR00026117 STD       732921 S00745BR01 S01480PA01 S100142300 S100153252

DATA

route_id <- c(
  "FR00020604", "FR00020604", "FR00020604", "FR00020604", "FR00026117", "FR00026117", "FR00026117",
  "FR00026117", "FR00026117"
)

flow_type <- c("STD", "STD", "NSTD", "NSTD", "STD", "STD", "STD", "STD", "STD")

prod <- c("654495", "654495", "654495", "654495", "732919", "732919", "732921", "732921", "732921")

route_seg_num <- c("01", "02", "01", "02", "01", "02", "01", "02", "03")

ship_from <- c(
  "S22491IE02", "S22521DE09", "S22491IE02", "S22521DE09", "S00745BR01", "S01480PA01",
  "S00745BR01", "S01480PA01", "S100142300"
)

ship_to <- c(
  "S22521DE09", "S85879GB03", "S22521DE09", "S85879GB03", "S01480PA01", "S100142300",
  "S01480PA01", "S100142300", "S100153252"
)


df <- data.frame(
  route_id, flow_type, prod, route_seg_num, ship_from, ship_to
)
  • Related