I'm trying to find out most efficient way to transform my data from table1 to table2 below. since i have large amount of data in the table and for loop is not ideal to use in this case. therefore needs expert opinion to accomplish this in faster way.
route_id <- c('FR00020604','FR00020604','FR00020604','FR00020604','FR00026117','FR00026117','FR00026117',
'FR00026117','FR00026117')
flow_type <- c('STD','STD','NSTD','NSTD','STD','STD','STD','STD','STD')
prod <- c('654495','654495','654495','654495','732919','732919','732921','732921','732921')
route_seg_num <- c('01','02','01','02','01','02','01','02','03')
ship_from <- c('S22491IE02','S22521DE09','S22491IE02','S22521DE09','S00745BR01','S01480PA01',
'S00745BR01','S01480PA01','S100142300')
ship_to <- c('S22521DE09','S85879GB03','S22521DE09','S85879GB03','S01480PA01','S100142300',
'S01480PA01','S100142300','S100153252')
table1
table2
CodePudding user response:
An approach using tidyr
s pivot_wider
library(dplyr)
library(tidyr)
pivot_wider(df, c(route_id, flow_type, prod),
names_from=route_seg_num, values_from=c(ship_to, ship_from)) %>%
rename(origin = ship_from_01) %>%
select(route_id:origin) %>%
rename_with(function(x)
sub(".*_(\\d) $", "dest_\\1", x), starts_with("ship")) %>%
relocate(origin, .after=prod)
# A tibble: 4 × 7
route_id flow_type prod origin dest_1 dest_2 dest_3
<chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 FR00020604 STD 654495 S22491IE02 S22521DE09 S85879GB03 NA
2 FR00020604 NSTD 654495 S22491IE02 S22521DE09 S85879GB03 NA
3 FR00026117 STD 732919 S00745BR01 S01480PA01 S100142300 NA
4 FR00026117 STD 732921 S00745BR01 S01480PA01 S100142300 S100153252
Data
df <- structure(list(route_id = c("FR00020604", "FR00020604", "FR00020604",
"FR00020604", "FR00026117", "FR00026117", "FR00026117", "FR00026117",
"FR00026117"), flow_type = c("STD", "STD", "NSTD", "NSTD", "STD",
"STD", "STD", "STD", "STD"), prod = c("654495", "654495", "654495",
"654495", "732919", "732919", "732921", "732921", "732921"),
route_seg_num = c("01", "02", "01", "02", "01", "02", "01",
"02", "03"), ship_from = c("S22491IE02", "S22521DE09", "S22491IE02",
"S22521DE09", "S00745BR01", "S01480PA01", "S00745BR01", "S01480PA01",
"S100142300"), ship_to = c("S22521DE09", "S85879GB03", "S22521DE09",
"S85879GB03", "S01480PA01", "S100142300", "S01480PA01", "S100142300",
"S100153252")), class = "data.frame", row.names = c(NA, -9L
))
CodePudding user response:
df %>%
group_by(route_id, flow_type) %>%
mutate(origin = first(ship_from))%>%
pivot_wider(-ship_from,names_from = route_seg_num, values_from = ship_to,
names_prefix = 'dest_')
route_id flow_type prod origin dest_01 dest_02 dest_03
<chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 FR00020604 STD 654495 S22491IE02 S22521DE09 S85879GB03 NA
2 FR00020604 NSTD 654495 S22491IE02 S22521DE09 S85879GB03 NA
3 FR00026117 STD 732919 S00745BR01 S01480PA01 S100142300 NA
4 FR00026117 STD 732921 S00745BR01 S01480PA01 S100142300 S100153252
CodePudding user response:
Here is a solution based on tidyverse
.
library(tidyverse)
df |>
pivot_wider(names_from = route_seg_num, values_from = c(ship_from, ship_to)) |>
select(-ship_from_03, -ship_to_01) |>
rename("origin" = "ship_from_01",
"dest1" = "ship_from_02",
"dest2" = "ship_to_02",
"dest3" = "ship_to_03")
#> # A tibble: 4 × 7
#> # Groups: route_id, flow_type [3]
#> route_id flow_type prod origin dest1 dest2 dest3
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 FR00020604 STD 654495 S22491IE02 S22521DE09 S85879GB03 <NA>
#> 2 FR00020604 NSTD 654495 S22491IE02 S22521DE09 S85879GB03 <NA>
#> 3 FR00026117 STD 732919 S00745BR01 S01480PA01 S100142300 <NA>
#> 4 FR00026117 STD 732921 S00745BR01 S01480PA01 S100142300 S100153252
DATA
route_id <- c(
"FR00020604", "FR00020604", "FR00020604", "FR00020604", "FR00026117", "FR00026117", "FR00026117",
"FR00026117", "FR00026117"
)
flow_type <- c("STD", "STD", "NSTD", "NSTD", "STD", "STD", "STD", "STD", "STD")
prod <- c("654495", "654495", "654495", "654495", "732919", "732919", "732921", "732921", "732921")
route_seg_num <- c("01", "02", "01", "02", "01", "02", "01", "02", "03")
ship_from <- c(
"S22491IE02", "S22521DE09", "S22491IE02", "S22521DE09", "S00745BR01", "S01480PA01",
"S00745BR01", "S01480PA01", "S100142300"
)
ship_to <- c(
"S22521DE09", "S85879GB03", "S22521DE09", "S85879GB03", "S01480PA01", "S100142300",
"S01480PA01", "S100142300", "S100153252"
)
df <- data.frame(
route_id, flow_type, prod, route_seg_num, ship_from, ship_to
)