Home > Software engineering >  pivot a tibble to wide format based on multiple rows
pivot a tibble to wide format based on multiple rows

Time:11-16

Here's my data:

game_id     team      opponent    steals    assists
       <int> <chr>     <chr>           <chr>  <chr>  
 1 401468360 Spurs     Warriors        6      18     
 2 401468360 Warriors  Spurs           9      35     
 3 401468358 Clippers  Rockets         6      25     
 4 401468358 Rockets   Clippers        11     20     
 5 401468359 Hawks     Bucks           4      23     
 6 401468359 Bucks     Hawks           4      21     
 7 401468356 Thunder   Celtics         4      21     
 8 401468356 Celtics   Thunder         15     25     
 9 401468357 Suns      Heat            9      22     
10 401468357 Heat      Suns            6      3     

How do I get this data in the following format?

game_id     team     opponent     team_steals   opp_assists   team_assists   opp_steals
       <int> <chr>     <chr>          <int>       <chr>         <chr>         <chr>
 1 401468360 Spurs     Warriors        6           9              18            35
 2 401468358 Clippers  Rockets         6           11             25            20
 3 401468359 Hawks     Bucks           4           4              23            21
 4 401468356 Thunder   Celtics         4           15             21            25
 9 401468357 Suns      Heat            9           6              22            23

I have tried several variation of pivot_wider to no avail. I'm not even sure if that function will accomplish what I am seeking.

CodePudding user response:

library(dplyr)

dat %>% 
  right_join(dat, by = c("game_id", "team" = "opponent", "opponent" = "team"), 
                  suffix = c("_team", "_opponent")) %>% 
  distinct(game_id, .keep_all = TRUE)
#>     game_id     team opponent steals_team steals_opponent assists_team  assists_opponent
#> 1 401468360    Spurs Warriors           6               9           18                35
#> 2 401468358 Clippers  Rockets           6              11           25                20
#> 3 401468359    Hawks    Bucks           4               4           23                21
#> 4 401468356  Thunder  Celtics           4              15           21                25
#> 5 401468357     Suns     Heat           9               6           22                 3

CodePudding user response:

Something like this:

library(dplyr)
library(tidyr)
df %>%
  group_by(game_id) %>% 
  rename(team_steals = steals) %>% 
  mutate(opp_steals = lead(team_steals)) %>% 
  rename(team_assists = assists) %>% 
  mutate(opp_assists = lead(team_assists)) %>% 
  drop_na()
    game_id team     opponent team_steals team_assists opp_steals opp_assists
      <int> <chr>    <chr>          <int>        <int>      <int>       <int>
1 401468360 Spurs    Warriors           6           18          9          35
2 401468358 Clippers Rockets            6           25         11          20
3 401468359 Hawks    Bucks              4           23          4          21
4 401468356 Thunder  Celtics            4           21         15          25
5 401468357 Suns     Heat               9           22          6           3

CodePudding user response:

An option using pivot_wider with some data wrangling may look like so:

library(dplyr, w = FALSE)
library(tidyr)

dat |>
  group_by(game_id) |>
  mutate(foo = c("team", "opp"), across(c(team, opponent), first)) |>
  ungroup() |> 
  tidyr::pivot_wider(names_from = foo, values_from = c(steals, assists), names_glue = "{foo}_{.value}")
#> # A tibble: 5 × 7
#>     game_id team     opponent team_steals opp_steals team_assists opp_assists
#>       <int> <chr>    <chr>          <int>      <int>        <int>       <int>
#> 1 401468360 Spurs    Warriors           6          9           18          35
#> 2 401468358 Clippers Rockets            6         11           25          20
#> 3 401468359 Hawks    Bucks              4          4           23          21
#> 4 401468356 Thunder  Celtics            4         15           21          25
#> 5 401468357 Suns     Heat               9          6           22           3

DATA

dat <- structure(list(game_id = c(
  401468360L, 401468360L, 401468358L,
  401468358L, 401468359L, 401468359L, 401468356L, 401468356L, 401468357L,
  401468357L
), team = c(
  "Spurs", "Warriors", "Clippers", "Rockets",
  "Hawks", "Bucks", "Thunder", "Celtics", "Suns", "Heat"
), opponent = c(
  "Warriors",
  "Spurs", "Rockets", "Clippers", "Bucks", "Hawks", "Celtics",
  "Thunder", "Heat", "Suns"
), steals = c(
  6L, 9L, 6L, 11L, 4L, 4L,
  4L, 15L, 9L, 6L
), assists = c(
  18L, 35L, 25L, 20L, 23L, 21L, 21L,
  25L, 22L, 3L
)), class = "data.frame", row.names = c(
  "1", "2",
  "3", "4", "5", "6", "7", "8", "9", "10"
))
  • Related