I have a dataframe that looks like this:
And I'm trying to make it look like this:
team points
Chicago Fire 1.725424
Club de Foot Montreal 1.0349628
Chicago Fire 1.591609
Columbus Crew 1.16115715
.
.
.
Which would mean using pivot_longer
selectively.
My code at the moment is as so:
# Expected points for each side of a match, presumably 3 points for a win
# plus 1 point for a draw, each weighted by its probability.
expected_points <- simulate_results %>%
  mutate(
    home_expected_points = 3 * home_win_prob + 1 * draw_prob,
    away_expected_points = 3 * away_win_prob + 1 * draw_prob
  ) %>%
  select(home, away, away_expected_points, home_expected_points) %>%
  # This single pivot stacks the character team columns and the double
  # points columns into one value column, which is what raises the
  # type-mixing error described below.
  pivot_longer(
    cols = c("home", "away", "away_expected_points", "home_expected_points"),
    values_to = "points",
    names_to = "type"
  )
expected_points
but I get an error from mixing the double and character values of the df. Is this something that should be done in one step?
Here is the head of the df just before the pivot:
# dput() of the head of the data frame just before the pivot.
# Note the class: this is a grouped_df (grouped by `home`), not a plain tibble.
structure(list(home = c("Chicago Fire", "Chicago Fire", "Chicago Fire",
"Chicago Fire", "Chicago Fire", "Chicago Fire"), away = c("Club de Foot Montreal",
"Columbus Crew", "DC United", "Houston Dynamo", "Los Angeles Galaxy",
"New England Revolution"), away_expected_points = c(1.03496281825711,
1.16115715218849, 1.07116563833606, 1.09794662072245, 1.50393127764973,
1.22115923847215), home_expected_points = c(1.72542415785488,
1.59160932991963, 1.67152015760113, 1.64871325211506, 1.25278573638545,
1.52350845382982)), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -6L), groups = structure(list(
home = "Chicago Fire", .rows = structure(list(1:6), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -1L), .drop = TRUE))
CodePudding user response:
An alternative using pmap_dfr()
.
library(tidyverse)
# Build two rows per match (home first, then away), pairing each team with
# its own expected points. Columns are matched by name rather than by
# position (..1, ..4), so the result does not depend on the column order of
# `df`; `...` absorbs any additional columns.
pmap_dfr(
  df,
  function(home, away, home_expected_points, away_expected_points, ...) {
    tibble(
      team = c(home, away),
      points = c(home_expected_points, away_expected_points)
    )
  }
)
# # A tibble: 12 x 2
# team points
# <chr> <dbl>
# 1 Chicago Fire 1.73
# 2 Club de Foot Montreal 1.03
# 3 Chicago Fire 1.59
# 4 Columbus Crew 1.16
# 5 Chicago Fire 1.67
# 6 DC United 1.07
# 7 Chicago Fire 1.65
# 8 Houston Dynamo 1.10
# 9 Chicago Fire 1.25
# 10 Los Angeles Galaxy 1.50
# 11 Chicago Fire 1.52
# 12 New England Revolution 1.22
An identical output can be created using pivot_longer()
twice, as also suggested by jpiversen.
library(tidyverse)
# Reshape to one row per team per match using two pivots.
# 1. Drop the existing grouping (the input is a grouped_df) so later steps
#    operate on the whole table and the result is an ungrouped tibble.
# 2. Stack home/away into `team`, remembering which side each row came from.
# 3. Stack the two *_expected_points columns into `points`, remembering
#    which side each value belongs to.
# 4. Keep only the rows where the team's side matches the points' side,
#    instead of relying on row positions within each match.
df %>%
  ungroup() %>%
  pivot_longer(c("home", "away"), names_to = "side", values_to = "team") %>%
  pivot_longer(
    ends_with("expected_points"),
    names_to = "points_side",
    values_to = "points"
  ) %>%
  filter(points_side == paste0(side, "_expected_points")) %>%
  select(-side, -points_side)
# # A tibble: 12 x 2
# team points
# <chr> <dbl>
# 1 Chicago Fire 1.73
# 2 Club de Foot Montreal 1.03
# 3 Chicago Fire 1.59
# 4 Columbus Crew 1.16
# 5 Chicago Fire 1.67
# 6 DC United 1.07
# 7 Chicago Fire 1.65
# 8 Houston Dynamo 1.10
# 9 Chicago Fire 1.25
# 10 Los Angeles Galaxy 1.50
# 11 Chicago Fire 1.52
# 12 New England Revolution 1.22
CodePudding user response:
You need to do the pivoting in two steps:
- combining
home
and away
- combining
away_expected_points
and home_expected_points
Below is an example. I'm calling the data you provided df
:
library(tidyr)
# Step 1: stack the `home` and `away` columns into a single `team` column,
# recording the originating column name in `home_away`.
teams_long <- pivot_longer(
  df,
  cols = c("home", "away"),
  names_to = "home_away",
  values_to = "team"
)

# Step 2: stack both expected-points columns into a single `points` column.
# Note: each team row is paired with BOTH points columns, so every match
# yields 2 x 2 = 4 rows (24 rows for the six-row example).
pivot_longer(
  teams_long,
  cols = contains("expected_points"),
  names_to = NULL,
  values_to = "points"
)
#> # A tibble: 24 x 3
#> home_away team points
#> <chr> <chr> <dbl>
#> 1 home Chicago Fire 1.03
#> 2 home Chicago Fire 1.73
#> 3 away Club de Foot Montreal 1.03
#> 4 away Club de Foot Montreal 1.73
#> 5 home Chicago Fire 1.16
#> 6 home Chicago Fire 1.59
#> 7 away Columbus Crew 1.16
#> 8 away Columbus Crew 1.59
#> 9 home Chicago Fire 1.07
#> 10 home Chicago Fire 1.67
#> # ... with 14 more rows
PS: if you don't want the home_away
column you can just change the line names_to = "home_away"
to names_to = NULL
.