I am trying to find the missing GWs (game weeks) in this dataset. I used setdiff to find the missing weeks for a particular name, but I am not sure how to do it in one go for all unique names in the dataset. The original dataset has a lot of names, so doing it one by one is difficult, and I am also trying to avoid using loops.
Can someone please guide me to a dplyr solution? I basically want a data frame that has the names in one column and the missing GWs in another column (this column will hold a list of the missing GWs for each name).
# Single-player attempt: game weeks 1-38 that have no row for the given name.
Num_GW <- seq_len(38L)
setdiff(
  Num_GW,
  City_LS %>%
    filter(name == "Kevin De Bruyne") %>%  # `==` compares; `=` is an error here
    pull(GW) %>%
    unique()
)
This is dput(City_LS) of sample data I am using.
# dput() output for the sample City_LS data frame: 74 rows with the columns
# name, opp_team_name and GW. GW values 30 and 37 do not appear in the data.
structure(list(name = c("Kevin De Bruyne", "Bernardo Mota Veiga de Carvalho e Silva",
"Kevin De Bruyne", "Bernardo Mota Veiga de Carvalho e Silva",
"Kevin De Bruyne", "Bernardo Mota Veiga de Carvalho e Silva",
"Kevin De Bruyne", "Bernardo Mota Veiga de Carvalho e Silva",
"Kevin De Bruyne", "Bernardo Mota Veiga de Carvalho e Silva",
"Kevin De Bruyne", "Bernardo Mota Veiga de Carvalho e Silva",
"Kevin De Bruyne", "Bernardo Mota Veiga de Carvalho e Silva",
"Kevin De Bruyne", "Bernardo Mota Veiga de Carvalho e Silva",
"Kevin De Bruyne", "Bernardo Mota Veiga de Carvalho e Silva",
"Kevin De Bruyne", "Bernardo Mota Veiga de Carvalho e Silva",
"Kevin De Bruyne", "Bernardo Mota Veiga de Carvalho e Silva",
"Kevin De Bruyne", "Bernardo Mota Veiga de Carvalho e Silva",
"Kevin De Bruyne", "Bernardo Mota Veiga de Carvalho e Silva",
"Kevin De Bruyne", "Bernardo Mota Veiga de Carvalho e Silva",
"Kevin De Bruyne", "Bernardo Mota Veiga de Carvalho e Silva",
"Kevin De Bruyne", "Bernardo Mota Veiga de Carvalho e Silva",
"Kevin De Bruyne", "Bernardo Mota Veiga de Carvalho e Silva",
"Kevin De Bruyne", "Bernardo Mota Veiga de Carvalho e Silva",
"Kevin De Bruyne", "Bernardo Mota Veiga de Carvalho e Silva",
"Kevin De Bruyne", "Bernardo Mota Veiga de Carvalho e Silva",
"Bernardo Mota Veiga de Carvalho e Silva", "Kevin De Bruyne",
"Bernardo Mota Veiga de Carvalho e Silva", "Kevin De Bruyne",
"Kevin De Bruyne", "Bernardo Mota Veiga de Carvalho e Silva",
"Kevin De Bruyne", "Bernardo Mota Veiga de Carvalho e Silva",
"Kevin De Bruyne", "Bernardo Mota Veiga de Carvalho e Silva",
"Kevin De Bruyne", "Bernardo Mota Veiga de Carvalho e Silva",
"Bernardo Mota Veiga de Carvalho e Silva", "Kevin De Bruyne",
"Kevin De Bruyne", "Bernardo Mota Veiga de Carvalho e Silva",
"Kevin De Bruyne", "Bernardo Mota Veiga de Carvalho e Silva",
"Bernardo Mota Veiga de Carvalho e Silva", "Kevin De Bruyne",
"Bernardo Mota Veiga de Carvalho e Silva", "Kevin De Bruyne",
"Bernardo Mota Veiga de Carvalho e Silva", "Kevin De Bruyne",
"Bernardo Mota Veiga de Carvalho e Silva", "Kevin De Bruyne",
"Bernardo Mota Veiga de Carvalho e Silva", "Kevin De Bruyne",
"Kevin De Bruyne", "Kevin De Bruyne", "Bernardo Mota Veiga de Carvalho e Silva",
"Bernardo Mota Veiga de Carvalho e Silva", "Kevin De Bruyne",
"Bernardo Mota Veiga de Carvalho e Silva"), opp_team_name = c("Spurs",
"Spurs", "Norwich", "Norwich", "Arsenal", "Arsenal", "Leicester",
"Leicester", "Southampton", "Southampton", "Chelsea", "Chelsea",
"Liverpool", "Liverpool", "Burnley", "Burnley", "Brighton", "Brighton",
"Crystal Palace", "Crystal Palace", "Man Utd", "Man Utd", "Everton",
"Everton", "West Ham", "West Ham", "Aston Villa", "Aston Villa",
"Watford", "Watford", "Wolves", "Wolves", "Leeds", "Leeds", "Newcastle",
"Newcastle", "Leicester", "Leicester", "Brentford", "Brentford",
"Arsenal", "Arsenal", "Chelsea", "Chelsea", "Southampton", "Southampton",
"Brentford", "Brentford", "Norwich", "Norwich", "Spurs", "Spurs",
"Everton", "Everton", "Man Utd", "Man Utd", "Crystal Palace",
"Crystal Palace", "Burnley", "Burnley", "Liverpool", "Liverpool",
"Brighton", "Brighton", "Watford", "Watford", "Leeds", "Leeds",
"Newcastle", "Wolves", "Newcastle", "Wolves", "Aston Villa",
"Aston Villa"), GW = c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L,
6L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 10L, 10L, 11L, 11L, 12L, 12L,
13L, 13L, 14L, 14L, 15L, 15L, 16L, 16L, 17L, 17L, 18L, 18L, 19L,
19L, 20L, 20L, 21L, 21L, 22L, 22L, 23L, 23L, 24L, 24L, 25L, 25L,
26L, 26L, 27L, 27L, 28L, 28L, 29L, 29L, 31L, 31L, 32L, 32L, 33L,
33L, 34L, 34L, 35L, 35L, 36L, 36L, 36L, 36L, 38L, 38L)), class = "data.frame", row.names = c(NA,
-74L))
CodePudding user response:
As a start:
library(tidyverse)

# List every (name, GW) pair that is absent from City_LS.
# complete() adds a row for each name/GW combination not present in the data,
# filling the other columns with NA; the filter keeps only those added rows.
# The GW range is the fixed 38-week season from the question rather than
# max(City_LS$GW), so weeks missing at the end of the season are reported too.
# To get one row per name with a list column of missing weeks, append:
#   group_by(name) %>% summarise(missing_GW = list(GW))
City_LS %>%
  complete(name, GW = seq_len(38L)) %>%
  filter(is.na(opp_team_name))
# A tibble: 4 × 3
name GW opp_team_name
<chr> <int> <chr>
1 Bernardo Mota Veiga de Carvalho e Silva 30 NA
2 Bernardo Mota Veiga de Carvalho e Silva 37 NA
3 Kevin De Bruyne 30 NA
4 Kevin De Bruyne 37 NA