Home > database >  position=dodge in geom_col in barplot
position=dodge in geom_col in barplot

Time:07-04

Here is a dataset of soccer players. I need to visualise the total number of yellow cards received next to the number of games played per country in one bar plot. So I need to calculate the total number of yellow cards and the total number of games per league country and bring the data into long format.

    dput(head(new_soccer_referee))
structure(list(playerShort = c("lucas-wilchez", "john-utaka", 
"abdon-prats", "pablo-mari", "ruben-pena", "aaron-hughes"), player = c("Lucas Wilchez", 
"John Utaka", " Abdón Prats", " Pablo Marí", " Rubén Peña", "Aaron Hughes"
), club = c("Real Zaragoza", "Montpellier HSC", "RCD Mallorca", 
"RCD Mallorca", "Real Valladolid", "Fulham FC"), leagueCountry = c("Spain", 
"France", "Spain", "Spain", "Spain", "England"), birthday = structure(c(4990, 
4390, 8386, 8643, 7868, 3598), class = "Date"), height = c(177L, 
179L, 181L, 191L, 172L, 182L), weight = c(72L, 82L, 79L, 87L, 
70L, 71L), position = c("Attacking Midfielder", "Right Winger", 
NA, "Center Back", "Right Midfielder", "Center Back"), games = c(1L, 
1L, 1L, 1L, 1L, 1L), victories = c(0L, 0L, 0L, 1L, 1L, 0L), ties = c(0L, 
0L, 1L, 0L, 0L, 0L), defeats = c(1L, 1L, 0L, 0L, 0L, 1L), goals = c(0L, 
0L, 0L, 0L, 0L, 0L), yellowCards = c(0L, 1L, 1L, 0L, 0L, 0L), 
    yellowReds = c(0L, 0L, 0L, 0L, 0L, 0L), redCards = c(0L, 
    0L, 0L, 0L, 0L, 0L), photoID = c("95212.jpg", "1663.jpg", 
    NA, NA, NA, "3868.jpg"), rater1 = c(0.25, 0.75, NA, NA, NA, 
    0.25), rater2 = c(0.5, 0.75, NA, NA, NA, 0), refNum = c(1L, 
    2L, 3L, 3L, 3L, 4L), refCountry = c(1L, 2L, 3L, 3L, 3L, 4L
    ), Alpha_3 = c("GRC", "ZMB", "ESP", "ESP", "ESP", "LUX"), 
    meanIAT = c(0.326391469021736, 0.203374724564378, 0.369893594187172, 
    0.369893594187172, 0.369893594187172, 0.325185154120009), 
    nIAT = c(712L, 40L, 1785L, 1785L, 1785L, 127L), seIAT = c(0.000564112354334542, 
    0.0108748941063986, 0.000229489640866464, 0.000229489640866464, 
    0.000229489640866464, 0.00329680952361961), meanExp = c(0.396, 
    -0.204081632653061, 0.588297311544544, 0.588297311544544, 
    0.588297311544544, 0.538461538461538), nExp = c(750L, 49L, 
    1897L, 1897L, 1897L, 130L), seExp = c(0.0026964901062936, 
    0.0615044043187379, 0.00100164730649311, 0.00100164730649311, 
    0.00100164730649311, 0.013752210497518), BMI = c(22.98190175237, 
    25.5922099809619, 24.1140380330271, 23.8480304816206, 23.6614386154678, 
    21.4346093466973), position_new = c("Offense", "Offense", 
    "Goalkeeper", "Defense", "Midfield", "Defense"), rater_mean = c(0.375, 
    0.75, NA, NA, NA, 0.125), ageinyear = c(28, 30, 19, 18, 20, 
    32), ageinyears = c(28, 30, 19, 18, 20, 32)), row.names = c(NA, 
6L), class = "data.frame")

Use the data to draw a bar plot with the following characteristics:

– The x-axis displays the league country while the y-axis displays the number of games and the number of cards

– For each country there are two bars next to each other: one for the games played and one for the cards received

# NOTE(review): this is the asker's non-working attempt, left unchanged.
# The statements below are not joined with `+` (possibly lost when the page
# was scraped), so as written none of the layers is added to the plot.
# The sample data shown above has no `number` column; it has to be created
# by aggregating per leagueCountry and reshaping to long format first.
barplot <- ggplot(new_soccer_referee,aes(x=leagueCountry,y=number))

barplot   

# `fill` is given two column names as plain strings here instead of being
# mapped to a variable inside aes().
geom_bar(fill=c("games","yellowCards"))  

# `Position` is capitalised, whereas the ggplot2 argument is `position`.
# `labels()` is presumably meant to be ggplot2's `labs()` - TODO confirm.
geom_col(Position="dodge")  
  labels(x="leagueCountry", y="number")

ggplot

`

I know it is pretty messy, but I am really confused about how to build up the layers with ggplot and how to get the data into long format. Can anyone help?

CodePudding user response:

One option would be to first aggregate your data to compute the number of yellowCards and games by leagueCountry. Afterwards you could convert the result to long format, which makes it easy to plot via ggplot2.

Using some fake random example data to mimic your real data:

# Fix the RNG seed so the fake example data is reproducible.
set.seed(123)

# Build 20 fake player rows with only the columns the plot needs:
# a player label, a league country, a yellow-card count and a games count.
new_soccer_referee <- data.frame(
  player = sample(letters, size = 20),
  leagueCountry = sample(
    c("Spain", "France", "England", "Italy"),
    size = 20,
    replace = TRUE
  ),
  yellowCards = sample(seq_len(5), size = 20, replace = TRUE),
  games = sample(seq_len(20), size = 20, replace = TRUE)
)

library(dplyr)
library(tidyr)
library(ggplot2)

# Sum yellow cards and games per league country, then reshape to long format
# (one row per country/variable pair) so `variable` can be mapped to `fill`.
new_soccer_referee_long <- new_soccer_referee %>%
  group_by(leagueCountry) %>%
  summarise(across(c(yellowCards, games), sum)) %>%
  pivot_longer(-leagueCountry, names_to = "variable", values_to = "number")

# ggplot2 layers must be chained with `+`; without it the plot is drawn with
# no bars. position = "dodge" places the two bars per country side by side.
ggplot(new_soccer_referee_long, aes(leagueCountry, number, fill = variable)) +
  geom_col(position = "dodge")

CodePudding user response:

Something like this:

library(tidyverse)

# Keep only the columns needed, total them per league country, reshape to
# long format (default columns `name` / `value`) and pipe into ggplot.
new_soccer_referee %>%
  select(leagueCountry, games, yellowCards) %>%
  group_by(leagueCountry) %>%
  summarise(
    games = sum(games),
    yellowCards = sum(yellowCards) # was misspelled "yellowCars" in the legend
  ) %>%
  pivot_longer(-leagueCountry) %>%
  # After the pipe hands the data to ggplot(), layers are added with `+`.
  ggplot(aes(x = leagueCountry, fill = name, y = value)) +
  geom_col(position = position_dodge())

enter image description here

  • Related