Home > OS >  Plotting counts and cumulative numbers in one plot
Plotting counts and cumulative numbers in one plot

Time:02-22

I have data as follows:

# Example data: one row per observation. Within each `group_size` bin,
# `group_sum` and `count` hold a single value repeated on every row.
# NOTE(review): the label "(250,3e 03]" looks like "(250,3e+03]" with the "+"
# lost in extraction -- confirm against the original data before changing it.
dat <- structure(
  list(
    group_size = structure(
      c(
        6L, 3L, 3L, 4L, 1L, 2L, 2L, 1L, 3L, 6L,
        2L, 6L, 2L, 2L, 1L, 1L, 4L, 1L, 3L, 2L
      ),
      .Label = c(
        "(0,50]", "(50,100]", "(100,150]",
        "(150,200]", "(200,250]", "(250,3e 03]"
      ),
      class = "factor"
    ),
    amount = c(
      409, 101, 103, 198, 40, 63, 69, 49, 126, 304,
      91, 401, 96, 63, 36, 1, 177, 7, 112, 61
    ),
    group_sum = c(
      1114, 442, 442, 375, 133, 443, 443, 133, 442, 1114,
      443, 1114, 443, 443, 133, 133, 375, 133, 442, 443
    ),
    count = c(
      3L, 4L, 4L, 2L, 5L, 6L, 6L, 5L, 4L, 3L,
      6L, 3L, 6L, 6L, 5L, 5L, 2L, 5L, 4L, 6L
    )
  ),
  row.names = c(NA, -20L),
  class = c("data.table", "data.frame")
)

     group_size amount group_sum count
 1: (250,3e 03]    409      1114     3
 2:   (100,150]    101       442     4
 3:   (100,150]    103       442     4
 4:   (150,200]    198       375     2
 5:      (0,50]     40       133     5
 6:    (50,100]     63       443     6
 7:    (50,100]     69       443     6
 8:      (0,50]     49       133     5
 9:   (100,150]    126       442     4
10: (250,3e 03]    304      1114     3
11:    (50,100]     91       443     6
12: (250,3e 03]    401      1114     3
13:    (50,100]     96       443     6
14:    (50,100]     63       443     6
15:      (0,50]     36       133     5
16:      (0,50]      1       133     5
17:   (150,200]    177       375     2
18:      (0,50]      7       133     5
19:   (100,150]    112       442     4
20:    (50,100]     61       443     6

I would like to have a plot with the group size on the x-axis, and both the count and group_sum on the y-axis.

EDIT: The scale on the y-axis should show the group_sum. The count could simply be printed as a number on top of each bar.

I am trying to adapt the example from the linked image (not reproduced here).

But I keep messing something up. Any suggestions?

CodePudding user response:

Something like this?

library(tidyverse)

# Example data from the question: 20 observations in `group_size` bins, with
# the per-bin `group_sum` and `count` repeated on every row of the bin.
# NOTE(review): the label "(250,3e 03]" looks like "(250,3e+03]" with the "+"
# lost in extraction -- confirm against the original data before changing it.
dat <- structure(
  list(
    group_size = structure(
      c(
        6L, 3L, 3L, 4L, 1L, 2L, 2L, 1L, 3L, 6L,
        2L, 6L, 2L, 2L, 1L, 1L, 4L, 1L, 3L, 2L
      ),
      .Label = c(
        "(0,50]", "(50,100]", "(100,150]",
        "(150,200]", "(200,250]", "(250,3e 03]"
      ),
      class = "factor"
    ),
    amount = c(
      409, 101, 103, 198, 40, 63, 69, 49, 126, 304,
      91, 401, 96, 63, 36, 1, 177, 7, 112, 61
    ),
    group_sum = c(
      1114, 442, 442, 375, 133, 443, 443, 133, 442, 1114,
      443, 1114, 443, 443, 133, 133, 375, 133, 442, 443
    ),
    count = c(
      3L, 4L, 4L, 2L, 5L, 6L, 6L, 5L, 4L, 3L,
      6L, 3L, 6L, 6L, 5L, 5L, 2L, 5L, 4L, 6L
    )
  ),
  row.names = c(NA, -20L),
  class = c("data.table", "data.frame")
)

# Draw group_sum (red) and count (blue) as outlined bars per group_size bin,
# each labelled with its unscaled value.
# `count` is multiplied by 10 so its bars are visible on the group_sum axis;
# the secondary axis divides by 10 again to show the real counts.
dat %>%
  as_tibble() %>%
  # group_sum and count repeat on every row of a bin: keep one row per bin so
  # each bar and each label is drawn exactly once.
  distinct(group_size, group_sum, count) %>%
  ggplot(aes(x = group_size)) +
  geom_col(
    aes(y = group_sum),
    position = "identity", color = "red", fill = "transparent"
  ) +
  geom_label(aes(y = group_sum, label = group_sum), color = "red") +
  geom_col(
    aes(y = count * 10),
    position = "identity", color = "blue", fill = "transparent"
  ) +
  geom_label(aes(y = count * 10, label = count), color = "blue") +
  # The transformation is passed positionally: the argument is named `trans`
  # in older ggplot2 and `transform` from 3.5.0 on.
  scale_y_continuous(sec.axis = sec_axis(~ . / 10, name = "Count"))

Created on 2022-02-22 by the reprex package. (The example plot is not reproduced here.)

CodePudding user response:

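This gives a dodged bar plot after pivoting the data to long format. Because `group_sum` and `count` are repeated on every row of a group, first reduce the data to the unique combinations of `group_size`, `group_sum` and `count`, ignoring `amount`.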

    library(ggplot2)
    library(dplyr)
    library(tidyr)
    
    # Example data from the question: 20 observations in `group_size` bins,
    # with the per-bin `group_sum` and `count` repeated on every row.
    # NOTE(review): the label "(250,3e 03]" looks like "(250,3e+03]" with the
    # "+" lost in extraction -- confirm before changing it.
    dat <- structure(
      list(
        group_size = structure(
          c(
            6L, 3L, 3L, 4L, 1L, 2L, 2L, 1L, 3L, 6L,
            2L, 6L, 2L, 2L, 1L, 1L, 4L, 1L, 3L, 2L
          ),
          .Label = c(
            "(0,50]", "(50,100]", "(100,150]",
            "(150,200]", "(200,250]", "(250,3e 03]"
          ),
          class = "factor"
        ),
        amount = c(
          409, 101, 103, 198, 40, 63, 69, 49, 126, 304,
          91, 401, 96, 63, 36, 1, 177, 7, 112, 61
        ),
        group_sum = c(
          1114, 442, 442, 375, 133, 443, 443, 133, 442, 1114,
          443, 1114, 443, 443, 133, 133, 375, 133, 442, 443
        ),
        count = c(
          3L, 4L, 4L, 2L, 5L, 6L, 6L, 5L, 4L, 3L,
          6L, 3L, 6L, 6L, 5L, 5L, 2L, 5L, 4L, 6L
        )
      ),
      row.names = c(NA, -20L),
      class = c("data.table", "data.frame")
    )
    
    # Create a data set with unique values of the desired variables: one row
    # per distinct (group_size, group_sum, count) combination, dropping amount.
    dat_unique <- dat %>% distinct(group_size, group_sum, count)

    # Long format of dat_unique: one row per group_size and variable, with the
    # variable name in `y_varname` and its value in `y`.
    dat_long <- dat_unique %>%
      pivot_longer(
        cols = c("group_sum", "count"),
        names_to = "y_varname",
        values_to = "y"
      ) %>%
      mutate(
        # keep the unscaled value for the bar labels
        y_label = y,
        # multiply count by 50 so its bars are comparable to group_sum on the
        # plot; group_sum is left unchanged
        y = if_else(y_varname == "count", y * 50, y)
      )
    
    # plot: dodged bars per group_size, one bar per variable, each labelled
    # with its unscaled value
    dat_long %>%
      ggplot(aes(group_size, y, fill = y_varname)) +
      geom_col(position = "dodge") +
      geom_text(
        aes(label = y_label),
        # text has no width of its own, so the dodge width is set explicitly
        # (0.9 is the default bar width) to centre each label over its bar
        position = position_dodge(width = 0.9),
        # lift the label just above the top of the bar
        vjust = -0.3
      )

enter image description here

  • Related