Home > database >  Put violin plot on sides and have a line with group average in R ggplot
Put violin plot on sides and have a line with group average in R ggplot

Time:10-08

I have a dataset, of which a subset is:

structure(list(Session = structure(c(2L, 1L, 1L, 2L, 2L, 2L, 
1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 
1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 
2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L), .Label = c("Pre", 
"Post"), class = "factor"), First.Name = c(5019L, 8007L, 5059L, 
5037L, 5023L, 6023L, 8029L, 6009L, 7034L, 5011L, 5062L, 8026L, 
5019L, 7022L, 7017L, 7002L, 5037L, 5034L, 6005L, 8006L, 6036L, 
7011L, 5010L, 5038L, 6049L, 5007L, 5032L, 6016L, 5052L, 8033L, 
5026L, 7017L, 8002L, 7014L, 5061L, 7034L, 6018L, 6013L, 5063L, 
6016L, 5007L, 6014L, 5059L, 5049L, 7007L, 5024L, 7008L, 6007L, 
6041L, 5061L), Age = structure(c(1L, 2L, 1L, 1L, 1L, 1L, 2L, 
1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L), .Label = c("Young", 
"Old"), class = "factor"), Group = c("Increase", "Decrease", 
"Increase", "Increase", "Increase", "Decrease", "Decrease", "Decrease", 
"Increase", "Increase", "Increase", "Decrease", "Increase", "Increase", 
"Increase", "Increase", "Increase", "Increase", "Decrease", "Decrease", 
"Decrease", "Increase", "Increase", "Increase", "Decrease", "Increase", 
"Increase", "Decrease", "Increase", "Decrease", "Increase", "Increase", 
"Decrease", "Increase", "Increase", "Increase", "Decrease", "Decrease", 
"Increase", "Decrease", "Increase", "Decrease", "Increase", "Increase", 
"Increase", "Increase", "Increase", "Decrease", "Decrease", "Increase"
), RHR = c(52.8421052631579, 63.2857142857143, 63.2, 62.9090909090909, 
80.1818181818182, 61.5806451612903, 70, 64.2121212121212, 71.9, 
77.3333333333333, 64.75, 58.4, 48.4, 47.0277777777778, 49.9, 
63.3333333333333, 59.8571428571429, 54.2, 56.0322580645161, 62.375, 
59.8823529411765, 49.0882352941176, 73.4545454545455, 54.4516129032258, 
51.84, 62.5882352941176, 69.2307692307692, 53.5555555555556, 
52.6296296296296, 58.2608695652174, 58.3333333333333, 52.375, 
69, 51.4285714285714, 53.2, 76.125, 54.8571428571429, 59.625, 
70.0909090909091, 52.1612903225806, 63, 58.6, 61.8285714285714, 
61, 56.3333333333333, 68.3333333333333, 52, 69.9, 63.7142857142857, 
56.9428571428571)), row.names = c(NA, -50L), groups = structure(list(
    First.Name = c(235007L, 235007L, 235010L, 523011L, 235019L, 235019L, 
    235023L, 235024L, 235026L, 235032L, 235034L, 235037L, 235037L, 235038L, 235049L, 
    235052L, 235059L, 235059L, 235061L, 235061L, 235062L, 235063L, 236005L, 236007L, 
    236009L, 236013L, 236014L, 236016L, 236016L, 236018L, 236023L, 236036L, 236041L, 
    236049L, 237002L, 237007L, 237008L, 237011L, 237014L, 237017L, 237017L, 237022L, 
    237034L, 237034L, 238002L, 238006L, 238007L, 238026L, 238029L, 238033L), 
    Session = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 
    2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 
    2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 
    1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L), .Label = c("Pre", 
    "Post"), class = "factor"), .rows = structure(list(41L, 26L, 
        23L, 10L, 13L, 1L, 5L, 46L, 31L, 27L, 18L, 17L, 4L, 24L, 
        44L, 29L, 3L, 43L, 35L, 50L, 11L, 39L, 19L, 48L, 8L, 
        38L, 42L, 28L, 40L, 37L, 6L, 21L, 49L, 25L, 16L, 45L, 
        47L, 22L, 34L, 32L, 15L, 14L, 36L, 9L, 33L, 20L, 2L, 
        12L, 7L, 30L), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), row.names = c(NA, 50L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

Which looks like:

# A tibble: 50 x 5
# Groups:   First.Name, Session [50]
   Session First.Name Age   Group      RHR
   <fct>        <int> <fct> <chr>    <dbl>
 1 Post          235019 Young Increase  52.8
 2 Pre           238007 Old   Decrease  63.3
 3 Pre           235059 Young Increase  63.2
 4 Post          235037 Young Increase  62.9
 5 Post          235023 Young Increase  80.2
 6 Post          236023 Young Decrease  61.6
 7 Pre           238029 Old   Decrease  70  
 8 Post          236009 Young Decrease  64.2
 9 Post          237034 Old   Increase  71.9
10 Post          235011 Young Increase  77.3

I am trying to create a ggplot like this: plot_generated

The code is:

#libraries
library(tidyverse)
library(ggpubr)
library(gghalves)

#constats
size           = 1.5
alpha          = .4

#calculate standard error for plot
se_fun <- function(x) sd(x)/sqrt(length(x))

#get mean RHR for se
mean_RHR <- DF %>% 
  group_by(Session, Group) %>% 
  summarise_at(vars(RHR), funs(mean(., na.rm=TRUE)))

#calculate se
se_RHR <- se_fun(mean_RHR$RHR)

#ggplot
ggplot(data = subset(DF, !is.na(Session)),
       aes(x = Session, y = RHR, color = Group))  
  geom_point(size  = size,
             alpha = alpha)  
#individual paired (or unpaired) data
  geom_line(aes(group = First.Name),
            alpha = 0.3)  
  geom_half_violin(aes(fill = Group),
                   alpha = alpha, 
                   side = "l",
                   position = position_nudge(x = .49))  
  geom_half_violin(aes(fill = Group),
                   alpha = alpha, 
                   side = "r",
                   position = position_nudge(x = -.49))  
#average line per group
  geom_line(aes(group = Group),
            linetype = 1, size =1.5)  
  geom_errorbar(aes(x = Session, y = RHR, 
                    ymin = RHR-se_RHR, ymax = RHR se_RHR, 
                    group=Group), 
                width = 0.05, size = 1, alpha = .9)  
  stat_compare_means(comparisons = c("Pre","Post"), paired = TRUE, na.rm = T)  
  theme_classic(base_size=24)

However, I am struggling to remove the original violin plot that is placed in the middle (I only want the halves on the side of the plot). And the geom_line averages per session are showing a vertical line along each data point that I would like removed.

So the desired plot below would remove the parts highlighted in yellow, as described above, and only keep:

  1. The half violin
  2. Individual subject paired lines (not error bars)
  3. Each session averaged geom_line byGroup and their corresponding error bars.

Help appreciated.

desired plot

CodePudding user response:

I'm not sure about what is test and DF but this plot may suits your purpose.

DF2 <- DF %>% 
  group_by(Group, Session) %>%
  summarise(se = sd(RHR), RHR = mean(RHR))

#ggplot
ggplot(data = subset(DF, !is.na(Session)),
       aes(x = Session, y = RHR, color = Group))  
  geom_point(size  = size,
             alpha = alpha)  
  geom_line(data = DF2, aes(x = Session, y = RHR, color = Group, group = Group)) 
  geom_half_violin(aes(fill = Group), data = DF %>% filter(Session == "Post"),
                   alpha = alpha, 
                   side = "l",
                   position = position_nudge(x = .49))  
  geom_half_violin(aes(fill = Group), data = DF %>% filter(Session == "Pre"),
                   alpha = alpha, 
                   side = "r",
                   position = position_nudge(x = -.49))  
  #average line per group
  
  geom_errorbar(data = DF2, aes(x = Session, y = RHR, 
                    ymin = RHR-se, ymax = RHR se, 
                    group=Group), 
                width = 0.5, size = 1, alpha = .9)  
  stat_compare_means(comparisons = c("Pre","Post"), paired = TRUE, na.rm = T)  
  theme_classic(base_size=24)

enter image description here

  • Related