Home > other >  Grouping by multiple columns for ggplot
Grouping by multiple columns for ggplot

Time:03-02

I have data that look like this (I'm only including the first twenty rows to show the spread of data but there are about 135 rows):

> dput(id)
# First 20 rows only (the full data set has more rows); row.names is set to
# -20L so the declared row count matches the 20 values present in each column.
structure(list(date = c("7/27/1992", "7/27/1992", "7/27/1992", 
"8/1/1992", "7/1/1994", "7/1/1994", "7/1/1994", "8/7/2003", "8/7/2003", 
"8/7/2003", "8/7/2003", "7/21/2004", "7/21/2004", "7/26/2004", 
"7/26/2004", "7/5/2005", "7/5/2005", "7/9/2005", "7/9/2005", 
"7/9/2005"), event.id = c(8L, 8L, 8L, 10L, 11L, 11L, 11L, 14L, 
14L, 15L, 15L, 17L, 17L, 18L, 18L, 20L, 20L, 21L, 21L, 21L), id = c("L5", "L58", 
"L73", "L21", "L5", "L58", "L73", "L5", "L73", "L7", "L57", "L21", 
"L47", "L54", "L100", "J27", "J31", "J16", "J26", "J36"), sex = structure(c(1L, 
2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 
1L, 2L, 1L), .Label = c("0", "1"), class = "factor"), 
age = c(28L, 12L, 6L, 42L, 30L, 14L, 8L, 39L, 17L, 42L, 26L, 
54L, 30L, 27L, 3L, 14L, 10L, 33L, 14L, 6L), matr = c("L9", 
"L9", "L9", "L21", "L9", "L9", "L9", "L9", "L9", "L37", "L45", 
"L21", "L21", "L35", "L35", "J4", "J4", "J7", "J7", "J7"),
matralive = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L), pod = c("L", "L", "L", "L", "L", 
"L", "L", "L", "L", "L", "L", "L", "L", "L", "L", "J", "J", 
"J", "J", "J")), row.names = c(NA, -20L), class = c("tbl_df", "tbl", "data.frame"))

My goal is to create a graph that looks something like this, using ggplot:

enter image description here

I'm stuck as to how to generate this - I imagine I will have to group by pod and matriline, but am unsure how to go about this.

Any help is super appreciated - thank you so much!!

CodePudding user response:

Here is one possibility, using geom_mark_ellipse from the ggforce package to draw the ellipses. To get one ellipse per combination of matr and date, we can pass interaction(date, matr) as the group aesthetic; interaction() combines the two columns into a single factor with one level per date/matriline pair.

library(tidyverse)
library(ggforce)

# Plot age against sighting date. Points encode sex (shape) and pod (colour);
# every date x matriline combination is enclosed in a labelled ellipse.
# Layers must be joined with `+`, otherwise each line is evaluated as a
# separate expression and only the empty ggplot() canvas is drawn.
df %>%
  ggplot(aes(x = date, y = age)) +
  # NOTE(review): size = 5 sits inside aes(), so it is mapped as a constant
  # aesthetic (and gets a legend entry) rather than set as a fixed size.
  geom_point(aes(shape = factor(sex), colour = factor(pod), size = 5)) +
  # interaction() merges date and matr into one grouping factor, giving one
  # ellipse per matriline within each date, labelled with the matriline.
  geom_mark_ellipse(
    aes(color = pod, group = interaction(date, matr), label = matr)
  ) +
  # Extra axis padding, presumably to leave room for ellipses and labels.
  scale_y_continuous(expand = c(0, 25)) +
  scale_x_discrete(expand = c(0, 1.35))

Output

enter image description here

Data

# 20-row sample data used by the plotting code above, built directly as a
# tibble-classed data frame (8 columns, one vector per column).
df <- structure(
  list(
    date = c(
      "7/27/1992", "7/27/1992", "7/27/1992", "8/1/1992", "7/1/1994",
      "7/1/1994", "7/1/1994", "8/7/2003", "8/7/2003", "8/7/2003",
      "8/7/2003", "7/21/2004", "7/21/2004", "7/26/2004", "7/26/2004",
      "7/5/2005", "7/5/2005", "7/9/2005", "7/9/2005", "7/9/2005"
    ),
    event.id = c(
      8L, 8L, 8L, 10L, 11L, 11L, 11L, 14L, 14L, 15L,
      15L, 17L, 17L, 18L, 18L, 20L, 20L, 21L, 21L, 21L
    ),
    id = c(
      "L5", "L58", "L73", "L21", "L5", "L58", "L73", "L5", "L73", "L7",
      "L57", "L21", "L47", "L54", "L100", "J27", "J31", "J16", "J26", "J36"
    ),
    # Integer codes 1/2 carry the labels "0"/"1", exactly as in the dput form.
    sex = factor(
      c(
        1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L,
        2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L
      ),
      levels = 1:2,
      labels = c("0", "1")
    ),
    age = c(
      28L, 12L, 6L, 42L, 30L, 14L, 8L, 39L, 17L, 42L,
      26L, 54L, 30L, 27L, 3L, 14L, 10L, 33L, 14L, 6L
    ),
    matr = c(
      "L9", "L9", "L9", "L21", "L9", "L9", "L9", "L9", "L9", "L37",
      "L45", "L21", "L21", "L35", "L35", "J4", "J4", "J7", "J7", "J7"
    ),
    # Run-length form of the original literal vectors.
    matralive = rep(c(1L, 0L, 1L, 0L), times = c(7L, 4L, 2L, 7L)),
    pod = rep(c("L", "J"), times = c(15L, 5L))
  ),
  row.names = c(NA, -20L),
  class = c("tbl_df", "tbl", "data.frame")
)
  • Related