I have data that look like this (I'm only including the first twenty rows to show the spread of data but there are about 135 rows):
# Console output of dput(id), shortened to the first 20 rows of the data.
# Each column below holds 20 values, but row.names still reads c(NA, -134L).
# NOTE(review): presumably left over from the full data set -- use c(NA, -20L)
# when rebuilding this excerpt.
> dput(id)
structure(list(date = c("7/27/1992", "7/27/1992", "7/27/1992",
"8/1/1992", "7/1/1994", "7/1/1994", "7/1/1994", "8/7/2003", "8/7/2003",
"8/7/2003", "8/7/2003", "7/21/2004", "7/21/2004", "7/26/2004",
"7/26/2004", "7/5/2005", "7/5/2005", "7/9/2005", "7/9/2005",
"7/9/2005"), event.id = c(8L, 8L, 8L, 10L, 11L, 11L, 11L, 14L,
14L, 15L, 15L, 17L, 17L, 18L, 18L, 20L, 20L, 21L, 21L, 21L), id = c("L5", "L58",
"L73", "L21", "L5", "L58", "L73", "L5", "L73", "L7", "L57", "L21",
"L47", "L54", "L100", "J27", "J31", "J16", "J26", "J36"), sex = structure(c(1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 1L), .Label = c("0", "1"), class = "factor"),
age = c(28L, 12L, 6L, 42L, 30L, 14L, 8L, 39L, 17L, 42L, 26L,
54L, 30L, 27L, 3L, 14L, 10L, 33L, 14L, 6L), matr = c("L9",
"L9", "L9", "L21", "L9", "L9", "L9", "L9", "L9", "L37", "L45",
"L21", "L21", "L35", "L35", "J4", "J4", "J7", "J7", "J7"),
matralive = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L), pod = c("L", "L", "L", "L", "L",
"L", "L", "L", "L", "L", "L", "L", "L", "L", "L", "J", "J",
"J", "J", "J")), row.names = c(NA, -134L), class = c("tbl_df", "tbl", "data.frame"))
My goal is to create a graph that looks something like this, using ggplot:
I'm stuck as to how to generate this - I imagine I will have to group by pod and matriline, but am unsure how to go about this.
Any help is super appreciated - thank you so much!!
CodePudding user response:
Here is one possibility using `geom_mark_ellipse` from `ggforce` for the ellipses. To have multiple groups (i.e., `matr` and `date`) to draw the ellipses, we can use `interaction` to combine the two columns into a new factor.
library(tidyverse)
library(ggforce)
# Plot age against sighting date. Points are shaped by sex and coloured by pod;
# one labelled ellipse is drawn per matriline on each date.
# The layers must be chained with `+`, otherwise only the empty base plot is
# built and the geoms/scales are evaluated as separate, unattached objects.
df %>%
  ggplot(aes(x = date, y = age)) +
  # `size` is a constant, so it is set outside aes(); inside aes() it would be
  # treated as a mapped variable and add a meaningless size legend.
  geom_point(aes(shape = factor(sex), colour = factor(pod)), size = 5) +
  # interaction() combines date and matr into a single grouping factor so that
  # every date/matriline combination gets its own ellipse.
  geom_mark_ellipse(
    aes(colour = pod, group = interaction(date, matr), label = matr)
  ) +
  # Extra padding on both axes; presumably to leave room for the ellipses and
  # their labels near the panel edges.
  scale_y_continuous(expand = c(0, 25)) +
  scale_x_discrete(expand = c(0, 1.35))
Output
Data
# Example data used by the plot: 20 rows, 8 columns, stored as a tibble-classed
# data frame. Laid out one column per line for readability.
df <- structure(
  list(
    date = c(
      "7/27/1992", "7/27/1992", "7/27/1992", "8/1/1992",
      "7/1/1994", "7/1/1994", "7/1/1994",
      "8/7/2003", "8/7/2003", "8/7/2003", "8/7/2003",
      "7/21/2004", "7/21/2004", "7/26/2004", "7/26/2004",
      "7/5/2005", "7/5/2005", "7/9/2005", "7/9/2005", "7/9/2005"
    ),
    event.id = c(
      8L, 8L, 8L, 10L, 11L, 11L, 11L, 14L, 14L, 15L,
      15L, 17L, 17L, 18L, 18L, 20L, 20L, 21L, 21L, 21L
    ),
    id = c(
      "L5", "L58", "L73", "L21", "L5", "L58", "L73", "L5", "L73", "L7",
      "L57", "L21", "L47", "L54", "L100", "J27", "J31", "J16", "J26", "J36"
    ),
    # Two-level factor with levels "0" and "1" (same codes as the dput output).
    sex = factor(
      c(
        "0", "1", "1", "0", "0", "1", "1", "0", "1", "0",
        "1", "0", "0", "0", "1", "1", "0", "0", "1", "0"
      ),
      levels = c("0", "1")
    ),
    age = c(
      28L, 12L, 6L, 42L, 30L, 14L, 8L, 39L, 17L, 42L,
      26L, 54L, 30L, 27L, 3L, 14L, 10L, 33L, 14L, 6L
    ),
    matr = c(
      "L9", "L9", "L9", "L21", "L9", "L9", "L9", "L9", "L9", "L37",
      "L45", "L21", "L21", "L35", "L35", "J4", "J4", "J7", "J7", "J7"
    ),
    matralive = c(
      1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L,
      0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
    ),
    # The first 15 rows belong to pod "L", the last 5 to pod "J".
    pod = rep(c("L", "J"), times = c(15L, 5L))
  ),
  row.names = c(NA, -20L),
  class = c("tbl_df", "tbl", "data.frame")
)