I would like to have a bar chart with the distribution of the population I am studying, this bar chart has age as x axis (count is y) and the filling is ethnicity. I'd like to overlay some of the subjects inside their corresponding groups with a geom_scatter. However it creates its own axes. I can't share the data but here's a dummy tibble
df = tribble(
~id, ~agegroup, ~ethnicity,
#--|--|----
"a", "20s", "African Descent",
"b", "30s", "White",
"c", "50s", "White",
"d", "40s", "Hispanic",
"e", "20s", "White",
"f", "30s", "Hispanic",
"g", "20s", "Hispanic",
"h", "30s", "White",
"i", "20s", "African Descent",
"j", "30s", "White",
"k", "50s", "White",
"l", "20s", "White",
"m", "30s", "Hispanic",
"n", "20s", "Hispanic",
"o", "30s", "White",
)
df
dmplot <- ggplot(df, aes(x = agegroup, fill = ethnicity ))
geom_bar(stat = "count")
labs(
x = "Age Group",
y = paste0("Population (total = ", df %>% nrow(), ")"))
geom_jitter(df,
aes(x = agegroup,
y = ethnicity)) # this is where I would need to retrieve the geom_bar fill location
dmplot
CodePudding user response:
This is a bit hacky, but I think something like this would do it. unfortunately it seems like you cannot assign jitter height to the aestetics, but you might be able to find another way to make height dependent on the height of the rect.
df = tribble(
~id, ~agegroup, ~ethnicity,
#--|--|----
"a", "20s", "African Descent",
"b", "30s", "White",
"c", "50s", "White",
"d", "40s", "Hispanic",
"e", "20s", "White",
"f", "30s", "Hispanic",
"g", "20s", "Hispanic",
"h", "30s", "White",
"i", "20s", "African Descent",
"j", "30s", "White",
"k", "50s", "White",
"l", "20s", "White",
"m", "30s", "Hispanic",
"n", "20s", "Hispanic",
"o", "30s", "White",
)
df_2 <- df %>%
count(agegroup, ethnicity) %>%
group_by(agegroup ) %>%
mutate(top_rect = cumsum(n),
bottom_rect = lag(top_rect, default = 0))
df_2_uncounted <- df_2 %>%
ungroup() %>%
uncount(n)
ggplot(df_2)
geom_rect( aes(xmin = as.numeric(as.factor(agegroup)) - .45,
xmax= as.numeric(as.factor(agegroup)) .45,
ymin = bottom_rect,
ymax = top_rect,
fill = ethnicity ))
geom_jitter(data = df_2_uncounted,
aes(x = as.numeric(as.factor(agegroup)),
y = (bottom_rect top_rect)/2),
width = .3,
height = .5)
scale_x_continuous(breaks = unique(as.numeric(as.factor(df_2$agegroup))),
labels = levels(as.factor(df_2$agegroup)))
labs(
x = "Age Group",
y = paste0("Population (total = ", df_2_uncounted %>% nrow(), ")"))
Update
now with labels
df_2_uncounted <- df_2 %>%
ungroup() %>%
uncount(n)%>%
arrange(agegroup, ethnicity) %>%
group_by(agegroup, ethnicity) %>%
mutate(id2 = 1:n()) %>%
left_join(df %>%
arrange(agegroup, ethnicity) %>%
group_by(agegroup, ethnicity) %>%
mutate(id2 = 1:n()),
by = c("agegroup", "ethnicity", "id2"))
ggplot(df_2)
geom_rect( aes(xmin = as.numeric(as.factor(agegroup)) - .45,
xmax= as.numeric(as.factor(agegroup)) .45,
ymin = bottom_rect,
ymax = top_rect,
fill = ethnicity ))
geom_jitter(data = df_2_uncounted,
aes(x = as.numeric(as.factor(agegroup)),
y = (bottom_rect top_rect)/2),
position = position_jitter(seed = 1, height =0.5))
geom_text(data = df_2_uncounted,
aes(x = as.numeric(as.factor(agegroup)),
y = (bottom_rect top_rect)/2,
label = id),
position = position_jitter(seed = 1, height =0.5))
scale_x_continuous(breaks = unique(as.numeric(as.factor(df_2$agegroup))),
labels = levels(as.factor(df_2$agegroup)))
labs(
x = "Age Group",
y = paste0("Population (total = ", df_2_uncounted %>% nrow(), ")"))