How to draw heterogeneity across ids in my panel data set?-CodePudding

# Console transcript: the call below serialises rows 1-20 and columns 1-3 of
# df2007 as an R expression; the structure(...) literal that follows appears
# to be the printed result (columns ID, yr and cm, 20 rows).
dput(df2007[1:20, 1:3]) 
structure(list(ID = c(1120L, 1120L, 1120L, 1120L, 1111L, 1111L, 
1111L, 1111L, 1123L, 1123L, 1123L, 1123L, 1135L, 1135L, 1135L, 
1119L, 1119L, 1119L, 1119L, 1124L), yr = c(2007L, 2008L, 2010L, 
2011L, 2007L, 2008L, 2010L, 2011L, 2007L, 2008L, 2010L, 2011L, 
2007L, 2008L, 2010L, 2007L, 2008L, 2010L, 2011L, 2007L), cm = c(1.1, 
1.1, 1.4, 1.3, 1.6, 1.6, 1.7, 1.9, 1.5, 1.5, 1.5, 1.6, 0.9, 1, 
1.2, 2.1, 2.2, 3.4, 4.1, 0.8)), row.names = c("22", "23", "24", 
"25", "171", "172", "173", "174", "214", "215", "216", "217", 
"218", "219", "220", "262", "263", "264", "265", "266"), class = "data.frame")

What I want is kind of like this figure:

I adapted some code I found online, but the result is a bit messy.

# Asker's original attempt, kept as posted except that the ggplot2 layers are
# joined with `+` (without it, only the bare ggplot() call is part of the
# pipeline and the geoms/labels are evaluated as separate statements).
# Computes the mean cm per ID, joins it back onto the observations, then plots
# the raw cm values as points and the per-ID mean as a blue line.
df2007 %>%
  group_by(ID) %>%
  summarise(cm_mean = mean(cm)) %>%
  # no `by` given: joins on the columns the two tables have in common
  left_join(df2007) %>%
  ggplot(data = .,
         aes(x = reorder(as.character(ID), ID), y = cm)) +
  geom_point() +
  # note: this layer maps the numeric ID to x, whereas the base aesthetic above
  # uses a reordered character ID
  geom_line(aes(x = ID, y = cm_mean), col = "blue") +
  labs(x = "ID", y = "Diameter")

CodePudding user response：

You don't need the reordering:

# Example data from the question: 20 rows with columns ID, yr and cm.
# The row names are the original row labels carried over from the dput output.
df2007 <- structure(
  list(
    ID = c(
      1120L, 1120L, 1120L, 1120L,
      1111L, 1111L, 1111L, 1111L,
      1123L, 1123L, 1123L, 1123L,
      1135L, 1135L, 1135L,
      1119L, 1119L, 1119L, 1119L,
      1124L
    ),
    yr = c(
      2007L, 2008L, 2010L, 2011L,
      2007L, 2008L, 2010L, 2011L,
      2007L, 2008L, 2010L, 2011L,
      2007L, 2008L, 2010L,
      2007L, 2008L, 2010L, 2011L,
      2007L
    ),
    cm = c(
      1.1, 1.1, 1.4, 1.3,
      1.6, 1.6, 1.7, 1.9,
      1.5, 1.5, 1.5, 1.6,
      0.9, 1, 1.2,
      2.1, 2.2, 3.4, 4.1,
      0.8
    )
  ),
  row.names = c(
    "22", "23", "24", "25",
    "171", "172", "173", "174",
    "214", "215", "216", "217",
    "218", "219", "220",
    "262", "263", "264", "265",
    "266"
  ),
  class = "data.frame"
)
library(ggplot2)
library(dplyr)

# Plot every cm observation per ID as a point and draw a blue line through the
# per-ID means. The ggplot2 layers must be chained with `+`; the dplyr steps
# are chained with `%>%`.
df2007 %>%
  group_by(ID) %>%
  summarise(
    cm_mean = mean(cm)
  ) %>%
  # attach each ID's mean back onto its individual observations
  left_join(df2007, by = "ID") %>%
  # use the ID as a factor so it is plotted on a discrete x axis
  mutate(ID = as.factor(ID)) %>%
  ggplot() +
  aes(
    x = ID,
    y = cm
  ) +
  geom_point() +
  # add group = 1 for geom_line to function correctly: with a discrete x each
  # factor level forms its own group, so the means would not be connected
  # across IDs
  geom_line(aes(y = cm_mean, group = 1), col = "blue")

<sup>Created on 2022-12-08 by the reprex package (v1.0.0)</sup>