I have the following dataframe and I want to generate a scatter plot with points ordered by date.
# Reproducible example data, bound to `subset_so` so that the plotting code
# can refer to it. It is a grouped tibble (classes "grouped_df"/"tbl_df") with
# 41 rows and four columns: cluster (character), name (character),
# date (Date) and group_id (integer). The `groups` attribute records the
# grouping by `cluster`.
subset_so <- structure(list(cluster = c("uid_1", "uid_2", "uid_3", "uid_4",
"uid_5", "uid_6", "Cluster_07", "Cluster_07", "Cluster_07", "Cluster_07",
"Cluster_07", "uid_12", "uid_13", "Cluster_07", "Cluster_07",
"uid_16", "Cluster_07", "Cluster_07", "uid_19", "uid_20", "uid_21",
"uid_22", "uid_23", "uid_24", "uid_25", "uid_26", "uid_27", "uid_28",
"uid_29", "Cluster_50", "uid_31", "uid_32", "uid_33", "uid_34",
"uid_35", "uid_36", "Cluster_50", "uid_38", "uid_39", "uid_40",
"Cluster_50"), name = c("E569", "E847", "E848", "E882", "E398",
"E443", "E462", "E970", "E1078", "E831", "E866", "E1047", "E964",
"E507", "E774", "E1106", "E943", "E1069", "E867", "E868", "E44",
"E60", "E869", "E482", "E125", "E126", "E114", "E123", "E81",
"E504", "E178", "E179", "E180", "E181", "E299", "E793", "E219",
"E182", "E183", "E184", "E650"), date = structure(c(18281, 18396,
18396, 18414, 18441, 18526, 18586, 18267, 18300, 18317, 18418,
18437, 18461, 18469, 18470, 18476, 18497, 18501, 18564, 18568,
18341, 18353, 18383, 18401, 18411, 18411, 18423, 18435, 18495,
18498, 18542, 18552, 18559, 18559, 18598, 18604, 18617, 18277,
18277, 18280, 18340), class = "Date"), group_id = c(3L, 8L, 19L,
28L, 30L, 31L, 1L, 1L, 1L, 1L, 1L, 4L, 5L, 1L, 1L, 6L, 1L, 1L,
7L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 2L, 20L,
21L, 22L, 23L, 24L, 25L, 2L, 26L, 27L, 29L, 2L)), row.names = c(NA,
-41L), groups = structure(list(cluster = c("Cluster_07", "Cluster_50",
"uid_1", "uid_12", "uid_13", "uid_16", "uid_19", "uid_2", "uid_20",
"uid_21", "uid_22", "uid_23", "uid_24", "uid_25", "uid_26", "uid_27",
"uid_28", "uid_29", "uid_3", "uid_31", "uid_32", "uid_33", "uid_34",
"uid_35", "uid_36", "uid_38", "uid_39", "uid_4", "uid_40", "uid_5",
"uid_6"), .rows = structure(list(c(7L, 8L, 9L, 10L, 11L, 14L,
15L, 17L, 18L), c(30L, 37L, 41L), 1L, 12L, 13L, 16L, 19L, 2L,
20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 3L, 31L,
32L, 33L, 34L, 35L, 36L, 38L, 39L, 4L, 40L, 5L, 6L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -31L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
I could generate the plot using the following code.
# Current attempt: one point per observation, with a black line joining the
# points that belong to the same cluster. Both axes are ordered via reorder().
# ggplot2 layers must be chained with `+`; without it the layers are never
# added to the plot and `ylab("cluster") xlab("date")` does not even parse.
ggplot(
  subset_so,
  aes(reorder(date, factor(cluster)), reorder(factor(cluster), date))
) +
  geom_line(aes(group = cluster), color = "black") +
  geom_point(size = 3) +
  ylab("cluster") +
  xlab("date") +
  theme_light() +
  theme(
    legend.position = "bottom",
    legend.key = element_rect(fill = "gray96"),
    legend.title = element_text(size = 10),
    text = element_text(size = 12),
    axis.title.x = element_text(vjust = 0, size = 11),
    axis.title.y = element_text(vjust = 2, size = 11),
    # Rotate the date labels so they do not overlap.
    axis.text.x = element_text(angle = 90, hjust = 1, size = 9),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )
But what I want is for the y-axis to be ordered by each cluster's earliest date: Cluster_07 and Cluster_50 should be positioned according to their first (earliest) point, irrespective of their other points.
For example,
Cluster_07 has the earliest date, so it should be at the very bottom of the y-axis. Similarly, Cluster_50 should be placed just before uid_21 on the y-axis.
Is there a way to do this? Many thanks in advance!
CodePudding user response:
It sounds like you want to sort the clusters by their minimum date. We can do this by ungrouping first (so that each cluster can be compared with the others) and then using forcats::fct_reorder,
whose three main parameters are 1) the vector to be reordered, 2) the variable to sort by, and 3) the summary function to apply.
library(dplyr)
library(ggplot2)

# Plot date against cluster, with the y-axis ordered by each cluster's
# earliest date. ggplot2 layers are chained with `+`, data steps with `%>%`.
subset_so %>%
  # Drop the grouping so the factor levels are computed across all clusters
  # rather than within each group.
  ungroup() %>%
  # Reorder the levels of `cluster` by the minimum `date` within each cluster.
  mutate(cluster = forcats::fct_reorder(cluster, date, .fun = min)) %>%
  ggplot(aes(date, cluster)) +
  geom_line(aes(group = cluster), color = "black") +
  geom_point(size = 3) +
  ylab("cluster") +
  xlab("date") +
  theme_light() +
  theme(
    legend.position = "bottom",
    legend.key = element_rect(fill = "gray96"),
    legend.title = element_text(size = 10),
    text = element_text(size = 12),
    axis.title.x = element_text(vjust = 0, size = 11),
    axis.title.y = element_text(vjust = 2, size = 11),
    # Rotate the date labels so they do not overlap.
    axis.text.x = element_text(angle = 90, hjust = 1, size = 9),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )