I suspect I'm missing something obvious here, but I am very confused by the behavior of this plot. Rather than trying to dput() the data here, I provide a direct download link to the .csv file:
library(ggplot2)
library(dplyr)
library(magrittr)
# Direct download link for the data set (kept for reference; the script
# below reads a locally downloaded copy).
variant_url <- "https://data.cdc.gov/api/views/jr58-6ysp/rows.csv"

# Read the downloaded .csv; edit the placeholder path before running.
variant_data <- read.csv(
  "C:/<your download path here>/SARS-CoV-2_Variant_Proportions.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Parse week_ending with the month/day/year format so it can be compared
# and plotted as a Date.
variant_data$week_ending <- as.Date(
  variant_data$week_ending,
  format = "%m/%d/%Y"
)

# Keep only the national ("USA") rows from 2022-06-01 onwards and the three
# columns the plot needs.
USA_variant_data <- variant_data %>%
  filter(
    usa_or_hhsregion == "USA",
    week_ending >= as.Date("2022-06-01")
  ) %>%
  select(week_ending, variant, share)

# Stacked bars normalised to 1 per week, one fill colour per variant.
# Layers must be chained with `+`; otherwise each call is evaluated on its
# own and never attached to the plot object.
USA_variant_bar <- USA_variant_data %>%
  ggplot(aes(fill = variant, y = share, x = week_ending)) +
  geom_bar(position = "fill", stat = "identity") +
  # One text label is drawn per data row whose share exceeds 0.1,
  # centred vertically inside its bar segment.
  geom_text(
    aes(label = ifelse(share > 0.1, variant, "")),
    position = position_fill(vjust = 0.5),
    size = 2
  ) +
  labs(
    x = "",
    y = "Percent of genomes sequenced"
  ) +
  theme(
    plot.title = element_text(size = rel(1), face = "bold"),
    plot.subtitle = element_text(size = rel(0.7)),
    plot.caption = element_text(size = rel(1)),
    axis.text.y = element_text(color = "forestgreen"),
    axis.title.y = element_text(color = "forestgreen"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

print(USA_variant_bar)
This produces the following plot:
I don't understand why the labels are repeating.
CodePudding user response:
Your data contains multiple observations per variant and date. Because geom_text() draws one label per row, you end up with multiple labels. To fix that, I would suggest aggregating your data before passing it to ggplot:
library(ggplot2)
library(dplyr)
# Collapse the data to a single row per (week_ending, variant) by summing
# share, so that each bar segment receives exactly one label.
# `.groups = "drop"` returns an ungrouped result.
USA_variant_data <- USA_variant_data %>%
  group_by(week_ending, variant) %>%
  summarise(share = sum(share), .groups = "drop")

# Stacked bars normalised to 1 per week. Layers must be chained with `+`;
# otherwise each call is evaluated on its own and never attached to the plot.
ggplot(USA_variant_data, aes(fill = variant, y = share, x = week_ending)) +
  geom_col(position = "fill") +
  # Label only rows whose summed share exceeds 0.1, centred vertically
  # inside the bar segment.
  geom_text(
    aes(label = ifelse(share > 0.1, variant, "")),
    position = position_fill(vjust = 0.5),
    size = 2
  ) +
  labs(
    x = "",
    y = "Percent of genomes sequenced"
  ) +
  theme(
    plot.title = element_text(size = rel(1), face = "bold"),
    plot.subtitle = element_text(size = rel(0.7)),
    plot.caption = element_text(size = rel(1)),
    axis.text.y = element_text(color = "forestgreen"),
    axis.title.y = element_text(color = "forestgreen"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )