Home > OS >  Geom_vline not showing grouped lines in stacked scatter plots in ggplot
Geom_vline not showing grouped lines in stacked scatter plots in ggplot

Time:07-15

So I have a set of data (df) where there are some values a1, a2, and a3 generated by date and by group (b1, b2, b3). I want to stack these scatterplots on each other and facet wrap by group (seems to work). a1, a2 and a3 should be 3 different colors.

Then I have a second set of data (shuffled_dates) that I want to graph on the SAME plot as vertical x intercepts at certain dates. These I want to be dashed red lines and separated out also by group.

The problem I'm having is that all of the shuffled_dates lines are drawn in every facet, instead of each line appearing only in the facet of its own group (b1, b2, b3, etc.).

I've spent some time on this and am stumped, can someone explain why this is happening and suggest a way to fix it? Thank you

library(dplyr)
library(ggplot2)
# Fix the RNG seed so every sample() call below is reproducible.
set.seed(12)

# Daily dates from 2022-01-01 to 2022-04-15 as a one-column data frame;
# dates2 keeps the first 50 of them for the scatter data.
dates <-seq(as.Date("2022-01-01"), as.Date("2022-04-15"), by="days") %>% data.frame(Date = .) 
dates2 <- dates[1:50,] %>% data.frame(Date = .)

# Pool of 60 group labels (b1, b2, b3 repeated 20 times); each sample() call
# takes a shuffled selection of rows from that pool.
name <- c("b1","b2","b3") %>% rep(20) %>% data.frame()
Names <- name[sample(1:50),] #for df
Names2 <- name[sample(1:15),] #for shuffled data

# Scatter data: one row per date with three random value columns (a1, a2, a3)
# plus the group label in Names.
df <- data.frame(dates2, a1 = sample(50, 50, TRUE), a2= sample(50, 50, TRUE), a3= sample(50, 50, TRUE)) 
df <- cbind(df,Names) 
df<-df %>% group_by(Names) #not sure if this matters for the graph

# Vertical-line data: 15 randomly chosen dates, each tagged with a group label.
# NOTE: the label column is called Names2 here, not Names as in df.
shuffled_dates= dates[sample(1:105), ] %>% data.frame(Date = .) %>% slice(1:15)
shuffled_dates <- cbind(shuffled_dates,Names2)
shuffled_dates <- shuffled_dates %>% group_by(Names2) #not sure if this matters for the graph

head(df)
# A tibble: 6 x 5
# Groups:   Names [3]
  Date          a1    a2    a3 Names
  <date>     <int> <int> <int> <chr>
1 2022-01-01     8    49    30 b2   
2 2022-01-02    30     2    29 b2   
3 2022-01-03    42    20    48 b3   
4 2022-01-04    37    15    27 b3   
5 2022-01-05    43    39    14 b3   
6 2022-01-06    27    31    31 b1  

head(shuffled_dates)
# A tibble: 6 x 2
# Groups:   Names2 [3]
  Date       Names2
  <date>     <chr> 
1 2022-04-13 b2    
2 2022-01-23 b1    
3 2022-02-23 b3    
4 2022-03-26 b2    
5 2022-04-08 b3    
6 2022-03-05 b3 

# Base plot: a1, a2 and a3 drawn as separate point layers in different
# colors, one facet per group in Names. Layers must be chained with `+`;
# without it only the bare ggplot() object would be stored in plot1.
plot1 <- ggplot(data = df, aes(x = Date, y = a1)) +
  geom_point(color = "red") +
  ylim(0, 50) +
  facet_wrap(~Names, ncol = 2) +
  geom_point(aes(y = a2), color = "orange") +
  geom_point(aes(y = a3), color = "purple")
plot1

# Attempt that shows the problem: xintercept is passed as a plain vector, so
# it carries no group information and every line is drawn in every facet.
plot1 +
  geom_vline(
    xintercept = as.numeric(shuffled_dates$Date),
    color = "red",
    inherit.aes = TRUE
  )

# A couple other things I tried that didn't work
# plot1 + geom_vline(xintercept = as.numeric(shuffled_dates$Date), color = "red")
# plot1 + geom_vline(aes(group = Names2), xintercept = as.numeric(shuffled_dates$Date), color = "red")

Example of what I'd like to get (with fake date lines): enter image description here

Related questions: How to get geom_vline to honor facet_wrap?

How to produce different geom_vline in different facets in R?

edit, requested dput(df):

structure(list(Date = structure(c(18993, 18994, 18995, 18996, 
18997, 18998, 18999, 19000, 19001, 19002, 19003, 19004, 19005, 
19006, 19007, 19008, 19009, 19010, 19011, 19012, 19013, 19014, 
19015, 19016, 19017, 19018, 19019, 19020, 19021, 19022, 19023, 
19024, 19025, 19026, 19027, 19028, 19029, 19030, 19031, 19032, 
19033, 19034, 19035, 19036, 19037, 19038, 19039, 19040, 19041, 
19042), class = "Date"), a1 = c(8L, 8L, 38L, 18L, 39L, 16L, 36L, 
31L, 20L, 16L, 48L, 40L, 36L, 8L, 41L, 28L, 44L, 7L, 13L, 36L, 
14L, 3L, 44L, 19L, 6L, 34L, 8L, 9L, 19L, 29L, 25L, 39L, 19L, 
16L, 18L, 25L, 43L, 23L, 44L, 34L, 37L, 49L, 10L, 8L, 25L, 39L, 
15L, 36L, 5L, 5L), a2 = c(30L, 21L, 33L, 32L, 47L, 33L, 45L, 
42L, 48L, 41L, 33L, 29L, 33L, 40L, 29L, 41L, 11L, 49L, 9L, 1L, 
45L, 42L, 33L, 44L, 30L, 15L, 43L, 30L, 36L, 17L, 30L, 22L, 44L, 
23L, 22L, 14L, 40L, 6L, 5L, 18L, 35L, 35L, 4L, 22L, 13L, 26L, 
27L, 6L, 19L, 21L), a3 = c(26L, 44L, 48L, 25L, 33L, 41L, 13L, 
22L, 38L, 44L, 7L, 15L, 21L, 9L, 36L, 33L, 15L, 9L, 15L, 41L, 
26L, 28L, 13L, 43L, 19L, 19L, 41L, 34L, 3L, 13L, 11L, 21L, 50L, 
8L, 13L, 49L, 11L, 27L, 42L, 49L, 32L, 15L, 3L, 28L, 14L, 40L, 
1L, 21L, 34L, 33L), Names = c("b1", "b2", "b1", "b1", "b2", "b2", 
"b3", "b1", "b1", "b3", "b1", "b2", "b3", "b2", "b1", "b1", "b2", 
"b3", "b1", "b2", "b1", "b2", "b3", "b2", "b3", "b3", "b2", "b2", 
"b3", "b1", "b2", "b2", "b1", "b2", "b2", "b3", "b3", "b3", "b3", 
"b3", "b2", "b3", "b2", "b3", "b1", "b1", "b1", "b3", "b1", "b1"
)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-50L), groups = structure(list(Names = c("b1", "b2", "b3"), .rows = structure(list(
    c(1L, 3L, 4L, 8L, 9L, 11L, 15L, 16L, 19L, 21L, 30L, 33L, 
    45L, 46L, 47L, 49L, 50L), c(2L, 5L, 6L, 12L, 14L, 17L, 20L, 
    22L, 24L, 27L, 28L, 31L, 32L, 34L, 35L, 41L, 43L), c(7L, 
    10L, 13L, 18L, 23L, 25L, 26L, 29L, 36L, 37L, 38L, 39L, 40L, 
    42L, 44L, 48L)), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -3L), .drop = TRUE))

CodePudding user response:

I think I found the issue: in the geom_vline line you do not tell ggplot which facet each line belongs to. I would try the following:

# Rename the label column so it matches the faceting variable used for df.
# ungroup() first: shuffled_dates is grouped by Names2, and selecting on a
# grouped tibble would otherwise keep the grouping column around.
vlines <- shuffled_dates %>%
  ungroup() %>%
  select(Names = Names2, Datefix = Date)

# Pass the line data to geom_vline() as its own layer data and map xintercept
# inside aes(). Because vlines has a Names column, facet_wrap() draws each
# line only in the panel of its own group. This also avoids joining the lines
# onto df, which would repeat every point once per matching line.
ggplot(df, aes(x = Date, y = a1)) +
  geom_point(color = "red") +
  geom_vline(data = vlines, aes(xintercept = Datefix), color = "red") +
  ylim(0, 50) +
  facet_wrap(~Names, ncol = 2) +
  geom_point(aes(y = a2), color = "orange") +
  geom_point(aes(y = a3), color = "purple")

The issue I think is that in shuffled_dates your column name is Names2 and not Names. Otherwise I would try by filtering shuffled_dates, but I do not think it is necessary.

......

CodePudding user response:

Essentially, the problem is that you don't have the faceting variable in your shuffled_dates data frame. Renaming that column solves the issue.

Please also check my code improvement suggestions - mainly avoiding unnecessary piping and those cbind calls, and using head.

library(ggplot2)
# Seed the RNG so the sampled example data is reproducible.
set.seed(12)

# All daily dates in the range, and the first 50 of them for the scatter data.
dates <- data.frame(Date = seq(as.Date("2022-01-01"), as.Date("2022-04-15"), by = "days"))
dates2 <- head(dates, 50) 

# Pool of group labels; each sample() call takes a shuffled selection of rows.
name <- data.frame(rep(c("b1", "b2", "b3"), 20 ))
Names <- name[sample(1:50), ] # for df
Names2 <- name[sample(1:15), ] # for shuffled data

# Scatter data: dates, three random value columns and the group label (Names).
df <- data.frame(dates2, a1 = sample(50, 50, TRUE), a2 = sample(50, 50, TRUE), a3 = sample(50, 50, TRUE), Names)

## the actual problem is solved here by renaming Names2 to Names
shuffled_dates <- head(data.frame(Date = dates$Date[sample(1:105)], Names = Names2), 15)

# Points faceted by group, plus vertical lines taken from shuffled_dates.
# shuffled_dates has a Names column, so facet_wrap() can assign each line to
# the panel of its own group. Layers are chained with `+`.
p <- ggplot(data = df, aes(x = Date, y = a1)) +
  geom_point(color = "red") +
  ylim(0, 50) +
  facet_wrap(~Names, ncol = 2) +
  geom_vline(data = shuffled_dates, aes(xintercept = Date), color = "red")
p

## if you want to show all lines, you also need to set the coordinate limits to include the vlines
p +
  coord_cartesian(xlim = range(c(df$Date, shuffled_dates$Date)))

Created on 2022-07-14 by the reprex package (v2.0.1)

  • Related