How to use different filters in geom_errorbar of ggplot2 in R-CodePudding

Using this sample data frame:

> dput(coun2b)
structure(list(Camden = c(13.9933481152993, 17.5410199556541, 
26.0055432372506, 19.1064301552106, 9.05764966740577, 17.5321507760532
), Guilford = c(24.674715261959, 27.5097949886105, 25.4646924829157, 
22.2637813211845, 7.60227790432802, 17.9681093394077), years = 2012:2017, 
    Camden_ymin = c(12.4514939737261, 15.4927722105436, 22.5744436662436, 
    16.8415649174844, 7.45264839077184, 15.6645677387521), Guilford_ymin = c(23.2136204848819, 
    26.3627764588421, 23.8076842636931, 20.383805927254, 5.58799564906578, 
    16.2548749333076), Camden_ymax = c(15.5352022568726, 19.5892677007646, 
    29.4366428082575, 21.3712953929369, 10.6626509440397, 19.3997338133543
    ), Guilford_ymax = c(26.1358100390361, 28.6568135183788, 
    27.1217007021384, 24.143756715115, 9.61656015959026, 19.6813437455079
    )), class = "data.frame", row.names = c(NA, -6L))

which looks like this:

coun2b
    Camden  Guilford Camden_ymin Guilford_ymin Camden_ymax Guilford_ymax
1 13.99335 24.674715   12.451494     23.213620    15.53520      26.13581
2 17.54102 27.509795   15.492772     26.362776    19.58927      28.65681
3 26.00554 25.464692   22.574444     23.807684    29.43664      27.12170
4 19.10643 22.263781   16.841565     20.383806    21.37130      24.14376
5  9.05765  7.602278    7.452648      5.587996    10.66265       9.61656
6 17.53215 17.968109   15.664568     16.254875    19.39973      19.68134

and I use that dataframe with this:

library(tidyverse)

# Bar chart of the yearly Guilford values from the wide data frame `coun2b`,
# with error bars taken from the Guilford_ymin / Guilford_ymax columns and a
# horizontal line at the mean of the 2012-2016 Guilford values.
# Layers must be chained with `+`; without it each call is evaluated on its
# own and nothing is added to the plot.
# NOTE(review): `plotscalex` is not defined in this snippet; presumably a
# vector of the years to use as x-axis breaks/labels -- confirm.
ggplot(coun2b, aes(x=years, Guilford, group=years)) +
  labs(title = "Counts in Guilford, N.C.", 
       #caption="P. infestans range: 18 - 22 C; P. nicotianae range: 25 - 35 C; \"a\" Year with\nmost N.C. P. infestans reports (n=16); \"aa\" Year with most N.C. P. nicotianae reports (n=23)",
       y="Number of Days", x="Year" ) +
  geom_col( position = "dodge") +
  geom_errorbar(aes(ymin=Guilford_ymin, ymax=Guilford_ymax), position="dodge") +
  theme(axis.text.x = element_text(face="bold"), axis.title.x = element_text(size=14), 
        axis.text.y = element_text(face="bold"), axis.title.y = element_text(size=14), 
        title = element_text(size=12)) +
  scale_x_continuous("Year", labels = plotscalex, breaks=plotscalex) +
  # Mapping linetype to a constant string inside aes() creates a legend entry
  # for the line, which scale_linetype_manual() then names and styles.
  geom_hline(aes(yintercept = mean(Guilford[years %in% 2012:2016]),
                 linetype='Mean for 2012-2016')) +
  scale_linetype_manual(name="Legend", values=c("Mean for 2012-2016"=1) )

I create this barplot:

However, my complete dataset is actually larger and is stored in long format rather than wide format. This is a sample of the long-format data:

> dput(samp1)
structure(list(years = c(2012L, 2012L, 2012L, 2013L, 2013L, 2013L, 
2014L, 2014L, 2014L, 2012L, 2012L, 2012L, 2013L, 2013L, 2013L, 
2014L, 2014L, 2014L), valu = c("mean", "ymin", "ymax", "mean", 
"ymin", "ymax", "mean", "ymin", "ymax", "mean", "ymin", "ymax", 
"mean", "ymin", "ymax", "mean", "ymin", "ymax"), name = c("Camden", 
"Camden", "Camden", "Camden", "Camden", "Camden", "Camden", "Camden", 
"Camden", "Guilford", "Guilford", "Guilford", "Guilford", "Guilford", 
"Guilford", "Guilford", "Guilford", "Guilford"), value = c(13.9933481152993, 
12.4514939737261, 15.5352022568726, 17.5410199556541, 15.4927722105436, 
19.5892677007646, 26.0055432372506, 22.5744436662436, 29.4366428082575, 
24.674715261959, 23.2136204848819, 26.1358100390361, 27.5097949886105, 
26.3627764588421, 28.6568135183788, 25.4646924829157, 23.8076842636931, 
27.1217007021384), county = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), levels = c("Camden", 
"Guilford", "Pasquotank", "Wake"), class = "factor")), row.names = c(NA, 
-18L), class = c("tbl_df", "tbl", "data.frame"))

I tried using:

# Failing attempt on the long-format data `samp1`, kept as posted so that it
# reproduces the error message quoted after it. Only the `+` operators that
# chain the layers have been restored.
# The problems that trigger the error and the warning:
#   * `ymin` / `ymax` are mapped to whole filtered data frames instead of to
#     columns of the layer's data, so their length does not match the 9 rows
#     of the Camden subset;
#   * `position = "dodge"` sits inside aes(), so it is treated as an unknown
#     aesthetic rather than as an argument of geom_errorbar().
# NOTE(review): `plotscalex` is not defined in this snippet, and `samp1` has
# no `Camden` column for the geom_hline() mapping -- confirm intended values.
samp1 %>% filter(county == "Camden") %>% 
    ggplot( aes(x=years, y=value, group=years)) +
    labs(title = "Number of Days in April-August with Suitable Weather for\nLate Blight Sporulation in Camden, N.C.", y="Number of Days", x="Year" ) +
    geom_col(data=samp1 %>% filter(county=="Camden", valu=="mean"), aes(x=years, 
                                                                         y=value), position = "dodge") +
    geom_errorbar(data=samp1 %>% filter(county=="Camden"), 
                  aes(ymin=samp1 %>% filter(valu=="ymin"), ymax=samp1 %>% filter(valu=="ymax"), position="dodge")) +
    theme(axis.text.x = element_text(face="bold"), axis.title.x = element_text(size=14), 
          axis.text.y = element_text(face="bold"), axis.title.y = element_text(size=14), 
          title = element_text(size=12)) +
    scale_x_continuous("Year", labels = plotscalex, breaks=plotscalex) +
    geom_hline(aes(yintercept = mean(Camden[years %in% 2012:2016]),
                   linetype='Mean for 2012-2016')) +
    scale_linetype_manual(name="Legend", values=c("Mean for 2012-2016"=1) )

as an attempt to create the same plot as above with the data in the long form. I get this error message:

Error in `geom_errorbar()`:
! Problem while computing aesthetics.
ℹ Error occurred in the 2nd layer.
Caused by error in `check_aesthetics()`:
! Aesthetics must be either length 1 or the same as the data (9)
✖ Fix the following mappings: `ymin` and `ymax`
Run `rlang::last_error()` to see where the error occurred.
Warning message:
In geom_errorbar(data = samp1 %>% filter(county == "Camden"), aes(ymin = samp1 %>%  :
  Ignoring unknown aesthetics: position

Because of the long form of this dataframe, I use filter 2x before I get to the geom_errorbar(). I don't think that's the problem, I just don't know how to filter correctly for ymin and ymax. I tried geom_errorbar(data=samp1 %>% filter(county=="Camden"), aes(ymin=samp1 %>% filter(county=="Camden", valu=="ymin"), ymax=samp1 %>% filter(county=="Camden",valu=="ymax"), position="dodge")) as well as what's in the code block above and I can't get it to work. How can I use the long form data, samp1, to create a plot that is the same as the plot created when the data are wide? I'm using the long form because I will have to do a side-by-side barplot for multiple counties, while in this post, I'm just using one county.

CodePudding user response：

You are making this much harder than it needs to be. What's wrong with a simple pivot to get your data into the correct format in the first place? The only wrangling you then need inside the plot code is to get the groupwise hline:

library(tidyverse)

# Faceted bar chart, one panel per county, built from the long-format data.
# pivot_wider() spreads the `valu` labels ("mean", "ymin", "ymax") into
# columns of the same names, so every year/county row carries its bar height
# and both error-bar limits.
samp1 %>%
  pivot_wider(names_from = valu, values_from = value) %>%
  ggplot(aes(years, mean)) +
  geom_col() +
  geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.25) +
  # `. %>% ...` builds a function that ggplot2 applies to the plot data, giving
  # one row per county so that each facet gets its own dashed mean line.
  geom_hline(data = . %>% group_by(county) %>% summarize(mean = mean(mean)),
             aes(yintercept = mean), linetype = 2) +
  facet_grid(.~county) +
  theme_gray(base_size = 16) +
  theme(strip.background = element_blank(),
        strip.text = element_text(size = 20, face = 2))

Or, if you want to do one plot at a time:

# Single-county version: pivot the long data so that "mean", "ymin" and
# "ymax" become columns, keep only the Guilford rows, then draw the bars,
# the error bars and a dashed line at the average of the yearly means.
samp1 %>%
  pivot_wider(names_from = valu, values_from = value) %>%
  filter(county == "Guilford") %>%
  ggplot(aes(years, mean)) +
  geom_col() +
  geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.25) +
  # Inside aes(), the inner `mean` is the data column and the outer `mean()`
  # is the function, so the line sits at the average over the plotted years.
  geom_hline(aes(yintercept = mean(mean)), linetype = 2) +
  theme_gray(base_size = 16) +
  ggtitle("Guilford")

CodePudding user response：

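You get the error because the data frames passed to ymin and ymax are not in the appropriate format: an aesthetic must be a column of the layer's data, not a whole data frame. By pivoting the data to wide format, ymin and ymax become columns. Then we only need to filter once before applying the plotting code: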

library(tidyverse)
  # Camden plot from the long-format data: pivot_wider() turns the `valu`
  # labels ("mean", "ymin", "ymax") into columns, a single filter() keeps the
  # Camden rows, and the layers are chained with `+`.
  samp1 %>% 
    pivot_wider(names_from = valu, 
                values_from = value) %>% 
    filter(county == "Camden") %>% 
    ggplot( aes(x=years, y=mean, group=years)) +
    labs(title = "Number of Days in April-August with Suitable Weather for\nLate Blight Sporulation in Camden, N.C.", y="Number of Days", x="Year" ) +
    geom_col(position = "dodge") +
    geom_errorbar(aes(ymin=ymin, ymax=ymax), position="dodge") +
    theme(axis.text.x = element_text(face="bold"), axis.title.x = element_text(size=14), 
          axis.text.y = element_text(face="bold"), axis.title.y = element_text(size=14), 
          title = element_text(size=12)) +
    # The line is drawn at the average of the `mean` column over all rows that
    # remain after filtering; the constant linetype label creates the legend
    # entry that scale_linetype_manual() names and styles.
    geom_hline(aes(yintercept = mean(mean),
                   linetype='Mean for 2012-2016')) +
    scale_linetype_manual(name="Legend", values=c("Mean for 2012-2016"=1) )