Using this sample data frame:
> dput(coun2b)
structure(list(Camden = c(13.9933481152993, 17.5410199556541,
26.0055432372506, 19.1064301552106, 9.05764966740577, 17.5321507760532
), Guilford = c(24.674715261959, 27.5097949886105, 25.4646924829157,
22.2637813211845, 7.60227790432802, 17.9681093394077), years = 2012:2017,
Camden_ymin = c(12.4514939737261, 15.4927722105436, 22.5744436662436,
16.8415649174844, 7.45264839077184, 15.6645677387521), Guilford_ymin = c(23.2136204848819,
26.3627764588421, 23.8076842636931, 20.383805927254, 5.58799564906578,
16.2548749333076), Camden_ymax = c(15.5352022568726, 19.5892677007646,
29.4366428082575, 21.3712953929369, 10.6626509440397, 19.3997338133543
), Guilford_ymax = c(26.1358100390361, 28.6568135183788,
27.1217007021384, 24.143756715115, 9.61656015959026, 19.6813437455079
)), class = "data.frame", row.names = c(NA, -6L))
which looks like this:
coun2b
Camden Guilford Camden_ymin Guilford_ymin Camden_ymax Guilford_ymax
1 13.99335 24.674715 12.451494 23.213620 15.53520 26.13581
2 17.54102 27.509795 15.492772 26.362776 19.58927 28.65681
3 26.00554 25.464692 22.574444 23.807684 29.43664 27.12170
4 19.10643 22.263781 16.841565 20.383806 21.37130 24.14376
5 9.05765 7.602278 7.452648 5.587996 10.66265 9.61656
6 17.53215 17.968109 15.664568 16.254875 19.39973 19.68134
and I use that dataframe with this:
library(tidyverse)

# Bar chart of the yearly Guilford values in the wide data frame `coun2b`,
# with error bars taken from the Guilford_ymin / Guilford_ymax columns.
# ggplot2 layers have to be chained with `+`; without it each call is
# evaluated on its own and nothing is added to the plot.
# NOTE(review): `plotscalex` is not defined in this snippet -- presumably a
# vector of the years to use as axis breaks/labels; confirm before running.
ggplot(coun2b, aes(x=years, Guilford, group=years)) +
  labs(title = "Counts in Guilford, N.C.",
       #caption="P. infestans range: 18 - 22 C; P. nicotianae range: 25 - 35 C; \"a\" Year with\nmost N.C. P. infestans reports (n=16); \"aa\" Year with most N.C. P. nicotianae reports (n=23)",
       y="Number of Days", x="Year" ) +
  geom_col( position = "dodge") +
  geom_errorbar(aes(ymin=Guilford_ymin, ymax=Guilford_ymax), position="dodge") +
  theme(axis.text.x = element_text(face="bold"), axis.title.x = element_text(size=14),
        axis.text.y = element_text(face="bold"), axis.title.y = element_text(size=14),
        title = element_text(size=12)) +
  scale_x_continuous("Year", labels = plotscalex, breaks=plotscalex) +
  # Horizontal line at the mean of the 2012-2016 values. Mapping a constant
  # string to `linetype` is what gives the line its own legend entry.
  geom_hline(aes(yintercept = mean(Guilford[years %in% 2012:2016]),
                 linetype='Mean for 2012-2016')) +
  scale_linetype_manual(name="Legend", values=c("Mean for 2012-2016"=1) )
I create this barplot:
However, my complete dataset is actually larger and shaped differently: it is in long format. This is a sample of the long version:
> dput(samp1)
structure(list(years = c(2012L, 2012L, 2012L, 2013L, 2013L, 2013L,
2014L, 2014L, 2014L, 2012L, 2012L, 2012L, 2013L, 2013L, 2013L,
2014L, 2014L, 2014L), valu = c("mean", "ymin", "ymax", "mean",
"ymin", "ymax", "mean", "ymin", "ymax", "mean", "ymin", "ymax",
"mean", "ymin", "ymax", "mean", "ymin", "ymax"), name = c("Camden",
"Camden", "Camden", "Camden", "Camden", "Camden", "Camden", "Camden",
"Camden", "Guilford", "Guilford", "Guilford", "Guilford", "Guilford",
"Guilford", "Guilford", "Guilford", "Guilford"), value = c(13.9933481152993,
12.4514939737261, 15.5352022568726, 17.5410199556541, 15.4927722105436,
19.5892677007646, 26.0055432372506, 22.5744436662436, 29.4366428082575,
24.674715261959, 23.2136204848819, 26.1358100390361, 27.5097949886105,
26.3627764588421, 28.6568135183788, 25.4646924829157, 23.8076842636931,
27.1217007021384), county = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), levels = c("Camden",
"Guilford", "Pasquotank", "Wake"), class = "factor")), row.names = c(NA,
-18L), class = c("tbl_df", "tbl", "data.frame"))
I tried using:
# Attempt to rebuild the wide-data plot from the long data frame `samp1`.
# This is the version that FAILS; the mistakes are kept as posted because
# they are what the question is about. Only the `+` between layers has been
# restored so the snippet parses and reaches the reported error.
# NOTE(review): `plotscalex` is not defined in this snippet; confirm it
# exists before running.
samp1 %>% filter(county == "Camden") %>%
  ggplot( aes(x=years, y=value, group=years)) +
  labs(title = "Number of Days in April-August with Suitable Weather for\nLate Blight Sporulation in Camden, N.C.", y="Number of Days", x="Year" ) +
  geom_col(data=samp1 %>% filter(county=="Camden", valu=="mean"), aes(x=years,
           y=value), position = "dodge") +
  # Problem 1: ymin/ymax are mapped to whole filtered data frames rather than
  #   to columns of the layer's data, hence "Aesthetics must be either
  #   length 1 or the same as the data (9)".
  # Problem 2: position="dodge" sits inside aes(), hence the warning
  #   "Ignoring unknown aesthetics: position".
  geom_errorbar(data=samp1 %>% filter(county=="Camden"),
                aes(ymin=samp1 %>% filter(valu=="ymin"), ymax=samp1 %>% filter(valu=="ymax"), position="dodge")) +
  theme(axis.text.x = element_text(face="bold"), axis.title.x = element_text(size=14),
        axis.text.y = element_text(face="bold"), axis.title.y = element_text(size=14),
        title = element_text(size=12)) +
  scale_x_continuous("Year", labels = plotscalex, breaks=plotscalex) +
  # NOTE(review): `samp1` has no `Camden` column (its columns are years,
  # valu, name, value, county), so this mapping looks like a leftover from
  # the wide-data version and would presumably fail as well.
  geom_hline(aes(yintercept = mean(Camden[years %in% 2012:2016]),
                 linetype='Mean for 2012-2016')) +
  scale_linetype_manual(name="Legend", values=c("Mean for 2012-2016"=1) )
as an attempt to create the same plot as above with the data in the long form. I get this error message:
Error in `geom_errorbar()`:
! Problem while computing aesthetics.
ℹ Error occurred in the 2nd layer.
Caused by error in `check_aesthetics()`:
! Aesthetics must be either length 1 or the same as the data (9)
✖ Fix the following mappings: `ymin` and `ymax`
Run `rlang::last_error()` to see where the error occurred.
Warning message:
In geom_errorbar(data = samp1 %>% filter(county == "Camden"), aes(ymin = samp1 %>% :
Ignoring unknown aesthetics: position
Because of the long form of this data frame, I use filter twice before I get to the geom_errorbar(). I don't think that's the problem; I just don't know how to filter correctly for ymin and ymax. I tried geom_errorbar(data=samp1 %>% filter(county=="Camden"), aes(ymin=samp1 %>% filter(county=="Camden", valu=="ymin"), ymax=samp1 %>% filter(county=="Camden",valu=="ymax"), position="dodge")) as well as what's in the code block above, and I can't get it to work. How can I use the long-form data, samp1, to create a plot that is the same as the plot created when the data are wide? I'm using the long form because I will have to do a side-by-side barplot for multiple counties, while in this post I'm just using one county.
CodePudding user response:
You are making this much harder than it needs to be. What's wrong with a simple pivot to get your data into the correct format in the first place? The only wrangling you then need inside the plot code is to get the groupwise hline:
library(tidyverse)

# One panel per county, built from the long data frame `samp1`.
# pivot_wider() spreads the `valu` labels (mean / ymin / ymax) into their own
# columns, so each row holds one county-year and the error-bar limits can be
# mapped directly as columns.
samp1 %>%
  pivot_wider(names_from = valu, values_from = value) %>%
  ggplot(aes(years, mean)) +
  geom_col() +
  geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.25) +
  # `data = . %>% ...` hands the layer a function of the plot data; here it
  # collapses the data to one row per county holding that county's average,
  # so every facet gets its own dashed horizontal line.
  geom_hline(data = . %>% group_by(county) %>% summarize(mean = mean(mean)),
             aes(yintercept = mean), linetype = 2) +
  facet_grid(.~county) +
  theme_gray(base_size = 16) +
  theme(strip.background = element_blank(),
        strip.text = element_text(size = 20, face = 2))
Or, if you want to do one plot at a time:
# Single-county version: reshape `samp1` so mean / ymin / ymax are columns,
# keep only the Guilford rows, then draw bars, error bars and a dashed line
# at the average of the plotted `mean` values.
samp1 %>%
  pivot_wider(names_from = valu, values_from = value) %>%
  filter(county == "Guilford") %>%
  ggplot(aes(years, mean)) +
  geom_col() +
  geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.25) +
  # Inside aes(), the inner `mean` is the data column and the outer `mean()`
  # is the function, so this is the average over the filtered rows.
  geom_hline(aes(yintercept = mean(mean)), linetype = 2) +
  theme_gray(base_size = 16) +
  ggtitle("Guilford")
CodePudding user response:
You get the error because the second data frame is not in the appropriate format. By pivoting, we can turn ymin and ymax into columns; then we only need to filter once before applying the plotting code:
library(tidyverse)

# Camden plot from the long data frame `samp1`: pivot so that mean / ymin /
# ymax become columns, filter to the county once, then build the plot with
# every layer chained by `+`.
samp1 %>%
  pivot_wider(names_from = valu,
              values_from = value) %>%
  filter(county == "Camden") %>%
  ggplot( aes(x=years, y=mean, group=years)) +
  labs(title = "Number of Days in April-August with Suitable Weather for\nLate Blight Sporulation in Camden, N.C.", y="Number of Days", x="Year" ) +
  geom_col(position = "dodge") +
  # `position` is an argument of the layer, not an aesthetic, so it stays
  # outside aes().
  geom_errorbar(aes(ymin=ymin, ymax=ymax), position="dodge") +
  theme(axis.text.x = element_text(face="bold"), axis.title.x = element_text(size=14),
        axis.text.y = element_text(face="bold"), axis.title.y = element_text(size=14),
        title = element_text(size=12)) +
  # Average of the plotted `mean` column over all filtered rows; the constant
  # string mapped to `linetype` creates the legend entry.
  # NOTE(review): the label says 2012-2016 but no year filter is applied
  # here -- restrict the rows if the full data set covers other years.
  geom_hline(aes(yintercept = mean(mean),
                 linetype='Mean for 2012-2016')) +
  scale_linetype_manual(name="Legend", values=c("Mean for 2012-2016"=1) )