I am new to R and am practicing by creating some basic graphs. The code below plots two variables together with their average line. How can I add a ±standard deviation band around the average line? Any help would be much appreciated.
# Example data: two grey-value profiles (grey1, grey2), each measured along
# its own, slightly different, distance axis (distance, distance1).
dat <- structure(
  list(
    distance = c(0, 0.2174741, 0.4349482, 0.6520882, 0.8695623, 1.0870364,
                 1.3045105, 1.5216505, 1.7391246, 1.9565987, 2.1740728,
                 2.3912128),
    grey1 = c(-300.364, -296.963, -292.887, -290.31, -285.777, -279.921,
              -274.418, -272.005, -273.666, -270.381, -270.273, -270.705),
    distance1 = c(0, 0.2114969, 0.4229937, 0.6341657, 0.8456625, 1.0571594,
                  1.2686562, 1.4798282, 1.6913251, 1.9028219, 2.1143188,
                  2.3254907),
    grey2 = c(-135.219, -132.601, -131.959, -133.514, -127.111, -116.404,
              -116.85, -115.464, -102.823, -101.497, -98.245, -98.474)
  ),
  class = "data.frame",
  row.names = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12")
)

library(data.table)
library(ggplot2)  # needed for ggplot(), aes() and the geom_*() layers below

# Reshape to long format: one (distance, grey) pair per row, with `variable`
# identifying which of the two profiles the row came from.
newdat <- data.table::melt(
  as.data.table(dat),
  measure.vars = patterns("^distance", "^grey"),
  value.name = c("distance", "grey")
)
newdat

# One line per profile. Layers must be chained with `+`; without it each
# geom_*() call is evaluated on its own and never added to the plot.
ggplot(newdat, aes(distance, grey)) +
  geom_line(aes(color = variable, group = variable))

# Same plot with the colour mapping inherited by both lines and points.
ggplot(newdat, aes(distance, grey, color = variable)) +
  geom_line() +
  geom_point()

# The two profiles are sampled at different distances, so interpolate both
# onto a common grid (limited to the range covered by both) before averaging.
newdist <- seq(0, min(max(dat$distance), max(dat$distance1)), by = 0.1)
newdat2 <- newdat[
  ,
  setNames(approx(distance, grey, xout = newdist), c("distance", "grey")),
  by = variable
][
  ,
  .(variable = "Avg", grey = mean(grey)),
  by = distance
]

# Append the averaged profile to the original long data and plot all three.
newdat2 <- rbindlist(list(newdat, newdat2), use.names = TRUE)

ggplot(newdat2, aes(distance, grey, color = variable)) +
  geom_line() +
  geom_point()
CodePudding user response:
Your x values are slightly different for the two groups, so the standard deviation isn't really defined at each unique value of `distance`. One compromise is to use a loess regression, which will include a confidence interval around the conditional mean of the two groups. This is straightforward using `geom_smooth`:
# Plot both profiles plus a loess smooth through all points; the shaded band
# is the smoother's confidence interval. Layers must be joined with `+`,
# otherwise only the empty base plot is produced.
ggplot(newdat, aes(distance, grey)) +
  geom_line(aes(color = variable)) +
  geom_smooth(aes(color = "Avg"), method = loess, size = 0.7, alpha = 0.15) +
  geom_point() +
  theme_light(base_size = 16)
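If you are happy to compute the means and standard deviations row-wise (that is, pairing the i-th observation of one group with the i-th observation of the other, even though their distances differ slightly), you could do: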
library(data.table)
library(ggplot2)

# Long format for drawing the two original profiles.
newdat <- data.table::melt(
  as.data.table(dat),
  measure.vars = patterns("^distance", "^grey"),
  value.name = c("distance", "grey")
)

# Row-wise summaries: the i-th rows of both profiles are treated as a pair.
dat$mean <- (dat$grey1 + dat$grey2) / 2
dat$d <- (dat$distance1 + dat$distance) / 2  # mean x position of each pair
dat$sd <- unlist(Map(function(a, b) sd(c(a, b)), dat$grey1, dat$grey2))

# Original profiles, the average line, and a mean +/- sd ribbon around it.
ggplot(newdat) +
  geom_line(aes(distance, grey, color = variable)) +
  geom_ribbon(
    data = dat,
    aes(d, ymin = mean - sd, ymax = mean + sd),
    alpha = 0.15
  ) +
  geom_line(data = dat, aes(d, mean, color = "Avg")) +
  geom_point(data = dat, aes(d, mean)) +
  geom_point(aes(distance, grey)) +
  theme_light(base_size = 16)