I'm smoothing time-series data and plotting them with ggplot
. In the past I smoothed the data with TTR, but recently started smoothing on the fly inside ggplot. However, it's producing two artefacts and I'm not sure what I'm missing here.
- smoothing inside ggplot shifts the data along the time axis
- smoothing inside ggplot produces an erroneous drop off at the end for one data series, but not the other.
ggplot(data=df, aes(x=date, y=x, color=group))
geom_line(aes(y=rollmean(x, 10, fill=NA, align='left'), color=group), na.rm= TRUE, size=0.75)
produces
whereas
df.1.ts<-read.zoo(df[df$group=='series1',], format = "%Y-%m-%d")
df.1.SMA10<-data.frame(apply(df.1.ts[,1,drop=F], 2, SMA, n=10))
df.1.SMA10<-cbind(as.Date(time(df.1.ts)), df.1.SMA10)
df.1.SMA10$group<-'series1'
names(df.1.SMA10)[1]<-'date'
df.2.ts<-read.zoo(df[df$group=='series2',], format = "%Y-%m-%d")
df.2.SMA10<-data.frame(apply(df.2.ts[,1,drop=F], 2, SMA, n=10))
df.2.SMA10<-cbind(as.Date(time(df.2.ts)), df.2.SMA10)
df.2.SMA10$group<-'series2'
names(df.2.SMA10)[1]<-'date'
df.SMA10<-rbind(df.1.SMA10, df.2.SMA10)
ggplot(data=df.SMA10, aes(x=date, y=x, color=group))
geom_line(size=0.75, na.rm=T)
produces
Sample data:
df<-structure(list(date = structure(c(14242, 14243, 14244, 14245,
14246, 14247, 14248, 14249, 14250, 14251, 14252, 14253, 14254,
14255, 14256, 14257, 14258, 14259, 14260, 14261, 14262, 14263,
14264, 14265, 14266, 14267, 14268, 14269, 14270, 14271, 14272,
14273, 14274, 14275, 14276, 14277, 14278, 14279, 14280, 14281,
14282, 14283, 14284, 14285, 14286, 14287, 14288, 14289, 14290,
14291, 14292, 14293, 14294, 14295, 14296, 14297, 14298, 14299,
14300, 14301, 14302, 14303, 14304, 14305, 14306, 14307, 14308,
14309, 14310, 14311, 14312, 14313, 14314, 14315, 14316, 14317,
14318, 14319, 14320, 14321, 14322, 14323, 14324, 14325, 14326,
14327, 14328, 14329, 14330, 14331, 14332, 14333, 14334, 14335,
14214, 14215, 14216, 14217, 14218, 14219, 14220, 14221, 14222,
14223, 14224, 14225, 14226, 14227, 14228, 14229, 14230, 14231,
14232, 14233, 14234, 14235, 14236, 14237, 14238, 14239, 14240,
14241, 14242, 14243, 14244, 14245, 14246, 14247, 14248, 14249,
14250, 14251, 14252, 14253, 14254, 14255, 14256, 14257, 14258,
14259, 14260, 14261, 14262, 14263, 14264, 14265, 14266, 14267,
14268, 14269, 14270, 14271, 14272, 14273, 14274, 14275, 14276,
14277, 14278, 14279, 14280, 14281, 14282, 14283, 14284, 14285,
14286, 14287, 14288, 14289, 14290, 14291, 14292, 14293, 14294,
14295, 14296, 14297, 14298, 14299, 14300, 14301, 14302, 14303,
14304, 14305, 14306, 14307, 14308, 14309, 14310, 14311, 14312,
14313, 14314, 14315, 14316, 14317, 14318, 14319, 14320, 14321,
14322, 14323, 14324, 14325, 14326), class = "Date"), x = c(0.859649122807018,
0.583333333333333, 0.868055555555556, 0.78125, 0.524305555555556,
0.475694444444444, 0.538194444444444, 0.798611111111111, 0.576388888888889,
0.819444444444444, 0.746527777777778, 0.725694444444444, 0.336805555555556,
0.263888888888889, 0.486111111111111, 0.701388888888889, 0.864583333333333,
0.701388888888889, 0.524305555555556, 0.916666666666667, 0.715277777777778,
0.857638888888889, 0.305555555555556, 0.701388888888889, 0.774305555555556,
0.857638888888889, 0.961805555555556, 0.840277777777778, 0.913194444444444,
0.909722222222222, 0.746527777777778, 0.711805555555556, 0.895833333333333,
0.666666666666667, 0.993055555555556, 0.96875, 0.597222222222222,
0.725694444444444, 0.791666666666667, 0.875, 0.9375, 0.788194444444444,
0.708333333333333, 0.951388888888889, 0.819444444444444, 0.989583333333333,
0.965277777777778, 0.947916666666667, 0.996527777777778, 0.979166666666667,
0.944444444444444, 0.902777777777778, 0.996527777777778, 0.975694444444444,
1, 1, 1, 1, 0.96875, 0.993055555555556, 0.982638888888889, 0.729166666666667,
1, 0.993055555555556, 0.975694444444444, 0.996527777777778, 0.993055555555556,
0.975694444444444, 0.996527777777778, 0.989583333333333, 0.996527777777778,
1, 0.975694444444444, 0.996527777777778, 1, 0.989583333333333,
0.996527777777778, 1, 0.996527777777778, 0.975694444444444, 0.975694444444444,
0.979166666666667, 0.944444444444444, 0.989583333333333, 1, 0.986111111111111,
0.951388888888889, 0.979166666666667, 0.993055555555556, 0.989583333333333,
0.951388888888889, 0.996527777777778, 0.993055555555556, 1, 0.0390070921985816,
0.0173611111111111, 0.229166666666667, 0, 0, 0.107638888888889,
0.0208333333333333, 0.0763888888888889, 0, 0.121527777777778,
0.00694444444444444, 0.159722222222222, 0.59375, 0.131944444444444,
0.131944444444444, 0.0138888888888889, 0.00694444444444444, 0.0659722222222222,
0.461805555555556, 0.277777777777778, 0.638888888888889, 0.784722222222222,
0.892361111111111, 0.6875, 0.631944444444444, 0.180555555555556,
0.00347222222222222, 0.166666666666667, 0.152777777777778, 0,
0.659722222222222, 0.53125, 0.159722222222222, 0.232638888888889,
0.673611111111111, 0.670138888888889, 0.631944444444444, 0.760416666666667,
0.829861111111111, 0.902777777777778, 0.788194444444444, 0.638888888888889,
0.65625, 0.836805555555556, 0.680555555555556, 0.715277777777778,
0.677083333333333, 0.798611111111111, 0.579861111111111, 0.788194444444444,
0.826388888888889, 0.895833333333333, 0.899305555555556, 0.930555555555556,
0.958333333333333, 0.90625, 0.861111111111111, 0.934027777777778,
0.798611111111111, 0.888888888888889, 0.961805555555556, 0.975694444444444,
0.993055555555556, 0.996527777777778, 0.850694444444444, 0.902777777777778,
0.979166666666667, 0.986111111111111, 0.993055555555556, 0.975694444444444,
0.809027777777778, 0.972222222222222, 0.951388888888889, 0.899305555555556,
0.930555555555556, 0.961805555555556, 0.996527777777778, 0.989583333333333,
0.961805555555556, 0.965277777777778, 0.989583333333333, 0.989583333333333,
0.940972222222222, 0.996527777777778, 0.947916666666667, 0.982638888888889,
1, 1, 0.979166666666667, 0.909722222222222, 0.930555555555556,
0.704861111111111, 0.833333333333333, 0.902777777777778, 0.940972222222222,
0.96875, 0.802083333333333, 0.836805555555556, 0.989583333333333,
0.961805555555556, 1, 0.993055555555556, 0.809027777777778, 0.989583333333333,
0.993055555555556, 0.954861111111111, 0.979166666666667, 0.989583333333333,
0.982638888888889, 0.989583333333333, 1, 0.961805555555556, 0.925581395348837
), group = c("series1", "series1", "series1", "series1", "series1",
"series1", "series1", "series1", "series1", "series1", "series1",
"series1", "series1", "series1", "series1", "series1", "series1",
"series1", "series1", "series1", "series1", "series1", "series1",
"series1", "series1", "series1", "series1", "series1", "series1",
"series1", "series1", "series1", "series1", "series1", "series1",
"series1", "series1", "series1", "series1", "series1", "series1",
"series1", "series1", "series1", "series1", "series1", "series1",
"series1", "series1", "series1", "series1", "series1", "series1",
"series1", "series1", "series1", "series1", "series1", "series1",
"series1", "series1", "series1", "series1", "series1", "series1",
"series1", "series1", "series1", "series1", "series1", "series1",
"series1", "series1", "series1", "series1", "series1", "series1",
"series1", "series1", "series1", "series1", "series1", "series1",
"series1", "series1", "series1", "series1", "series1", "series1",
"series1", "series1", "series1", "series1", "series1", "series2",
"series2", "series2", "series2", "series2", "series2", "series2",
"series2", "series2", "series2", "series2", "series2", "series2",
"series2", "series2", "series2", "series2", "series2", "series2",
"series2", "series2", "series2", "series2", "series2", "series2",
"series2", "series2", "series2", "series2", "series2", "series2",
"series2", "series2", "series2", "series2", "series2", "series2",
"series2", "series2", "series2", "series2", "series2", "series2",
"series2", "series2", "series2", "series2", "series2", "series2",
"series2", "series2", "series2", "series2", "series2", "series2",
"series2", "series2", "series2", "series2", "series2", "series2",
"series2", "series2", "series2", "series2", "series2", "series2",
"series2", "series2", "series2", "series2", "series2", "series2",
"series2", "series2", "series2", "series2", "series2", "series2",
"series2", "series2", "series2", "series2", "series2", "series2",
"series2", "series2", "series2", "series2", "series2", "series2",
"series2", "series2", "series2", "series2", "series2", "series2",
"series2", "series2", "series2", "series2", "series2", "series2",
"series2", "series2", "series2", "series2", "series2", "series2",
"series2", "series2", "series2", "series2")), row.names = c(NA,
-207L), class = "data.frame")
CodePudding user response:
In your ggplot(.)
code, you are calling rollmean(x, ...)
, which is rolling on all of x
, regardless of group. If you want it to be per-group, you can do the following:
ggplot(data=df, aes(x=date, y=x, color=group))
geom_line(aes(y=ave(x, group, FUN = function(Z) zoo::rollmean(Z, 10, fill=NA, align='left')),
color=group), na.rm= TRUE, size=0.75)
I tend to put aggregating/data-munging code outside of ggplot2
, though, and this would have helped identify the problem:
df$rolly <- zoo::rollmean(df$x, 10, fill=NA, align='left')
xtabs(~ group is.na(rolly), data = df)
# is.na(rolly)
# group FALSE TRUE
# series1 94 0
# series2 104 9
df[c(1:3, 92:97, 196:207),]
# date x group rolly
# 1 2008-12-29 0.85964912 series1 0.68249269
# 2 2008-12-30 0.58333333 series1 0.67118056
# 3 2008-12-31 0.86805556 series1 0.68541667
# 92 2009-03-30 0.99652778 series1 0.34035904
# 93 2009-03-31 0.99305556 series1 0.24834515
# 94 2009-04-01 1.00000000 series1 0.14903960
# 95 2008-12-01 0.03900709 series2 0.06119238
# 96 2008-12-02 0.01736111 series2 0.05798611
# 97 2008-12-03 0.22916667 series2 0.07222222
# 196 2009-03-12 0.99305556 series2 0.96805556
# 197 2009-03-13 0.80902778 series2 0.96493056
# 198 2009-03-14 0.98958333 series2 0.97658592
# 199 2009-03-15 0.99305556 series2 NA
# 200 2009-03-16 0.95486111 series2 NA
# 201 2009-03-17 0.97916667 series2 NA
# 202 2009-03-18 0.98958333 series2 NA
# 203 2009-03-19 0.98263889 series2 NA
# 204 2009-03-20 0.98958333 series2 NA
# 205 2009-03-21 1.00000000 series2 NA
# 206 2009-03-22 0.96180556 series2 NA
# 207 2009-03-23 0.92558140 series2 NA
Where I would expect the last 9 rows of each series to be NA
, not just one series. We can fix that with:
df$rolly <- ave(df$x, df$group, FUN = function(Z) zoo::rollmean(Z, 10, fill=NA, align='left'))
df[c(1:3, 82:97, 196:207),]
# date x group rolly
# 1 2008-12-29 0.85964912 series1 0.68249269
# 2 2008-12-30 0.58333333 series1 0.67118056
# 3 2008-12-31 0.86805556 series1 0.68541667
# 82 2009-03-20 0.97916667 series1 0.97638889
# 83 2009-03-21 0.94444444 series1 0.97812500
# 84 2009-03-22 0.98958333 series1 0.98298611
# 85 2009-03-23 1.00000000 series1 0.98402778
# 86 2009-03-24 0.98611111 series1 NA
# 87 2009-03-25 0.95138889 series1 NA
# 88 2009-03-26 0.97916667 series1 NA
# 89 2009-03-27 0.99305556 series1 NA
# 90 2009-03-28 0.98958333 series1 NA
# 91 2009-03-29 0.95138889 series1 NA
# 92 2009-03-30 0.99652778 series1 NA
# 93 2009-03-31 0.99305556 series1 NA
# 94 2009-04-01 1.00000000 series1 NA
# 95 2008-12-01 0.03900709 series2 0.06119238
# 96 2008-12-02 0.01736111 series2 0.05798611
# 97 2008-12-03 0.22916667 series2 0.07222222
# 196 2009-03-12 0.99305556 series2 0.96805556
# 197 2009-03-13 0.80902778 series2 0.96493056
# 198 2009-03-14 0.98958333 series2 0.97658592
# 199 2009-03-15 0.99305556 series2 NA
# 200 2009-03-16 0.95486111 series2 NA
# 201 2009-03-17 0.97916667 series2 NA
# 202 2009-03-18 0.98958333 series2 NA
# 203 2009-03-19 0.98263889 series2 NA
# 204 2009-03-20 0.98958333 series2 NA
# 205 2009-03-21 1.00000000 series2 NA
# 206 2009-03-22 0.96180556 series2 NA
# 207 2009-03-23 0.92558140 series2 NA
or, if you're comfortable with dplyr
, then
library(dplyr)
df %>%
group_by(group) %>%
mutate(rolly = zoo::rollmean(x, 10, fill=NA, align='left')) %>%
ungroup() %>%
ggplot(aes(x=date, y=x, color=group))
geom_line(aes(y=rolly, color=group), na.rm= TRUE, size=0.75)