I have been using dplyr::summarise to sum daily data by month and have just spent a few hours finally figuring out that for whatever reason the decimal places in my raw data was throwing the function off.
Basically, the monthly sums for 'temp1' (which is the 'Mod' data) are wrong (the digits look right but the decimal place is in the wrong place) until I apply a rounding function (rounded to 7 or less digits fixes the problem it seems, 8 just reproduces the problem)
temp1 <- as.data.frame(read.csv("data/ModObs.csv"))
temp1$Date <- as.Date(temp1$Date, "%d/%m/%Y")
((((temp1$Obs <- round(temp1$Obs, 7))))#this line fixes the problem
((((temp1$Mod <- round(temp1$Mod, 7))))#this line fixes the problem
temp1$yearmonth <- lubridate::floor_date(as.Date(temp1$Date), 'month')
temp2 <- dplyr::group_by(temp1, yearmonth)
temp2 <- dplyr::summarise(temp2, Obs = sum(Obs, na.rm=TRUE))
temp1 <- dplyr::group_by(temp1, yearmonth)
temp1 <- dplyr::summarise(temp1, Mod = sum(Mod, na.rm=TRUE))
My question is, is there a way of using dplyr::summarise that means I would not have encountered this issue (which took me quite a while to figure out and solve)?
dput with error:
dput(temp1) structure(list(yearmonth = structure(c(0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 396, 424, 455, 485, 516, 546, 577, 608, 638, 669, 699, 730, 761, 790, 821, 851, 882, 912, 943, 974, 1004, 1035, 1065, 1096, 1127, 1155, 1186, 1216, 1247, 1277, 1308, 1339, 1369, 1400, 1430, 1461, 1492, 1520, 1551, 1581, 1612, 1642, 1673, 1704, 1734, 1765, 1795, 1826, 1857, 1885, 1916, 1946, 1977, 2007, 2038, 2069, 2099, 2130, 2160, 2191, 2222, 2251, 2282, 2312, 2343, 2373, 2404, 2435, 2465, 2496, 2526, 2557, 2588, 2616, 2647, 2677, 2708, 2738, 2769, 2800, 2830, 2861, 2891, 2922, 2953, 2981, 3012, 3042, 3073, 3103, 3134, 3165, 3195, 3226, 3256, 3287, 3318, 3346, 3377, 3407, 3438, 3468, 3499, 3530, 3560, 3591, 3621, 3652, 3683, 3712, 3743, 3773, 3804, 3834, 3865, 3896, 3926, 3957, 3987, 4018, 4049, 4077, 4108, 4138, 4169, 4199, 4230, 4261, 4291, 4322, 4352, 4383, 4414, 4442, 4473, 4503, 4534, 4564, 4595, 4626, 4656, 4687, 4717, 4748, 4779, 4807, 4838, 4868, 4899, 4929, 4960, 4991, 5021, 5052, 5082, 5113, 5144, 5173, 5204, 5234, 5265, 5295, 5326, 5357, 5387, 5418, 5448, 5479, 5510, 5538, 5569, 5599, 5630, 5660, 5691, 5722, 5752, 5783, 5813, 5844, 5875, 5903, 5934, 5964, 5995, 6025, 6056, 6087, 6117, 6148, 6178, 6209, 6240, 6268, 6299, 6329, 6360, 6390, 6421, 6452, 6482, 6513, 6543, 6574, 6605, 6634, 6665, 6695, 6726, 6756, 6787, 6818, 6848, 6879, 6909, 6940, 6971, 6999, 7030, 7060, 7091, 7121, 7152, 7183, 7213, 7244, 7274), class = "Date"), Obs = c(29.5, 1.6, 225.9, 305, 485.9, 392.6, 86.7, 422.1, 262.6, 22.6, 12.7, 40.3, 96.5, 1.4, 0, 40.1, 251.9, 494.4, 181.6, 86, 69.6, 128.4, 560.9, 55.8, 14.2, 94.1, 8.9, 28.2, 10.3, 1.4, 2.6, 9, 14.2, 5.6, 9.5, 1.6, 0, 195.1, 24.3, 18.9, 26, 34.3, 68.1, 144.1, 213.3, 99.2, 36.1, 17, 19.5, 13.3, 0, 352.7, 2812.7, 82, 311.9, 314.2, 397.5, 783.7, 388.7, 40.4, 1.3, 0, 2.1, 2.9, 2.1, 8.6, 1.2, 260.8, 494.4, 1125.1, 983.3, 20.9, 3.2, 0, 0.1, 1.1, 0.6, 32.9, 16.2, 124.1, 204.3, 340.6, 212.6, 42.8, 56.7, 2.2, 22.6, 78.3, 100.2, 786.4, 990.7, 330.2, 119.9, 2.7, 3.5, 5.7, 9.4, 17.6, 104.2, 168.9, 43, 118.4, 69.7, 479.3, 435.4, 101.4, 52.9, 104, 28.6, 0.8, 1.8, 13.1, 54.4, 26.2, 6.3, 24.8, 30.7, 53.9, 1.4, 0, 4.6, 0, 1.5, 14.5, 67.3, 217.4, 460.5, 251.2, 72.3, 29.3, 57.7, 29.7, 1.4, 1, 30.4, 3.6, 164.2, 378.4, 861.1, 982.5, 116.5, 68.4, 33.9, 1.5, 52.3, 7.5, 18.2, 70.4, 75.9, 158.9, 26.1, 2.3, 6, 8.4, 0.1, 43.9, 8.3, 0, 0.7, 8.6, 38.3, 24.2, 110.1, 164.1, 239, 120.8, 23.9, 24.7, 1.8, 1.4, 54.7, 75.6, 11.6, 19.6, 69.4, 199.7, 648.3, 260.9, 53.5, 4.5, 0.5, 0, 0.6, 60.9, 9, 130.9, 61.7, 539.5, 222.1, 31.6, 19.8, 288.6, 83.3, 2, 1.8, 104.4, 214, 108.3, 504.2, 152.6, 110.2, 103.3, 14.7, 128.2, 3.5, 2.5, 7.2, 47.4, 73.6, 116.2, 150.6, 161.1, 58.8, 32.9, 12.1, 33.3, 31.3, 0.7, 39.7, 0.3, 26, 102.2, 55.2, 46.3, 62.3, 15.4, 200.2, 98.9, 35.5, 0.4, 80.3, 286.5, 348.8, 646.5, 340.3, 1048.5, 558.6, 365.5, 129.4, 3.7), Mod = c(58.456732574, 0.647399496, 106.77816386, 267.838017351, 599.939323463, 250.80934844, 113.281660213, 241.663996002, 127.530387061, 52.687410089, 84.890244021, 41.364802773, 59.23208781, 8.497558874, 0.672761812, 15.465132304, 358.926445816, 399.9093607, 97.971842098, 42.72450411, 78.475537521, 267.696647395, 1499.730009232, 164.134543701, 15.739950594, 117.176571603, 0.29960511, 33.153451885, 71.35707594, 1.976493212, 38.99406048, 58.699745671, 88.893788732, 55.590919209, 17.675911123, 0.323688533, 0.802922429, 255.339027286, 86.973361482, 56.672316286, 195.494804037, 113.402888496, 88.016557451, 146.313739207, 141.11162499, 309.49712486, 42.342303882, 32.801816137, 9.804984811, 14.876734504, 0.741273571, 432.148407136, 2516.875488309, 47.539316029, 269.405152962, 183.64372206, 154.563624943, 467.720012557, 153.054373772, 32.514885627, 1.830055421, 0.066762771, 1.044433442, 1.346976081, 17.458179607, 49.907434727, 53.305731876, 353.57856375, 310.529543548, 962.398015832, 344.181844335, 30.810939684, 8.040785393, 0.377896164, 0.798674902, 0.801987649, 0.691369382, 13.928109124, 12.703685263, 137.85141766, 211.000002457, 444.374773665, 187.06473363, 56.579158088, 79.307114494, 1.185915374, 7.450495202, 350.92445957, 168.333585374, 1103.415013415, 530.738230571, 97.400577403, 120.218466778, 26.53863178, 4.95759286, 1.415953207, 1.349259407, 7.598631896, 31.687964985, 111.63067543, 253.033200389, 260.084267318, 174.328538378, 435.075601539, 266.057507136, 169.491413576, 370.501536962, 325.734910145, 52.804905885, 0.637640491, 0.749393501, 61.044014158, 236.352010674, 69.236802018, 50.981912279, 113.755615714, 68.015519965, 228.376481539, 1.35494224, 0.370446501, 1.782754512, 0.200525121, 0.567715904, 2.562157517, 66.360280078, 537.409598471, 548.857756317, 131.818783821, 79.38907511, 216.219977069, 86.85853468, 91.042441797, 1.310072508, 4.6784498, 49.359916771, 12.139100379, 792.36086926, 360.543361637, 674.819587278, 417.14625705, 71.213853069, 88.470327459, 26.501678301, 0.660446628, 1.983262203, 0.053393889, 2.116900185, 103.974625465, 146.1406309, 241.456322328, 41.761031962, 19.165518836, 61.329157567, 77.461701504, 0.717057613, 60.244865985, 2.038796249, 0.024612503, 1.15999722, 0.987871135, 66.752973657, 101.602951298, 207.507552152, 351.874694806, 239.490966404, 373.402963887, 83.392418938, 96.713198206, 2.471314963, 1.789748376, 23.642411238, 274.437164678, 27.196302352, 122.879115856, 203.243972815, 261.450286079, 674.337097864, 201.592587766, 66.457305017, 14.265446489, 11.688820111, 0.287104024, 0.498545345, 10.595714786, 64.36811409, 147.944544256, 105.263660789, 348.781394762, 115.965911604, 81.556952547, 35.877763907, 293.156577573, 122.052605838, 0.891615203, 0.201455399, 17.693507458, 191.862026713, 93.030313466, 379.074639489, 88.590763754, 138.225716958, 438.407332197, 11.107850781, 175.835916749, 0.793580574, 0.755295219, 1.253581528, 2.175241521, 126.829190302, 167.624256025, 261.538659971, 143.633607733, 58.216055381, 101.857571372, 37.192461414, 112.344312062, 100.262190061, 2.378722279, 4.946631624, 0.435401092, 51.481605801, 155.384067186, 153.115869623, 128.316180053, 153.07003862, 80.585921934, 221.89445498, 62.047224666, 105.157734971, 0.203008456, 51.168132113, 310.567488885, 238.329914336, 783.724869869, 193.016695288, 431.815627948, 143.854730373, 358.082995503, 84.620059176, 1.146042245)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -240L))
CodePudding user response:
Your data is exactly the same before and after the rounding.
library(dplyr)
temp1 <- as.data.frame(read.csv("C:/Users/xxx/Documents/modops.csv"))
temp1$Date <- as.Date(temp1$Date, "%d/%m/%Y")
temp1$yearmonth <- lubridate::floor_date(as.Date(temp1$Date), 'month')
notfixed <- dplyr::group_by(temp1, yearmonth) %>% summarise(Mod = sum(Mod, na.rm=TRUE))
temp1$Obs <- round(temp1$Obs, 7)#this line fixes the problem
fixed <- dplyr::group_by(temp1, yearmonth) %>% summarise(Mod = sum(Mod, na.rm=TRUE))
> identical(fixed, notfixed)
[1] TRUE