My dataset contains 2 variables Y and X. Y was measured every 1.0 seconds.
I am trying to calculate the average slope within a moving 60-second-window, i.e. after calculating the first 60-second slope value the window moves forward one time unit (1.0 seconds) and calculates the next 60-second-window, producing successive 60-second slope values at 1.0-second increments.
My data:
dput(Dataexample)
structure(list(X = c("1", "2", "3", "4", "5", "6", "7", "8",
"9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
"20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30",
"31", "32", "33", "34", "35", "36", "37", "38", "39", "40", "41",
"42", "43", "44", "45", "46", "47", "48", "49", "50", "51", "52",
"53", "54", "55", "56", "57", "58", "59", "60", "61", "62", "63",
"64", "65", "66", "67", "68", "69", "70", "71", "72", "73", "74",
"75", "76", "77", "78", "79", "80", "81", "82", "83", "84", "85",
"86", "87", "88", "89", "90", "91", "92", "93", "94", "95", "96",
"97", "98", "99", "100", "101", "102", "103", "104", "105", "106",
"107", "108", "109", "110", "111", "112", "113", "114", "115",
"116", "117", "118", "119", "120", "121", "122", "123", "124",
"125", "126", "127", "128", "129", "130", "131", "132", "133",
"134", "135", "136", "137", "138", "139", "140", "141", "142",
"143", "144", "145", "146", "147", "148", "149", "150", "151",
"152", "153", "154", "155", "156", "157", "158", "159", "160",
"161", "162", "163", "164", "165", "166", "167", "168", "169",
"170", "171", "172", "173", "174", "175", "176", "177", "178",
"179", "180", "181", "182", "183", "184", "185", "186", "187",
"188", "189", "190", "191", "192", "193", "194", "195", "196",
"197", "198", "199", "200", "201", "202", "203", "204", "205",
"206", "207", "208", "209", "210", "211", "212", "213", "214",
"215", "216", "217", "218", "219", "220", "221", "222", "223",
"224", "225", "226", "227", "228", "229", "230", "231", "232",
"233", "234", "235", "236", "237", "238", "239", "240", "241",
"242", "243", "244", "245", "246", "247", "248", "249", "250",
"251", "252", "253", "254", "255", "256", "257", "258", "259",
"260", "261", "262", "263", "264", "265", "266", "267", "268",
"269", "270", "271", "272", "273", "274", "275", "276", "277",
"278", "279", "280", "281", "282", "283", "284", "285", "286",
"287", "288", "289", "290", "291", "292", "293", "294", "295",
"296", "297", "298", "299", "300", "301", "302", "303", "304",
"305", "306", "307", "308", "309", "310", "311", "312", "313",
"314", "315", "316", "317", "318", "319", "320", "321", "322",
"323", "324", "325", "326", "327", "328", "329", "330", "331",
"332", "333", "334", "335", "336", "337", "338", "339", "340",
"341", "342", "343", "344", "345", "346", "347", "348", "349",
"350", "351", "352", "353", "354", "355", "356", "357", "358",
"359", "360", "361", "362", "363", "364", "365", "366", "367",
"368", "369", "370", "371", "372", "373", "374", "375", "376",
"377", "378", "379", "380", "381", "382", "383", "384", "385",
"386", "387", "388", "389", "390", "391", "392", "393", "394",
"395", "396", "397", "398", "399", "400", "401", "402", "403",
"404", "405", "406", "407", "408", "409", "410", "411", "412",
"413", "414", "415", "416", "417", "418", "419"), Y = c(25221.6738,
25220.66, 25079.83, 24912.6719, 24801.24, 24791.1113, 24431.54,
24504.46, 24086.21, 24504.46, 24109.5, 24165.1953, 23999.1289,
23802.7012, 23838.1387, 23899.9, 23585.0371, 23605.2832, 23861.4258,
23675.1367, 23642.74, 23565.8027, 23378.5313, 23227.7129, 23105.248,
23393.7148, 23186.2168, 23577.9512, 23230.75, 23186.2168, 23057.68,
23261.1152, 23282.3711, 23184.1914, 23314.7617, 23223.666, 23023.27,
23059.7031, 22867.4219, 23047.5586, 23114.3555, 22960.5234, 23266.1758,
22824.92, 22910.9355, 22863.373, 22891.709, 22997.9688, 22873.4941,
22948.38, 22844.1465, 22969.6328, 22935.2246, 22983.8, 23078.9336,
22965.584, 22814.8, 22815.8125, 22874.5059, 22632.66, 22800.6328,
22715.6328, 22515.291, 22679.2051, 22525.4082, 22670.1, 22506.1855,
22595.2227, 22741.9434, 22609.3887, 22727.7754, 22442.4453, 22246.1777,
22361.5078, 22453.5742, 22643.791, 22587.1289, 22567.9043, 22681.23,
22590.1641, 22327.11, 22453.5742, 22350.3789, 22566.8926, 22558.7988,
22583.082, 22765.2168, 22519.3379, 22290.69, 22326.0977, 22236.06,
22662.0039, 22365.5547, 22259.3281, 22511.2441, 22284.62, 22480.89,
22422.209, 22483.9258, 22653.91, 22365.5547, 22342.2852, 22463.6914,
22160.19, 22426.2559, 22537.55, 22536.54, 22425.2441, 22251.2363,
22214.8164, 22264.3867, 22654.9219, 22342.2852, 22364.543, 22542.61,
22283.6074, 22463.6914, 22373.6484, 22276.5273, 22276.5273, 22168.2832,
22219.875, 22026.6641, 22409.0566, 22147.04, 22120.7383, 22374.66,
22304.8535, 22303.8418, 22459.6445, 22122.7617, 22236.06, 22427.2676,
22147.04, 22449.5273, 22597.2461, 22425.2441, 22289.6777, 22298.7832,
22232.0137, 22486.96, 22350.3789, 22201.666, 22234.0371, 22220.8867,
22280.5723, 22321.04, 21867.8633, 22328.1211, 22327.11, 22261.3516,
22585.1055, 22146.0273, 22210.77, 22201.666, 22185.48, 22249.2129,
22399.9512, 22549.6914, 22308.9, 22516.3027, 21998.3418, 22302.83,
22014.5273, 22014.5273, 22219.875, 22339.25, 22000.3652, 22064.0918,
22344.3086, 22044.873, 22294.7363, 22187.5039, 22102.5313, 22502.1387,
21872.92, 22168.2832, 22146.0273, 22128.832, 22114.67, 22155.1328,
22384.7754, 22300.8066, 22396.916, 22442.4453, 22141.9824, 22149.0625,
22114.67, 22259.3281, 22226.957, 22248.2012, 22263.375, 22258.3164,
22131.8652, 22272.48, 22486.96, 22212.793, 22171.3184, 22275.5156,
22307.8887, 22268.4336, 22116.6914, 21950.8027, 22266.41, 22095.45,
22101.52, 22125.7969, 22306.877, 22305.8652, 22355.4375, 22292.7129,
22089.38, 22208.748, 22183.457, 22162.2129, 22125.7969, 22440.4219,
22216.84, 22016.55, 22293.7246, 22112.6465, 22210.77, 22134.9,
21936.6426, 21776.8359, 22160.19, 22298.7832, 22342.2852, 21995.3086,
22223.9219, 22153.11, 22383.7637, 22341.2734, 22082.2988, 22198.63,
22192.56, 22138.9473, 22112.6465, 22203.69, 22054.9883, 22169.2949,
22312.9473, 21971.0313, 22088.37, 22319.0156, 22162.2129, 22279.5625,
22119.7266, 22115.6816, 22225.9453, 22293.7246, 22157.1563, 22151.0859,
22192.56, 22263.375, 22256.293, 22171.3184, 22124.7852, 22401.9746,
22038.8027, 22063.08, 22408.0449, 22057.0117, 21960.918, 22160.19,
22134.9, 22029.7, 22470.7734, 22347.3438, 22264.3867, 22182.4453,
21969.01, 22358.4727, 22286.6426, 22366.5664, 22282.5957, 22257.3047,
22162.2129, 22034.7578, 22157.1563, 22247.19, 22131.8652, 21848.6465,
22043.8613, 21923.4922, 22258.3164, 22313.959, 22058.0215, 22003.4,
22167.2715, 21953.8379, 22254.27, 22152.0977, 22206.7246, 22085.334,
22266.41, 22323.0625, 22134.9, 22083.31, 22401.9746, 22190.54,
22180.4219, 22040.8262, 21978.1133, 22149.0625, 22346.332, 22270.457,
22111.6348, 22108.6, 22408.0449, 22468.75, 22222.91, 21854.7148,
22379.7188, 22179.41, 21972.043, 22097.4727, 22436.375, 22059.0332,
22215.8281, 22311.9355, 22369.6016, 22148.05, 22227.9688, 22094.4375,
22378.707, 22169.2949, 22186.4922, 21977.1016, 22264.3867, 21984.1816,
22001.377, 22267.4219, 22164.2363, 22066.1152, 21814.2578, 22160.19,
21970.0215, 22209.7578, 22081.2871, 22338.2383, 22215.8281, 22042.85,
22214.8164, 22413.1035, 22173.3418, 22216.84, 22203.69, 22125.7969,
22038.8027, 22171.3184, 22215.8281, 21966.9863, 22226.957, 22121.75,
22021.6074, 22036.78, 22213.8047, 22313.959, 22115.6816, 22095.45,
22242.13, 22127.82, 22082.2988, 22466.7266, 22265.3984, 22276.5273,
22309.9121, 22352.4023, 22274.5039, 22183.457, 22402.9863, 22188.5156,
22141.9824, 22173.3418, 21995.3086, 22171.3184, 22079.2656, 22151.0859,
21746.4941, 21983.17, 22153.11, 22224.9336, 22115.6816, 22020.5957,
21894.16, 21947.7676, 22277.54, 22016.55, 21941.7, 22276.5273,
22113.6582, 22126.8086, 22016.55, 22374.66, 22009.4688, 22099.4961,
21991.2617, 21886.0684, 22161.2012, 22019.584, 22283.6074, 22202.6777,
22022.62, 22289.6777, 21913.3789, 21958.8945, 22125.7969, 22244.1543,
22041.8379, 22146.0273, 22120.7383, 21997.332, 21907.3086, 22010.48,
22167.2715, 21927.54, 21964.9629, 21955.86)), row.names = c(NA,
-419L), class = c("tbl_df", "tbl", "data.frame"))
CodePudding user response:
You could do this with a loop:
Dataexample$X <- as.numeric(Dataexample$X)
slopes <- rep(NA, nrow(Dataexample)-59)
for( i in 1:length(slopes)){
slopes[i] <- lm(Y ~ X, data=Dataexample[i:(i 59), ])$coefficients[2]
}
CodePudding user response:
The package zoo
has a useful function rollapply
for this sort of task where you want to do a more generalised version of a moving average. For your example, you could do it as:
library(zoo)
Dataexample$X <- as.numeric(Dataexample$X)
rollapply(Dataexample, 60, function(d)lm(Y~X, data.frame(d))$coefficients, by.column=FALSE)
# (Intercept) X
#[1,] 24563.00 -35.13712880
#[2,] 24501.07 -33.33658109
#[3,] 24434.35 -31.54255629
#[4,] 24377.38 -30.17644875
#[5,] 24318.79 -28.68243440
#...
#[360,] 23397.00 -3.29681473
In this case the X
coefficient is the slope that you're interested in.
It's necessary to convert the X
column to numeric because rollapply seems to be using a matrix to store the input data in, so all columns need to be of the same type.
CodePudding user response:
lm(y ~ x)
can accept a matrix y
and, on such a case, "...a linear model is fitted separately by least-squares to each column of the matrix" (?lm
, Details section). Hence, we can (if memory is sufficient) construct all 60-second windows, of y
, beforehand (as a matrix) and pass it to lm
:
n = 60 # window
Make design matrix:
X = cbind(1, 1:n) # X[, 1]: intercept
Make response as a matrix of 60-second windows (by column):
iY = sequence(rep_len(n, length(Dataexample$Y) - n 1),
1:(length(Dataexample$Y) - n 1)) # indices to subset Y
Y = matrix(Dataexample$Y[iY], n)
Run lm
to retrieve slopes:
slopes1 = lm.fit(X, Y)$coefficients[2, ]
As a more efficient alternative, to avoid the overhead of fiting a model, we can calculate the slope manually (from a formula found online):
Where we have:
n = 60 # window
A sequence of indices to subset 'x' and 'y' by a 60-sec window:
i = sequence(rep_len(n, length(Dataexample$Y) - n 1),
1:(length(Dataexample$Y) - n 1))
Get 'x' and 'y':
X = matrix(as.numeric(Dataexample$X)[i], n)
Y = matrix(Dataexample$Y[i], n)
Compute all slopes at once:
Sxy = colSums(X * Y)
Sx = colSums(X)
Sy = colSums(Y)
slopes2 = ((n * Sxy) - (Sx * Sy)) / ((n * colSums(X ^ 2)) - (Sx ^ 2))
And check:
all.equal(slopes1, slopes2)
#[1] TRUE