Home > database >  Finding the slope of a linear trend line in a moving window in R
Finding the slope of a linear trend line in a moving window in R

Time:03-02

My dataset contains 2 variables Y and X. Y was measured every 1.0 seconds.

I am trying to calculate the average slope within a moving 60-second-window, i.e. after calculating the first 60-second slope value the window moves forward one time unit (1.0 seconds) and calculates the next 60-second-window, producing successive 60-second slope values at 1.0-second increments.

My data:

 dput(Dataexample)
structure(list(X = c("1", "2", "3", "4", "5", "6", "7", "8", 
"9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", 
"20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", 
"31", "32", "33", "34", "35", "36", "37", "38", "39", "40", "41", 
"42", "43", "44", "45", "46", "47", "48", "49", "50", "51", "52", 
"53", "54", "55", "56", "57", "58", "59", "60", "61", "62", "63", 
"64", "65", "66", "67", "68", "69", "70", "71", "72", "73", "74", 
"75", "76", "77", "78", "79", "80", "81", "82", "83", "84", "85", 
"86", "87", "88", "89", "90", "91", "92", "93", "94", "95", "96", 
"97", "98", "99", "100", "101", "102", "103", "104", "105", "106", 
"107", "108", "109", "110", "111", "112", "113", "114", "115", 
"116", "117", "118", "119", "120", "121", "122", "123", "124", 
"125", "126", "127", "128", "129", "130", "131", "132", "133", 
"134", "135", "136", "137", "138", "139", "140", "141", "142", 
"143", "144", "145", "146", "147", "148", "149", "150", "151", 
"152", "153", "154", "155", "156", "157", "158", "159", "160", 
"161", "162", "163", "164", "165", "166", "167", "168", "169", 
"170", "171", "172", "173", "174", "175", "176", "177", "178", 
"179", "180", "181", "182", "183", "184", "185", "186", "187", 
"188", "189", "190", "191", "192", "193", "194", "195", "196", 
"197", "198", "199", "200", "201", "202", "203", "204", "205", 
"206", "207", "208", "209", "210", "211", "212", "213", "214", 
"215", "216", "217", "218", "219", "220", "221", "222", "223", 
"224", "225", "226", "227", "228", "229", "230", "231", "232", 
"233", "234", "235", "236", "237", "238", "239", "240", "241", 
"242", "243", "244", "245", "246", "247", "248", "249", "250", 
"251", "252", "253", "254", "255", "256", "257", "258", "259", 
"260", "261", "262", "263", "264", "265", "266", "267", "268", 
"269", "270", "271", "272", "273", "274", "275", "276", "277", 
"278", "279", "280", "281", "282", "283", "284", "285", "286", 
"287", "288", "289", "290", "291", "292", "293", "294", "295", 
"296", "297", "298", "299", "300", "301", "302", "303", "304", 
"305", "306", "307", "308", "309", "310", "311", "312", "313", 
"314", "315", "316", "317", "318", "319", "320", "321", "322", 
"323", "324", "325", "326", "327", "328", "329", "330", "331", 
"332", "333", "334", "335", "336", "337", "338", "339", "340", 
"341", "342", "343", "344", "345", "346", "347", "348", "349", 
"350", "351", "352", "353", "354", "355", "356", "357", "358", 
"359", "360", "361", "362", "363", "364", "365", "366", "367", 
"368", "369", "370", "371", "372", "373", "374", "375", "376", 
"377", "378", "379", "380", "381", "382", "383", "384", "385", 
"386", "387", "388", "389", "390", "391", "392", "393", "394", 
"395", "396", "397", "398", "399", "400", "401", "402", "403", 
"404", "405", "406", "407", "408", "409", "410", "411", "412", 
"413", "414", "415", "416", "417", "418", "419"), Y = c(25221.6738, 
25220.66, 25079.83, 24912.6719, 24801.24, 24791.1113, 24431.54, 
24504.46, 24086.21, 24504.46, 24109.5, 24165.1953, 23999.1289, 
23802.7012, 23838.1387, 23899.9, 23585.0371, 23605.2832, 23861.4258, 
23675.1367, 23642.74, 23565.8027, 23378.5313, 23227.7129, 23105.248, 
23393.7148, 23186.2168, 23577.9512, 23230.75, 23186.2168, 23057.68, 
23261.1152, 23282.3711, 23184.1914, 23314.7617, 23223.666, 23023.27, 
23059.7031, 22867.4219, 23047.5586, 23114.3555, 22960.5234, 23266.1758, 
22824.92, 22910.9355, 22863.373, 22891.709, 22997.9688, 22873.4941, 
22948.38, 22844.1465, 22969.6328, 22935.2246, 22983.8, 23078.9336, 
22965.584, 22814.8, 22815.8125, 22874.5059, 22632.66, 22800.6328, 
22715.6328, 22515.291, 22679.2051, 22525.4082, 22670.1, 22506.1855, 
22595.2227, 22741.9434, 22609.3887, 22727.7754, 22442.4453, 22246.1777, 
22361.5078, 22453.5742, 22643.791, 22587.1289, 22567.9043, 22681.23, 
22590.1641, 22327.11, 22453.5742, 22350.3789, 22566.8926, 22558.7988, 
22583.082, 22765.2168, 22519.3379, 22290.69, 22326.0977, 22236.06, 
22662.0039, 22365.5547, 22259.3281, 22511.2441, 22284.62, 22480.89, 
22422.209, 22483.9258, 22653.91, 22365.5547, 22342.2852, 22463.6914, 
22160.19, 22426.2559, 22537.55, 22536.54, 22425.2441, 22251.2363, 
22214.8164, 22264.3867, 22654.9219, 22342.2852, 22364.543, 22542.61, 
22283.6074, 22463.6914, 22373.6484, 22276.5273, 22276.5273, 22168.2832, 
22219.875, 22026.6641, 22409.0566, 22147.04, 22120.7383, 22374.66, 
22304.8535, 22303.8418, 22459.6445, 22122.7617, 22236.06, 22427.2676, 
22147.04, 22449.5273, 22597.2461, 22425.2441, 22289.6777, 22298.7832, 
22232.0137, 22486.96, 22350.3789, 22201.666, 22234.0371, 22220.8867, 
22280.5723, 22321.04, 21867.8633, 22328.1211, 22327.11, 22261.3516, 
22585.1055, 22146.0273, 22210.77, 22201.666, 22185.48, 22249.2129, 
22399.9512, 22549.6914, 22308.9, 22516.3027, 21998.3418, 22302.83, 
22014.5273, 22014.5273, 22219.875, 22339.25, 22000.3652, 22064.0918, 
22344.3086, 22044.873, 22294.7363, 22187.5039, 22102.5313, 22502.1387, 
21872.92, 22168.2832, 22146.0273, 22128.832, 22114.67, 22155.1328, 
22384.7754, 22300.8066, 22396.916, 22442.4453, 22141.9824, 22149.0625, 
22114.67, 22259.3281, 22226.957, 22248.2012, 22263.375, 22258.3164, 
22131.8652, 22272.48, 22486.96, 22212.793, 22171.3184, 22275.5156, 
22307.8887, 22268.4336, 22116.6914, 21950.8027, 22266.41, 22095.45, 
22101.52, 22125.7969, 22306.877, 22305.8652, 22355.4375, 22292.7129, 
22089.38, 22208.748, 22183.457, 22162.2129, 22125.7969, 22440.4219, 
22216.84, 22016.55, 22293.7246, 22112.6465, 22210.77, 22134.9, 
21936.6426, 21776.8359, 22160.19, 22298.7832, 22342.2852, 21995.3086, 
22223.9219, 22153.11, 22383.7637, 22341.2734, 22082.2988, 22198.63, 
22192.56, 22138.9473, 22112.6465, 22203.69, 22054.9883, 22169.2949, 
22312.9473, 21971.0313, 22088.37, 22319.0156, 22162.2129, 22279.5625, 
22119.7266, 22115.6816, 22225.9453, 22293.7246, 22157.1563, 22151.0859, 
22192.56, 22263.375, 22256.293, 22171.3184, 22124.7852, 22401.9746, 
22038.8027, 22063.08, 22408.0449, 22057.0117, 21960.918, 22160.19, 
22134.9, 22029.7, 22470.7734, 22347.3438, 22264.3867, 22182.4453, 
21969.01, 22358.4727, 22286.6426, 22366.5664, 22282.5957, 22257.3047, 
22162.2129, 22034.7578, 22157.1563, 22247.19, 22131.8652, 21848.6465, 
22043.8613, 21923.4922, 22258.3164, 22313.959, 22058.0215, 22003.4, 
22167.2715, 21953.8379, 22254.27, 22152.0977, 22206.7246, 22085.334, 
22266.41, 22323.0625, 22134.9, 22083.31, 22401.9746, 22190.54, 
22180.4219, 22040.8262, 21978.1133, 22149.0625, 22346.332, 22270.457, 
22111.6348, 22108.6, 22408.0449, 22468.75, 22222.91, 21854.7148, 
22379.7188, 22179.41, 21972.043, 22097.4727, 22436.375, 22059.0332, 
22215.8281, 22311.9355, 22369.6016, 22148.05, 22227.9688, 22094.4375, 
22378.707, 22169.2949, 22186.4922, 21977.1016, 22264.3867, 21984.1816, 
22001.377, 22267.4219, 22164.2363, 22066.1152, 21814.2578, 22160.19, 
21970.0215, 22209.7578, 22081.2871, 22338.2383, 22215.8281, 22042.85, 
22214.8164, 22413.1035, 22173.3418, 22216.84, 22203.69, 22125.7969, 
22038.8027, 22171.3184, 22215.8281, 21966.9863, 22226.957, 22121.75, 
22021.6074, 22036.78, 22213.8047, 22313.959, 22115.6816, 22095.45, 
22242.13, 22127.82, 22082.2988, 22466.7266, 22265.3984, 22276.5273, 
22309.9121, 22352.4023, 22274.5039, 22183.457, 22402.9863, 22188.5156, 
22141.9824, 22173.3418, 21995.3086, 22171.3184, 22079.2656, 22151.0859, 
21746.4941, 21983.17, 22153.11, 22224.9336, 22115.6816, 22020.5957, 
21894.16, 21947.7676, 22277.54, 22016.55, 21941.7, 22276.5273, 
22113.6582, 22126.8086, 22016.55, 22374.66, 22009.4688, 22099.4961, 
21991.2617, 21886.0684, 22161.2012, 22019.584, 22283.6074, 22202.6777, 
22022.62, 22289.6777, 21913.3789, 21958.8945, 22125.7969, 22244.1543, 
22041.8379, 22146.0273, 22120.7383, 21997.332, 21907.3086, 22010.48, 
22167.2715, 21927.54, 21964.9629, 21955.86)), row.names = c(NA, 
-419L), class = c("tbl_df", "tbl", "data.frame"))

CodePudding user response:

You could do this with a loop:

Dataexample$X <- as.numeric(Dataexample$X)
  
slopes <- rep(NA, nrow(Dataexample)-59)
for( i in 1:length(slopes)){
  slopes[i] <- lm(Y ~ X, data=Dataexample[i:(i 59), ])$coefficients[2]
}

CodePudding user response:

The package zoo has a useful function rollapply for this sort of task where you want to do a more generalised version of a moving average. For your example, you could do it as:

library(zoo)

Dataexample$X <- as.numeric(Dataexample$X)
rollapply(Dataexample, 60, function(d)lm(Y~X, data.frame(d))$coefficients, by.column=FALSE)

  #    (Intercept)            X
  #[1,]    24563.00 -35.13712880
  #[2,]    24501.07 -33.33658109
  #[3,]    24434.35 -31.54255629
  #[4,]    24377.38 -30.17644875
  #[5,]    24318.79 -28.68243440
  #...
  #[360,]  23397.00  -3.29681473

In this case the X coefficient is the slope that you're interested in.

It's necessary to convert the X column to numeric because rollapply seems to be using a matrix to store the input data in, so all columns need to be of the same type.

CodePudding user response:

lm(y ~ x) can accept a matrix y and, on such a case, "...a linear model is fitted separately by least-squares to each column of the matrix" (?lm, Details section). Hence, we can (if memory is sufficient) construct all 60-second windows, of y, beforehand (as a matrix) and pass it to lm:

n = 60 # window

Make design matrix:

X = cbind(1, 1:n) # X[, 1]: intercept

Make response as a matrix of 60-second windows (by column):

iY = sequence(rep_len(n, length(Dataexample$Y) - n   1), 
              1:(length(Dataexample$Y) - n   1)) # indices to subset Y
Y = matrix(Dataexample$Y[iY], n)

Run lm to retrieve slopes:

slopes1 = lm.fit(X, Y)$coefficients[2, ]

As a more efficient alternative, to avoid the overhead of fiting a model, we can calculate the slope manually (from a formula found online):

eq

Where we have:

n = 60 # window

A sequence of indices to subset 'x' and 'y' by a 60-sec window:

i = sequence(rep_len(n, length(Dataexample$Y) - n   1), 
             1:(length(Dataexample$Y) - n   1))

Get 'x' and 'y':

X = matrix(as.numeric(Dataexample$X)[i], n)
Y = matrix(Dataexample$Y[i], n)

Compute all slopes at once:

Sxy = colSums(X * Y)
Sx = colSums(X)
Sy = colSums(Y)
slopes2 = ((n * Sxy) - (Sx * Sy)) / ((n * colSums(X ^ 2)) - (Sx ^ 2))

And check:

all.equal(slopes1, slopes2)
#[1] TRUE
  •  Tags:  
  • r
  • Related