I have the following dataset, called 'test':
structure(list(time = c(2, 3.9, 5.8, 7.8, 9.7, 11.7, 13.6, 15.5,
17.5, 19.4, 21.4, 23.3, 25.3, 27.2, 29.2, 31.2, 33.1, 35.1, 37.1,
39, 41, 42.9, 44.9, 46.8, 48.7, 50.7, 52.6, 54.6, 56.5, 58.5,
60.4, 62.4, 64.3, 66.2, 68.2, 70.1, 72.1, 74, 76, 77.9, 79.8,
81.8, 83.7, 85.7, 87.6, 89.5, 91.5, 93.4, 95.4, 97.3), v = c(14.82,
14.804, 14.82, 14.82, 14.804, 14.82, 14.812, 14.804, 14.8, 14.808,
14.8, 14.804, 15.844, 15.848, 15.848, 15.852, 15.852, 15.848,
15.852, 15.852, 15.852, 15.852, 15.856, 15.852, 15.852, 15.856,
15.856, 15.856, 15.856, 15.856, 15.856, 15.856, 15.852, 15.852,
15.852, 15.852, 15.856, 15.856, 15.856, 15.86, 15.856, 15.86,
15.864, 15.856, 15.86, 15.86, 15.86, 15.856, 15.86, 15.856),
a = c(1.5, 1.476, 1.5, 1.491, 1.452, 1.476, 1.478, 1.44,
1.454, 1.438, 1.442, 1.471, 0.002, 0.002, 0.002, 0.002, 0.002,
0.002, 0.002, 0.002, 0.001, 0.002, 0.002, 0.001, 0.001, 0.002,
0.002, 0.002, 0.001, 0.001, 0.001, 0.002, 0.002, 0.002, 0.001,
0.002, 0.002, 0.002, 0.001, 0.002, 0.002, 0.002, 0.002, 0.002,
0.001, 0.002, 0.002, 0.002, 0.001, 0.002), t1 = c(14.61,
14.61, 14.61, 14.61, 14.61, 14.61, 14.61, 14.62, 14.62, 14.63,
14.64, 14.65, 14.67, 14.7, 14.72, 14.75, 14.78, 14.82, 14.85,
14.89, 14.93, 14.97, 15.01, 15.05, 15.09, 15.13, 15.17, 15.2,
15.23, 15.26, 15.29, 15.32, 15.34, 15.37, 15.39, 15.41, 15.43,
15.45, 15.46, 15.48, 15.49, 15.51, 15.52, 15.53, 15.54, 15.55,
15.56, 15.56, 15.57, 15.58), t2 = c(14.63, 14.62, 14.62,
14.62, 14.62, 14.62, 14.62, 14.63, 14.63, 14.64, 14.65, 14.67,
14.69, 14.71, 14.74, 14.78, 14.82, 14.86, 14.9, 14.95, 15,
15.06, 15.11, 15.16, 15.22, 15.27, 15.31, 15.36, 15.4, 15.44,
15.48, 15.51, 15.55, 15.58, 15.61, 15.63, 15.65, 15.68, 15.7,
15.71, 15.73, 15.75, 15.76, 15.77, 15.78, 15.79, 15.8, 15.81,
15.82, 15.83), t3 = c(14.63, 14.63, 14.63, 14.63, 14.63,
14.63, 14.63, 14.63, 14.64, 14.65, 14.66, 14.68, 14.7, 14.73,
14.75, 14.79, 14.83, 14.86, 14.91, 14.95, 15, 15.04, 15.09,
15.13, 15.18, 15.22, 15.26, 15.3, 15.33, 15.37, 15.4, 15.43,
15.46, 15.48, 15.5, 15.53, 15.54, 15.56, 15.58, 15.59, 15.61,
15.62, 15.63, 15.64, 15.65, 15.66, 15.67, 15.67, 15.68, 15.69
), t4 = c(14.65, 14.65, 14.65, 14.65, 14.64, 14.64, 14.65,
14.65, 14.66, 14.67, 14.68, 14.7, 14.73, 14.75, 14.79, 14.82,
14.86, 14.91, 14.95, 15, 15.05, 15.1, 15.16, 15.21, 15.25,
15.3, 15.35, 15.39, 15.43, 15.46, 15.5, 15.53, 15.56, 15.59,
15.62, 15.64, 15.66, 15.68, 15.7, 15.72, 15.73, 15.75, 15.76,
15.77, 15.78, 15.79, 15.8, 15.81, 15.82, 15.82), t5 = c(14.65,
14.65, 14.65, 14.65, 14.65, 14.65, 14.66, 14.66, 14.67, 14.69,
14.7, 14.73, 14.75, 14.78, 14.81, 14.85, 14.89, 14.93, 14.97,
15.02, 15.06, 15.11, 15.15, 15.2, 15.24, 15.28, 15.32, 15.36,
15.4, 15.43, 15.46, 15.49, 15.52, 15.54, 15.57, 15.59, 15.61,
15.63, 15.64, 15.66, 15.68, 15.69, 15.7, 15.71, 15.72, 15.73,
15.74, 15.75, 15.76, 15.76), t6 = c(14.63, 14.63, 14.63,
14.63, 14.63, 14.63, 14.63, 14.64, 14.65, 14.66, 14.67, 14.69,
14.72, 14.74, 14.77, 14.8, 14.84, 14.88, 14.91, 14.95, 15,
15.04, 15.08, 15.12, 15.16, 15.2, 15.23, 15.27, 15.3, 15.33,
15.36, 15.39, 15.41, 15.44, 15.46, 15.48, 15.5, 15.52, 15.54,
15.55, 15.57, 15.58, 15.59, 15.61, 15.62, 15.63, 15.64, 15.65,
15.65, 15.66), t7 = c(14.64, 14.64, 14.64, 14.64, 14.64,
14.64, 14.64, 14.64, 14.65, 14.66, 14.67, 14.68, 14.7, 14.72,
14.75, 14.77, 14.8, 14.83, 14.86, 14.9, 14.93, 14.97, 15,
15.04, 15.07, 15.11, 15.14, 15.17, 15.21, 15.24, 15.26, 15.29,
15.32, 15.34, 15.36, 15.38, 15.41, 15.42, 15.44, 15.46, 15.47,
15.49, 15.5, 15.52, 15.53, 15.54, 15.55, 15.56, 15.57, 15.58
), t8 = c(14.6, 14.6, 14.6, 14.6, 14.6, 14.6, 14.61, 14.61,
14.62, 14.63, 14.64, 14.66, 14.68, 14.71, 14.74, 14.77, 14.8,
14.84, 14.88, 14.92, 14.96, 15, 15.03, 15.07, 15.11, 15.14,
15.17, 15.21, 15.24, 15.26, 15.29, 15.31, 15.34, 15.36, 15.38,
15.39, 15.41, 15.43, 15.44, 15.45, 15.47, 15.48, 15.49, 15.5,
15.51, 15.52, 15.52, 15.53, 15.54, 15.54)), row.names = c(NA,
50L), class = "data.frame")
This dataset has 8 columns (t1-t8) on which I want to do a correction. This is done by taking the mean of the first 5 rows per column:
colmeans = colMeans(test[1:5,4:11])
and subtracting those means from the dataset:
test[, paste0('t', 1:8)] = test[, paste0('t', 1:8)] - colmeans
However, something is going wrong in this subtraction, but I can't figure out what.
The outcome of the code above doesn't match the outcome when done by hand for a single row:
test2[1, paste0('t', 1:8)] - colmeans
(test2 is the same as the original test dataset)
The correct outcome of the subtraction for the first row is:
t1 t2 t3 t4 t5 t6 t7 t8
>1 0 0.008 0 0.002 0 0 0 0
But the outcome I get with the code is wrong:
t1 t2 t3 t4 t5 t6 t7 t8
>1 0.000 -0.018 -0.010 0.028 0.000 0.030 0.010 -0.030
I know the outcome done by hand is correct, so what is wrong with my code?
CodePudding user response:
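This is because the computation is done across rows (instead of columns): when a vector is subtracted from a data frame, R recycles the vector down each column, so the elements of colmeans are matched to rows rather than to columns.
You could transpose twice to correct the problem: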
t(t(test[, paste0('t', 1:8)]) - colmeans)
t1 t2 t3 t4 t5 t6 t7 t8
1 0.00 0.008 0.00 0.002 0.00 0.00 0.00 0.00
2 0.00 -0.002 0.00 0.002 0.00 0.00 0.00 0.00
3 0.00 -0.002 0.00 0.002 0.00 0.00 0.00 0.00
4 0.00 -0.002 0.00 0.002 0.00 0.00 0.00 0.00
5 0.00 -0.002 0.00 -0.008 0.00 0.00 0.00 0.00
6 0.00 -0.002 0.00 -0.008 0.00 0.00 0.00 0.00
7 0.00 -0.002 0.00 0.002 0.01 0.00 0.00 0.01
8 0.01 0.008 0.00 0.002 0.01 0.01 0.00 0.01
9 0.01 0.008 0.01 0.012 0.02 0.02 0.01 0.02
10 0.02 0.018 0.02 0.022 0.04 0.03 0.02 0.03