Home > database >  subtracting a vector from a data.frame in R
subtracting a vector from a data.frame in R

Time:08-11

I have the following dataset, called 'test'

structure(list(time = c(2, 3.9, 5.8, 7.8, 9.7, 11.7, 13.6, 15.5, 
17.5, 19.4, 21.4, 23.3, 25.3, 27.2, 29.2, 31.2, 33.1, 35.1, 37.1, 
39, 41, 42.9, 44.9, 46.8, 48.7, 50.7, 52.6, 54.6, 56.5, 58.5, 
60.4, 62.4, 64.3, 66.2, 68.2, 70.1, 72.1, 74, 76, 77.9, 79.8, 
81.8, 83.7, 85.7, 87.6, 89.5, 91.5, 93.4, 95.4, 97.3), v = c(14.82, 
14.804, 14.82, 14.82, 14.804, 14.82, 14.812, 14.804, 14.8, 14.808, 
14.8, 14.804, 15.844, 15.848, 15.848, 15.852, 15.852, 15.848, 
15.852, 15.852, 15.852, 15.852, 15.856, 15.852, 15.852, 15.856, 
15.856, 15.856, 15.856, 15.856, 15.856, 15.856, 15.852, 15.852, 
15.852, 15.852, 15.856, 15.856, 15.856, 15.86, 15.856, 15.86, 
15.864, 15.856, 15.86, 15.86, 15.86, 15.856, 15.86, 15.856), 
    a = c(1.5, 1.476, 1.5, 1.491, 1.452, 1.476, 1.478, 1.44, 
    1.454, 1.438, 1.442, 1.471, 0.002, 0.002, 0.002, 0.002, 0.002, 
    0.002, 0.002, 0.002, 0.001, 0.002, 0.002, 0.001, 0.001, 0.002, 
    0.002, 0.002, 0.001, 0.001, 0.001, 0.002, 0.002, 0.002, 0.001, 
    0.002, 0.002, 0.002, 0.001, 0.002, 0.002, 0.002, 0.002, 0.002, 
    0.001, 0.002, 0.002, 0.002, 0.001, 0.002), t1 = c(14.61, 
    14.61, 14.61, 14.61, 14.61, 14.61, 14.61, 14.62, 14.62, 14.63, 
    14.64, 14.65, 14.67, 14.7, 14.72, 14.75, 14.78, 14.82, 14.85, 
    14.89, 14.93, 14.97, 15.01, 15.05, 15.09, 15.13, 15.17, 15.2, 
    15.23, 15.26, 15.29, 15.32, 15.34, 15.37, 15.39, 15.41, 15.43, 
    15.45, 15.46, 15.48, 15.49, 15.51, 15.52, 15.53, 15.54, 15.55, 
    15.56, 15.56, 15.57, 15.58), t2 = c(14.63, 14.62, 14.62, 
    14.62, 14.62, 14.62, 14.62, 14.63, 14.63, 14.64, 14.65, 14.67, 
    14.69, 14.71, 14.74, 14.78, 14.82, 14.86, 14.9, 14.95, 15, 
    15.06, 15.11, 15.16, 15.22, 15.27, 15.31, 15.36, 15.4, 15.44, 
    15.48, 15.51, 15.55, 15.58, 15.61, 15.63, 15.65, 15.68, 15.7, 
    15.71, 15.73, 15.75, 15.76, 15.77, 15.78, 15.79, 15.8, 15.81, 
    15.82, 15.83), t3 = c(14.63, 14.63, 14.63, 14.63, 14.63, 
    14.63, 14.63, 14.63, 14.64, 14.65, 14.66, 14.68, 14.7, 14.73, 
    14.75, 14.79, 14.83, 14.86, 14.91, 14.95, 15, 15.04, 15.09, 
    15.13, 15.18, 15.22, 15.26, 15.3, 15.33, 15.37, 15.4, 15.43, 
    15.46, 15.48, 15.5, 15.53, 15.54, 15.56, 15.58, 15.59, 15.61, 
    15.62, 15.63, 15.64, 15.65, 15.66, 15.67, 15.67, 15.68, 15.69
    ), t4 = c(14.65, 14.65, 14.65, 14.65, 14.64, 14.64, 14.65, 
    14.65, 14.66, 14.67, 14.68, 14.7, 14.73, 14.75, 14.79, 14.82, 
    14.86, 14.91, 14.95, 15, 15.05, 15.1, 15.16, 15.21, 15.25, 
    15.3, 15.35, 15.39, 15.43, 15.46, 15.5, 15.53, 15.56, 15.59, 
    15.62, 15.64, 15.66, 15.68, 15.7, 15.72, 15.73, 15.75, 15.76, 
    15.77, 15.78, 15.79, 15.8, 15.81, 15.82, 15.82), t5 = c(14.65, 
    14.65, 14.65, 14.65, 14.65, 14.65, 14.66, 14.66, 14.67, 14.69, 
    14.7, 14.73, 14.75, 14.78, 14.81, 14.85, 14.89, 14.93, 14.97, 
    15.02, 15.06, 15.11, 15.15, 15.2, 15.24, 15.28, 15.32, 15.36, 
    15.4, 15.43, 15.46, 15.49, 15.52, 15.54, 15.57, 15.59, 15.61, 
    15.63, 15.64, 15.66, 15.68, 15.69, 15.7, 15.71, 15.72, 15.73, 
    15.74, 15.75, 15.76, 15.76), t6 = c(14.63, 14.63, 14.63, 
    14.63, 14.63, 14.63, 14.63, 14.64, 14.65, 14.66, 14.67, 14.69, 
    14.72, 14.74, 14.77, 14.8, 14.84, 14.88, 14.91, 14.95, 15, 
    15.04, 15.08, 15.12, 15.16, 15.2, 15.23, 15.27, 15.3, 15.33, 
    15.36, 15.39, 15.41, 15.44, 15.46, 15.48, 15.5, 15.52, 15.54, 
    15.55, 15.57, 15.58, 15.59, 15.61, 15.62, 15.63, 15.64, 15.65, 
    15.65, 15.66), t7 = c(14.64, 14.64, 14.64, 14.64, 14.64, 
    14.64, 14.64, 14.64, 14.65, 14.66, 14.67, 14.68, 14.7, 14.72, 
    14.75, 14.77, 14.8, 14.83, 14.86, 14.9, 14.93, 14.97, 15, 
    15.04, 15.07, 15.11, 15.14, 15.17, 15.21, 15.24, 15.26, 15.29, 
    15.32, 15.34, 15.36, 15.38, 15.41, 15.42, 15.44, 15.46, 15.47, 
    15.49, 15.5, 15.52, 15.53, 15.54, 15.55, 15.56, 15.57, 15.58
    ), t8 = c(14.6, 14.6, 14.6, 14.6, 14.6, 14.6, 14.61, 14.61, 
    14.62, 14.63, 14.64, 14.66, 14.68, 14.71, 14.74, 14.77, 14.8, 
    14.84, 14.88, 14.92, 14.96, 15, 15.03, 15.07, 15.11, 15.14, 
    15.17, 15.21, 15.24, 15.26, 15.29, 15.31, 15.34, 15.36, 15.38, 
    15.39, 15.41, 15.43, 15.44, 15.45, 15.47, 15.48, 15.49, 15.5, 
    15.51, 15.52, 15.52, 15.53, 15.54, 15.54)), row.names = c(NA, 
50L), class = "data.frame")

This dataset has 8 columns (t1-t8) that I want to correct. The correction is done by taking the mean of the first 5 rows of each column, colmeans = colMeans(test[1:5,4:11]), and then subtracting those means from the dataset: test[, paste0('t', 1:8)] = test[, paste0('t', 1:8)] - colmeans

However, something is going wrong in this subtraction, and I can't figure out what. The outcome of the code above doesn't match the outcome I get when doing the subtraction by hand for a single row: test2[1, paste0('t', 1:8)] - colmeans (test2 is the same as the original test dataset)

The correct outcome of the subtraction for the first row is:

  t1    t2 t3    t4 t5 t6 t7 t8
>1  0 0.008  0 0.002  0  0  0  0

But the outcome I get with the code is wrong:

         t1     t2     t3    t4    t5     t6     t7     t8
>1      0.000 -0.018 -0.010 0.028 0.000  0.030  0.010 -0.030

I know the outcome done by hand is correct, so what is wrong with my code?

CodePudding user response:

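This is because subtracting a vector from a data.frame recycles the vector down each column (column by column, in column-major order), so its elements are paired with rows instead of with columns. That is also why the single-row check gives the right answer: with only one row, each column receives exactly one element of colmeans. You could transpose twice to correct the problem: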

t(t(test[, paste0('t', 1:8)]) - colmeans)
     t1     t2   t3     t4   t5   t6   t7   t8
1  0.00  0.008 0.00  0.002 0.00 0.00 0.00 0.00
2  0.00 -0.002 0.00  0.002 0.00 0.00 0.00 0.00
3  0.00 -0.002 0.00  0.002 0.00 0.00 0.00 0.00
4  0.00 -0.002 0.00  0.002 0.00 0.00 0.00 0.00
5  0.00 -0.002 0.00 -0.008 0.00 0.00 0.00 0.00
6  0.00 -0.002 0.00 -0.008 0.00 0.00 0.00 0.00
7  0.00 -0.002 0.00  0.002 0.01 0.00 0.00 0.01
8  0.01  0.008 0.00  0.002 0.01 0.01 0.00 0.01
9  0.01  0.008 0.01  0.012 0.02 0.02 0.01 0.02
10 0.02  0.018 0.02  0.022 0.04 0.03 0.02 0.03
  • Related