Home > OS >  How to multiply each column of dataframe with its standard deviation
How to multiply each column of dataframe with its standard deviation

Time:11-27

I have the following data

# Population model used to generate the data: x and y are each measured by
# four indicators with fixed loadings, and y is regressed on x. lavaan model
# syntax requires the terms on the right-hand side to be joined with `+`.
sim_model <- "
x =~ 1.3*x1 + 1.2*x2 + 1.1*x3 + 1.2*x4
y =~ 1*y1 + 1.2*y2 + 1.3*y3 + 0.9*y4
y ~ 0.6*x
"
sim_data <- lavaan::simulateData(sim_model)
# Same structure without fixed coefficients (defined but not used in this
# snippet).
model <- "
x =~ x1 + x2 + x3 + x4
y =~ y1 + y2 + y3 + y4
y ~ x
"
# One standard deviation per column of sim_data; the row names of sd_d are the
# column names. vapply() works column by column instead of coercing the data
# frame to a matrix the way apply() does.
sd_d <- data.frame(sd_d = vapply(sim_data, sd, numeric(1)))

I need to multiply each column of sim_data by its corresponding standard deviation, which is stored in sd_d.

Any help?

CodePudding user response:

Perhaps this works? It appends new columns to the end of the data frame, each containing one of the original columns multiplied by its standard deviation.


library(dplyr)

# Append one "SDx<COLUMN>" column per variable (SDxX1, ..., SDxY4), each
# holding the original column multiplied by its own standard deviation.
# across() applies the same computation to every column, so no column has to
# be spelled out by hand.
sim_data |>
  mutate(across(
    everything(),
    \(col) sd(col) * col,
    .names = "SDx{toupper(.col)}"
  ))

CodePudding user response:

How about this:

library(dplyr)
# Population model used to generate the data. lavaan model syntax requires the
# terms on the right-hand side of each formula to be joined with `+`.
sim_model <- "
x =~ 1.3*x1 + 1.2*x2 + 1.1*x3 + 1.2*x4
y =~ 1*y1 + 1.2*y2 + 1.3*y3 + 0.9*y4
y ~ 0.6*x
"
sim_data <- lavaan::simulateData(sim_model)
# Same structure without fixed coefficients (defined but not used in this
# snippet).
model <- "
x =~ x1 + x2 + x3 + x4
y =~ y1 + y2 + y3 + y4
y ~ x
"
# One standard deviation per column; row names of sd_d are the column names.
sd_d <- data.frame(sd_d = vapply(sim_data, sd, numeric(1)))

# Multiply column i by the i-th standard deviation. The result is a plain
# numeric matrix with one column per column of sim_data and no column names.
# vapply() fixes the shape of each piece to nrow(sim_data) numbers, and
# seq_along() stays empty for a zero-column input, where 1:ncol() would
# produce c(1, 0).
new_sim_data <- vapply(
  seq_along(sim_data),
  function(i) sim_data[[i]] * sd_d$sd_d[i],
  numeric(nrow(sim_data))
)
head(new_sim_data)
#>            [,1]       [,2]       [,3]      [,4]       [,5]       [,6]
#> [1,]  3.9817268  4.4529682  1.7882380  2.621278 -0.3147092  0.7048940
#> [2,] -0.8972617 -0.4562149 -0.1165654  1.318948  0.4359371  0.4220787
#> [3,]  1.9188604  0.9183960  5.3265835  4.025215  1.6147254  1.8146241
#> [4,] -4.2811180 -0.4473838 -1.4982330 -1.325111 -2.5972828 -0.7700888
#> [5,] -2.8633480  2.4930664  1.9927546 -1.186898  3.8177569  4.4855348
#> [6,]  1.0197316  0.7887374  2.2055450  2.039363  2.8806220  9.2947559
#>            [,7]       [,8]
#> [1,]  2.2355215 -1.3586282
#> [2,] -1.0632624 -0.6658058
#> [3,]  0.1758628  0.1879555
#> [4,] -3.0958775  2.8376086
#> [5,]  4.5647521  3.5110156
#> [6,]  7.0123519  1.3295521

Created on 2022-11-26 by the reprex package (v2.0.1)

CodePudding user response:

Here are a couple of tidyverse solutions. It would be easiest to just calculate everything you need from within the dataframe. If you really need to use sd_d then it is possible, but a little more convoluted:

library(tidyverse)

# Easiest solution: compute each standard deviation inside the data frame and
# append a scaled copy of every column from x1 to y4, named <column>_sd.
mutate(
  sim_data,
  across(x1:y4, \(col) col * sd(col), .names = "{.col}_sd")
)
#> # A tibble: 500 x 16
#>        x1     x2     x3     x4     y1     y2     y3     y4  x1_sd  x2_sd  x3_sd
#>     <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
#>  1 -0.926  0.468  0.476 -0.252 -2.53  -0.374  0.690  0.354 -1.47   0.728  0.697
#>  2 -0.813 -0.782 -0.820  0.300  0.978  1.48  -0.566  1.45  -1.29  -1.22  -1.20 
#>  3 -3.28  -2.18  -2.48  -1.12  -0.235 -0.562  0.919  1.78  -5.20  -3.39  -3.63 
#>  4  1.18   2.49   2.52   2.46   2.97   3.31   0.881 -1.10   1.87   3.87   3.69 
#>  5  1.33   0.328  1.83  -2.04   0.204  1.40   1.94   1.77   2.10   0.511  2.68 
#>  6 -0.388 -3.12  -1.03   0.137 -0.333 -0.167  0.482 -0.417 -0.615 -4.85  -1.50 
#>  7 -1.12  -1.83   0.143 -1.75   0.169 -1.51  -2.40  -0.793 -1.77  -2.84   0.209
#>  8 -3.04  -0.899 -2.77  -0.586 -1.98  -1.13  -2.97  -1.64  -4.81  -1.40  -4.06 
#>  9 -0.856  1.46   1.37  -0.617  1.45  -0.149 -0.169  0.842 -1.36   2.26   2.00 
#> 10  0.492  0.506  0.616 -1.23  -0.841  0.132 -0.528 -1.51   0.779  0.786  0.902
#> # ... with 490 more rows, and 5 more variables: x4_sd <dbl>, y1_sd <dbl>,
#> #   y2_sd <dbl>, y3_sd <dbl>, y4_sd <dbl>


# Less easy solution: reuse the precomputed sd_d table. For each column name,
# build a two-column tibble holding the original values and those values
# multiplied by the sd_d entry whose row name equals the column name. The
# per-column tibbles are bound side by side, then reordered so the original
# columns come first and the *_sd columns last.
map_dfc(colnames(sim_data), \(nm) {
  values <- sim_data[[nm]]
  scale_by <- sd_d$sd_d[rownames(sd_d) == nm]
  tibble(!!nm := values, !!paste0(nm, "_sd") := values * scale_by)
}) |>
  select(colnames(sim_data), contains("_sd"))
#> # A tibble: 500 x 16
#>        x1     x2     x3     x4     y1     y2     y3     y4  x1_sd  x2_sd  x3_sd
#>     <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
#>  1 -0.926  0.468  0.476 -0.252 -2.53  -0.374  0.690  0.354 -1.47   0.728  0.697
#>  2 -0.813 -0.782 -0.820  0.300  0.978  1.48  -0.566  1.45  -1.29  -1.22  -1.20 
#>  3 -3.28  -2.18  -2.48  -1.12  -0.235 -0.562  0.919  1.78  -5.20  -3.39  -3.63 
#>  4  1.18   2.49   2.52   2.46   2.97   3.31   0.881 -1.10   1.87   3.87   3.69 
#>  5  1.33   0.328  1.83  -2.04   0.204  1.40   1.94   1.77   2.10   0.511  2.68 
#>  6 -0.388 -3.12  -1.03   0.137 -0.333 -0.167  0.482 -0.417 -0.615 -4.85  -1.50 
#>  7 -1.12  -1.83   0.143 -1.75   0.169 -1.51  -2.40  -0.793 -1.77  -2.84   0.209
#>  8 -3.04  -0.899 -2.77  -0.586 -1.98  -1.13  -2.97  -1.64  -4.81  -1.40  -4.06 
#>  9 -0.856  1.46   1.37  -0.617  1.45  -0.149 -0.169  0.842 -1.36   2.26   2.00 
#> 10  0.492  0.506  0.616 -1.23  -0.841  0.132 -0.528 -1.51   0.779  0.786  0.902
#> # ... with 490 more rows, and 5 more variables: x4_sd <dbl>, y1_sd <dbl>,
#> #   y2_sd <dbl>, y3_sd <dbl>, y4_sd <dbl>
  •  Tags:  
  • r
  • Related