I have the following data:
# Population model used for simulation, in lavaan syntax: `=~` defines the
# latent variables x and y by their indicators (with the given loadings) and
# `~` regresses y on x. Terms on the right-hand side must be joined with `+`.
sim_model <- "
x =~ 1.3*x1 + 1.2*x2 + 1.1*x3 + 1.2*x4
y =~ 1*y1 + 1.2*y2 + 1.3*y3 + 0.9*y4
y ~ 0.6*x
"
# Simulate a data set from the population model (columns x1..x4, y1..y4).
sim_data <- lavaan::simulateData(sim_model)
# Same structure as sim_model, but without fixed coefficient values.
model <- "
x =~ x1 + x2 + x3 + x4
y =~ y1 + y2 + y3 + y4
y ~ x
"
# One row per column of sim_data (row names are the column names), holding
# that column's standard deviation. vapply() iterates over the columns
# directly instead of coercing the data frame to a matrix as apply() does.
sd_d <- data.frame(sd_d = vapply(sim_data, sd, numeric(1)))
I need to multiply each column of `sim_data` by its corresponding standard deviation, which is stored in `sd_d`.
Any help?
CodePudding user response:
Perhaps this works? It adds columns containing the requested values to the end of the data frame (dplyr must be loaded for `mutate()`).
# Append one `SDx<COLUMN>` column per variable, holding the variable
# multiplied by its own standard deviation.
sim_data |>
  mutate(
    across(
      c(x1, x2, x3, x4, y1, y2, y3, y4),
      \(v) sd(v) * v,
      # Build names such as SDxX1 / SDxY4 from the source column name.
      .names = "SDx{toupper(.col)}"
    )
  )
CodePudding user response:
How about this:
library(dplyr)
# Population model used for simulation, in lavaan syntax. Terms on the
# right-hand side of each formula must be joined with `+`.
sim_model <- "
x =~ 1.3*x1 + 1.2*x2 + 1.1*x3 + 1.2*x4
y =~ 1*y1 + 1.2*y2 + 1.3*y3 + 0.9*y4
y ~ 0.6*x
"
# Simulate a data set from the population model.
sim_data <- lavaan::simulateData(sim_model)
# Same structure as sim_model, but without fixed coefficient values.
model <- "
x =~ x1 + x2 + x3 + x4
y =~ y1 + y2 + y3 + y4
y ~ x
"
# One row per column of sim_data, holding that column's standard deviation.
sd_d <- data.frame(sd_d = vapply(sim_data, sd, numeric(1)))
# Multiply the i-th column of sim_data by the i-th standard deviation.
# vapply() with a declared result length always yields a numeric matrix
# (rows = observations, columns = variables), and seq_along() stays correct
# even when there are no columns.
new_sim_data <- vapply(
  seq_along(sim_data),
  function(i) sim_data[[i]] * sd_d$sd_d[i],
  numeric(nrow(sim_data))
)
head(new_sim_data)
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,] 3.9817268 4.4529682 1.7882380 2.621278 -0.3147092 0.7048940
#> [2,] -0.8972617 -0.4562149 -0.1165654 1.318948 0.4359371 0.4220787
#> [3,] 1.9188604 0.9183960 5.3265835 4.025215 1.6147254 1.8146241
#> [4,] -4.2811180 -0.4473838 -1.4982330 -1.325111 -2.5972828 -0.7700888
#> [5,] -2.8633480 2.4930664 1.9927546 -1.186898 3.8177569 4.4855348
#> [6,] 1.0197316 0.7887374 2.2055450 2.039363 2.8806220 9.2947559
#> [,7] [,8]
#> [1,] 2.2355215 -1.3586282
#> [2,] -1.0632624 -0.6658058
#> [3,] 0.1758628 0.1879555
#> [4,] -3.0958775 2.8376086
#> [5,] 4.5647521 3.5110156
#> [6,] 7.0123519 1.3295521
Created on 2022-11-26 by the reprex package (v2.0.1)
CodePudding user response:
Here are a couple of tidyverse solutions. It would be easiest to just calculate everything you need from within the data frame. If you really need to use `sd_d`, that is possible too, but a little more convoluted:
library(tidyverse)
# Easiest solution: compute each column's standard deviation on the fly and
# append the scaled values as new `<column>_sd` columns.
mutate(
  sim_data,
  across(x1:y4, function(v) sd(v) * v, .names = "{.col}_sd")
)
#> # A tibble: 500 x 16
#> x1 x2 x3 x4 y1 y2 y3 y4 x1_sd x2_sd x3_sd
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 -0.926 0.468 0.476 -0.252 -2.53 -0.374 0.690 0.354 -1.47 0.728 0.697
#> 2 -0.813 -0.782 -0.820 0.300 0.978 1.48 -0.566 1.45 -1.29 -1.22 -1.20
#> 3 -3.28 -2.18 -2.48 -1.12 -0.235 -0.562 0.919 1.78 -5.20 -3.39 -3.63
#> 4 1.18 2.49 2.52 2.46 2.97 3.31 0.881 -1.10 1.87 3.87 3.69
#> 5 1.33 0.328 1.83 -2.04 0.204 1.40 1.94 1.77 2.10 0.511 2.68
#> 6 -0.388 -3.12 -1.03 0.137 -0.333 -0.167 0.482 -0.417 -0.615 -4.85 -1.50
#> 7 -1.12 -1.83 0.143 -1.75 0.169 -1.51 -2.40 -0.793 -1.77 -2.84 0.209
#> 8 -3.04 -0.899 -2.77 -0.586 -1.98 -1.13 -2.97 -1.64 -4.81 -1.40 -4.06
#> 9 -0.856 1.46 1.37 -0.617 1.45 -0.149 -0.169 0.842 -1.36 2.26 2.00
#> 10 0.492 0.506 0.616 -1.23 -0.841 0.132 -0.528 -1.51 0.779 0.786 0.902
#> # ... with 490 more rows, and 5 more variables: x4_sd <dbl>, y1_sd <dbl>,
#> # y2_sd <dbl>, y3_sd <dbl>, y4_sd <dbl>
# Less easy solution: reuse the precomputed standard deviations in sd_d.
# For every column, build a two-column tibble (the original values and the
# values scaled by the matching sd_d row), then bind the pieces column-wise.
map_dfc(colnames(sim_data), \(col) {
  values <- sim_data[[col]]
  # Look up the standard deviation whose row name equals this column name.
  col_sd <- sd_d[rownames(sd_d) == col, ]
  tibble(
    "{col}" := values,
    "{col}_sd" := values * col_sd
  )
}) |>
  # Original columns first, scaled `_sd` columns after them.
  select(all_of(colnames(sim_data)), contains("_sd"))
#> # A tibble: 500 x 16
#> x1 x2 x3 x4 y1 y2 y3 y4 x1_sd x2_sd x3_sd
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 -0.926 0.468 0.476 -0.252 -2.53 -0.374 0.690 0.354 -1.47 0.728 0.697
#> 2 -0.813 -0.782 -0.820 0.300 0.978 1.48 -0.566 1.45 -1.29 -1.22 -1.20
#> 3 -3.28 -2.18 -2.48 -1.12 -0.235 -0.562 0.919 1.78 -5.20 -3.39 -3.63
#> 4 1.18 2.49 2.52 2.46 2.97 3.31 0.881 -1.10 1.87 3.87 3.69
#> 5 1.33 0.328 1.83 -2.04 0.204 1.40 1.94 1.77 2.10 0.511 2.68
#> 6 -0.388 -3.12 -1.03 0.137 -0.333 -0.167 0.482 -0.417 -0.615 -4.85 -1.50
#> 7 -1.12 -1.83 0.143 -1.75 0.169 -1.51 -2.40 -0.793 -1.77 -2.84 0.209
#> 8 -3.04 -0.899 -2.77 -0.586 -1.98 -1.13 -2.97 -1.64 -4.81 -1.40 -4.06
#> 9 -0.856 1.46 1.37 -0.617 1.45 -0.149 -0.169 0.842 -1.36 2.26 2.00
#> 10 0.492 0.506 0.616 -1.23 -0.841 0.132 -0.528 -1.51 0.779 0.786 0.902
#> # ... with 490 more rows, and 5 more variables: x4_sd <dbl>, y1_sd <dbl>,
#> # y2_sd <dbl>, y3_sd <dbl>, y4_sd <dbl>