Is there a way to multiply each variable (a, b, c) value in df by its corresponding group mean and divide by its corresponding group standard deviation, both stored in df_summary?
I would like to do it without hardcoding the column names. Thanks.
library(tidyverse)

# Reproducible example data: three seeded standard-normal columns (10 rows)
# with a leading `group` column; rows 1-5 are group 1, rows 6-10 are group 2.
set.seed(1)
df <- tibble(
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10)
) %>%
  mutate(group = rep(c(1, 2), each = 5), .before = "a")

# One row per group with the mean and standard deviation of every
# non-grouping column, named "<column>_<statistic>" (a_mean, a_sd, ...).
df_summary <- df %>%
  group_by(group) %>%
  summarise(
    across(
      everything(),
      list(mean = mean, sd = sd),
      .names = "{.col}_{.fn}"
    )
  ) %>%
  ungroup()
df
#> # A tibble: 10 × 4
#> group a b c
#> <dbl> <dbl> <dbl> <dbl>
#> 1 1 -0.626 1.51 0.919
#> 2 1 0.184 0.390 0.782
#> 3 1 -0.836 -0.621 0.0746
#> 4 1 1.60 -2.21 -1.99
#> 5 1 0.330 1.12 0.620
#> 6 2 -0.820 -0.0449 -0.0561
#> 7 2 0.487 -0.0162 -0.156
#> 8 2 0.738 0.944 -1.47
#> 9 2 0.576 0.821 -0.478
#> 10 2 -0.305 0.594 0.418
df_summary
#> # A tibble: 2 × 7
#> group a_mean a_sd b_mean b_sd c_mean c_sd
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 0.129 0.961 0.0381 1.50 0.0812 1.20
#> 2 2 0.135 0.669 0.460 0.465 -0.349 0.705
Created on 2021-11-23 by the reprex package (v2.0.1)
CodePudding user response:
This may help.
library(dplyr)

# Multiply every non-grouping column by its own within-group mean / sd.
# The statistics are computed on the fly, so no column names or summary
# values are hard-coded. Inside a grouped mutate(), across(everything())
# skips the grouping column, so `group` itself is left untouched.
df %>%
  group_by(group) %>%
  mutate(across(everything(), ~ .x * mean(.x) / sd(.x))) %>%
  # Drop the grouping so later verbs are not silently applied per group.
  ungroup()
group a b c
<dbl> <dbl> <dbl> <dbl>
1 1 -0.0843 0.0385 0.0622
2 1 0.0247 0.00992 0.0529
3 1 -0.112 -0.0158 0.00504
4 1 0.215 -0.0563 -0.135
5 1 0.0443 0.0286 0.0419
6 2 -0.166 -0.0444 0.0278
7 2 0.0985 -0.0160 0.0771
8 2 0.149 0.933 0.728
9 2 0.116 0.812 0.237
10 2 -0.0617 0.587 -0.207
New (using the values already stored in df_summary):
It gets pretty messy.
library(tidyverse)

# Reshape df_summary to one row per (group, variable) with `mean` and `sd`
# columns. The ".value" sentinel turns the part of each column name after
# "_" into an output column, so a_mean / a_sd become variable = "a" with
# columns `mean` and `sd`. Like the "_" split it replaces, this assumes the
# variable names themselves contain no underscore.
# pivot_longer() is used because melt() is not attached by the tidyverse.
df2 <- df_summary %>%
  pivot_longer(
    cols = -group,
    names_to = c("variable", ".value"),
    names_sep = "_"
  )

df %>%
  # Row id so the long data can be pivoted back to one row per observation.
  mutate(key = row_number()) %>%
  pivot_longer(
    cols = -c(key, group),
    names_to = "variable",
    values_to = "value"
  ) %>%
  # Attach the matching group/variable statistics to every value.
  left_join(df2, by = c("group", "variable")) %>%
  # `mean` and `sd` here are the joined columns, not the functions.
  mutate(value = value * mean / sd) %>%
  select(-mean, -sd) %>%
  pivot_wider(
    id_cols = c(key, group),
    names_from = variable,
    values_from = value
  ) %>%
  select(-key)
group a b c
<dbl> <dbl> <dbl> <dbl>
1 1 -0.0841 0.0384 0.0622
2 1 0.0247 0.00990 0.0529
3 1 -0.112 -0.0158 0.00505
4 1 0.214 -0.0563 -0.135
5 1 0.0442 0.0286 0.0419
6 2 -0.166 -0.0445 0.0278
7 2 0.0984 -0.0160 0.0771
8 2 0.149 0.934 0.728
9 2 0.116 0.812 0.237
10 2 -0.0616 0.588 -0.207
CodePudding user response:
An alternative approach, using vectorization and some wrangling to get the resulting matrix back into tibble format:
library(tidyverse)

# Example data: seeded standard-normal columns a, b, c (10 rows) with a
# leading `group` column; rows 1-5 are group 1, rows 6-10 are group 2.
set.seed(1)
df <- tibble(
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10)
) %>%
  mutate(group = rep(c(1, 2), each = 5), .before = "a")
#' Scale each non-grouping column by its within-group mean / sd
#'
#' For every group defined by `grp`, each remaining column is replaced by
#' `col * mean(col) / sd(col)`, with the statistics computed within the group.
#'
#' @param df A data frame whose non-grouping columns are all numeric.
#' @param grp Unquoted name of the grouping column in `df`.
#' @return An ungrouped tibble with the same column names and column order
#'   as `df`. Rows come back gathered by group, as produced by
#'   `group_nest()`. `sd()` of a single value is `NA`, so every group needs
#'   at least two rows.
f <- function(df, grp) {
  # Vectorize() maps the scaling over the columns of a data frame and
  # simplifies the result to a matrix carrying the input column names.
  scale_cols <- Vectorize(\(col) col * mean(col) / sd(col))

  df %>%
    group_nest({{ grp }}) %>%
    # Turn each group's matrix back into a tibble while the column names are
    # still attached, instead of relabelling columns by position afterwards
    # (which mislabels them whenever the grouping column is not first).
    mutate(data = map(data, \(d) as_tibble(scale_cols(d)))) %>%
    unnest(data) %>%
    # group_nest() moves the grouping column to the front; restore the
    # caller's original column order by name.
    select(all_of(names(df)))
}
# Apply the per-group scaling to the example data, grouping on `group`;
# the result auto-prints at top level.
f(df, group)
#> # A tibble: 10 x 4
#> group a b c
#> <dbl> <dbl> <dbl> <dbl>
#> 1 1 -0.0843 0.0385 0.0622
#> 2 1 0.0247 0.00992 0.0529
#> 3 1 -0.112 -0.0158 0.00504
#> 4 1 0.215 -0.0563 -0.135
#> 5 1 0.0443 0.0286 0.0419
#> 6 2 -0.166 -0.0444 0.0278
#> 7 2 0.0985 -0.0160 0.0771
#> 8 2 0.149 0.933 0.728
#> 9 2 0.116 0.812 0.237
#> 10 2 -0.0617 0.587 -0.207
Note that the resulting tibble is ungrouped.