Home > Enterprise >  Performing arithmetic across data frames (tibbles)
Performing arithmetic across data frames (tibbles)

Time:11-23

Is there a way to multiply each value of the variables (a, b, c) in df by its corresponding group mean and divide it by the corresponding group standard deviation, both stored in df_summary? I would like to do this without hardcoding the column names. Thanks

library(tidyverse)

set.seed(1)

# Example data: three columns of random normal draws, with rows 1-5 assigned
# to group 1 and rows 6-10 to group 2. `group` is placed before `a`.
df <- tibble(
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10)
) %>%
  mutate(group = rep(c(1, 2), each = 5), .before = "a")

# Per-group mean and standard deviation of every non-grouping column.
# The glue spec names the outputs <column>_<statistic>, e.g. a_mean, a_sd.
df_summary <- df %>%
  group_by(group) %>%
  summarise(
    across(everything(), list(mean = mean, sd = sd), .names = "{.col}_{.fn}")
  ) %>%
  ungroup()

df 
#> # A tibble: 10 × 4
#>    group      a       b       c
#>    <dbl>  <dbl>   <dbl>   <dbl>
#>  1     1 -0.626  1.51    0.919 
#>  2     1  0.184  0.390   0.782 
#>  3     1 -0.836 -0.621   0.0746
#>  4     1  1.60  -2.21   -1.99  
#>  5     1  0.330  1.12    0.620 
#>  6     2 -0.820 -0.0449 -0.0561
#>  7     2  0.487 -0.0162 -0.156 
#>  8     2  0.738  0.944  -1.47  
#>  9     2  0.576  0.821  -0.478 
#> 10     2 -0.305  0.594   0.418

df_summary
#> # A tibble: 2 × 7
#>   group a_mean  a_sd b_mean  b_sd  c_mean  c_sd
#>   <dbl>  <dbl> <dbl>  <dbl> <dbl>   <dbl> <dbl>
#> 1     1  0.129 0.961 0.0381 1.50   0.0812 1.20 
#> 2     2  0.135 0.669 0.460  0.465 -0.349  0.705

Created on 2021-11-23 by the reprex package (v2.0.1)

CodePudding user response:

This may help.

library(dplyr)

# Multiply every value by its group's mean and divide by its group's standard
# deviation, computed on the fly instead of being looked up in df_summary.
# Inside a grouped mutate(), everything() does not select the grouping column,
# so `group` itself is left untouched.
df %>%
  group_by(group) %>%
  mutate(across(everything(), \(x) x * mean(x) / sd(x))) %>%
  # Drop the grouping so later verbs do not silently operate per group.
  ungroup()

   group       a        b        c
   <dbl>   <dbl>    <dbl>    <dbl>
 1     1 -0.0843  0.0385   0.0622 
 2     1  0.0247  0.00992  0.0529 
 3     1 -0.112  -0.0158   0.00504
 4     1  0.215  -0.0563  -0.135  
 5     1  0.0443  0.0286   0.0419 
 6     2 -0.166  -0.0444   0.0278 
 7     2  0.0985 -0.0160   0.0771 
 8     2  0.149   0.933    0.728  
 9     2  0.116   0.812    0.237  
10     2 -0.0617  0.587   -0.207  

New

It gets pretty messy if you want to use the values already stored in df_summary:

library(tidyverse)

# Reshape df_summary to one row per (group, variable) with `mean` and `sd`
# columns. The ".value" sentinel turns the part of each column name after the
# last underscore (mean / sd) into output columns, and the part before it
# into `variable`. pivot_longer() is used because melt() is not provided by
# any package attached via library(tidyverse).
df2 <- df_summary %>%
  pivot_longer(
    cols = -group,
    names_to = c("variable", ".value"),
    names_pattern = "^(.*)_([^_]+)$"
  )

df %>%
  # Row identifier so the long data can be spread back to one row per
  # original observation.
  mutate(key = row_number()) %>%
  pivot_longer(
    cols = -c(key, group),
    names_to = "variable",
    values_to = "value"
  ) %>%
  # Attach the matching group-level mean and sd to every value.
  left_join(df2, by = c("group", "variable")) %>%
  mutate(value = value * mean / sd) %>%
  select(-mean, -sd) %>%
  pivot_wider(
    id_cols = c(key, group),
    names_from = variable,
    values_from = value
  ) %>%
  select(-key)

   group       a        b        c
   <dbl>   <dbl>    <dbl>    <dbl>
 1     1 -0.0841  0.0384   0.0622 
 2     1  0.0247  0.00990  0.0529 
 3     1 -0.112  -0.0158   0.00505
 4     1  0.214  -0.0563  -0.135  
 5     1  0.0442  0.0286   0.0419 
 6     2 -0.166  -0.0445   0.0278 
 7     2  0.0984 -0.0160   0.0771 
 8     2  0.149   0.934    0.728  
 9     2  0.116   0.812    0.237  
10     2 -0.0616  0.588   -0.207 

CodePudding user response:

An alternative approach, using vectorization and some wrangling to get the resulting matrix back into tibble format:

library(tidyverse)
set.seed(1)
# Same example data as in the question: `group` first (rows 1-5 in group 1,
# rows 6-10 in group 2), followed by three columns of random normal draws.
df <- tibble(
  group = rep(c(1, 2), each = 5),
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10)
)

#' Scale every non-grouping column by its within-group mean / sd
#'
#' Each value is multiplied by the mean of its column within its group and
#' divided by the standard deviation of that column within the group.
#'
#' @param df A data frame (tibble) containing the grouping column and the
#'   numeric columns to transform.
#' @param grp Unquoted grouping column, forwarded to `group_nest()`.
#' @return An ungrouped tibble with the grouping column(s) first, followed by
#'   the transformed columns under their original names.
f <- function(df, grp) {
  nested <- group_nest(df, {{ grp }})

  # group_nest() puts the grouping column(s) first and packs all remaining
  # columns into the list-column `data`. Derive the output names from that
  # layout instead of reusing names(df) positionally, which would mislabel
  # the columns whenever the grouping column is not the first column of df.
  key_cols <- setdiff(names(nested), "data")
  value_cols <- setdiff(names(df), key_cols)

  nested %>%
    # Vectorize() applies the scaling function to each column of a group's
    # data in turn; the per-group result is then wrangled back into a tibble.
    mutate(data = map(data, Vectorize(\(col) col * mean(col) / sd(col)))) %>%
    unnest(c(data)) %>%
    mutate(data = as_tibble(data)) %>%
    unpack(data) %>%
    setNames(nm = c(key_cols, value_cols))
}

# Apply the per-group scaling to the example data, grouping by `group`.
f(df, group)

#> # A tibble: 10 x 4
#>    group       a        b        c
#>    <dbl>   <dbl>    <dbl>    <dbl>
#>  1     1 -0.0843  0.0385   0.0622 
#>  2     1  0.0247  0.00992  0.0529 
#>  3     1 -0.112  -0.0158   0.00504
#>  4     1  0.215  -0.0563  -0.135  
#>  5     1  0.0443  0.0286   0.0419 
#>  6     2 -0.166  -0.0444   0.0278 
#>  7     2  0.0985 -0.0160   0.0771 
#>  8     2  0.149   0.933    0.728  
#>  9     2  0.116   0.812    0.237  
#> 10     2 -0.0617  0.587   -0.207

Note that the resulting tibble is ungrouped.

  • Related