Home > database >  Compute multiple column to column differences R
Compute multiple column to column differences R

Time:12-15

I would like to compute differences among several columns, per identifiers (see script below for reproducible example and target data frame).

This question is somehow similar, but only for pairs of identifiers. I can't think on how to adapt it. I could also have several data frame, one per identifier, but I also don't know in that case how to compute multiple columns differences.

The code below allows to create a sample dataset, and has the code I currently use. It gives me what I want, I'd just like to know if there is a way not to spell out all the differences I want to compute (in my dataset, I have more parameters and depths than in that sample data).

Thanks in advance for your help!

library(tidyverse)
# sample data
create.dt <-  function(t = 0) {
   data.frame(parameter = rep(c("temperature","oxygen"), each = 3),
                  date = rep(c(Sys.Date() t), each = 6),
                  depth = rep(1:3, times = 2),
             value = c(data.frame(x = rnorm(3, 16, 2)) %>% 
                         arrange(-x) %>% pull,
                       data.frame(x = rnorm(3, 7, 1)) %>% 
                         arrange(-x) %>% pull
                  ))
  
  }

# Multi-site dataset
dt <- rbind(
  cbind(site = "A", create.dt(t = c(-3:0))),
  cbind(site = "B", create.dt(t = c(-3:0))),
  cbind(site = "C", create.dt(t = c(-3:0))),
  cbind(site = "D", create.dt(t = c(-3:0))),
  cbind(site = "E", create.dt(t = c(-3:0))))

# Reshape the data and compute differences
dt %>% pivot_wider(id_cols = c(site,date), names_from = c(parameter,depth), values_from = value, names_sep = "_") %>%
  # do the difference, depth to depth, parameter by parameter
  # What I would like is not have to write manually each differences pair
  mutate(temperature_1_2 = temperature_1 - temperature_2,
         temperature_1_3 = temperature_1 - temperature_3,
         temperature_2_3 = temperature_2 - temperature_3,
         oxygen_1_2 = oxygen_1 - oxygen_2,
         oxygen_1_3 = oxygen_1 - oxygen_3,
         oxygen_2_3 = oxygen_2 - oxygen_3)

CodePudding user response:

library(tidyverse)
library(rlang)

create.dt <-  function(t = 0) {
  data.frame(parameter = rep(c("temperature","oxygen"), each = 3),
             date = rep(c(Sys.Date() t), each = 6),
             depth = rep(1:3, times = 2),
             value = c(data.frame(x = rnorm(3, 16, 2)) %>% 
                         arrange(-x) %>% pull,
                       data.frame(x = rnorm(3, 7, 1)) %>% 
                         arrange(-x) %>% pull
             ))
  
}

# Multi-site dataset
dt <- rbind(
  cbind(site = "A", create.dt(t = c(-3:0))),
  cbind(site = "B", create.dt(t = c(-3:0))),
  cbind(site = "C", create.dt(t = c(-3:0))),
  cbind(site = "D", create.dt(t = c(-3:0))),
  cbind(site = "E", create.dt(t = c(-3:0))))

# result

temperature <- str_c("temperature_", 1:3)
oxygen <- str_c("oxygen_", 1:3)

temperature_frml <- combn(temperature, m = 2, FUN = function(x) str_c(x, collapse = " - ")) 
oxygen_frml <- combn(oxygen, m = 2, FUN = function(x) str_c(x, collapse = " - ")) 
all_frml <- c(temperature_frml, oxygen_frml)

df_wider <- dt %>% pivot_wider(
  id_cols = c(site, date),
  names_from = c(parameter, depth),
  values_from = value,
  names_sep = "_"
)

bind_cols(df_wider,
          map_dfc(
            .x = all_frml,
            .f = ~ transmute(.data = df_wider,!!.x :=  eval(parse_expr(.x)))
          ))
#> # A tibble: 20 x 14
#>    site  date       temperature_1 temperature_2 temperature_3 oxygen_1 oxygen_2
#>    <chr> <date>             <dbl>         <dbl>         <dbl>    <dbl>    <dbl>
#>  1 A     2021-12-11          17.6          17.1          12.9     7.34     6.86
#>  2 A     2021-12-12          17.6          17.1          12.9     7.34     6.86
#>  3 A     2021-12-13          17.6          17.1          12.9     7.34     6.86
#>  4 A     2021-12-14          17.6          17.1          12.9     7.34     6.86
#>  5 B     2021-12-11          17.1          15.6          13.7     8.52     7.58
#>  6 B     2021-12-12          17.1          15.6          13.7     8.52     7.58
#>  7 B     2021-12-13          17.1          15.6          13.7     8.52     7.58
#>  8 B     2021-12-14          17.1          15.6          13.7     8.52     7.58
#>  9 C     2021-12-11          17.7          15.5          13.6     7.66     7.31
#> 10 C     2021-12-12          17.7          15.5          13.6     7.66     7.31
#> 11 C     2021-12-13          17.7          15.5          13.6     7.66     7.31
#> 12 C     2021-12-14          17.7          15.5          13.6     7.66     7.31
#> 13 D     2021-12-11          16.5          16.4          14.5     7.50     7.27
#> 14 D     2021-12-12          16.5          16.4          14.5     7.50     7.27
#> 15 D     2021-12-13          16.5          16.4          14.5     7.50     7.27
#> 16 D     2021-12-14          16.5          16.4          14.5     7.50     7.27
#> 17 E     2021-12-11          16.7          16.1          15.7     7.52     7.51
#> 18 E     2021-12-12          16.7          16.1          15.7     7.52     7.51
#> 19 E     2021-12-13          16.7          16.1          15.7     7.52     7.51
#> 20 E     2021-12-14          16.7          16.1          15.7     7.52     7.51
#> # ... with 7 more variables: oxygen_3 <dbl>,
#> #   temperature_1 - temperature_2 <dbl>, temperature_1 - temperature_3 <dbl>,
#> #   temperature_2 - temperature_3 <dbl>, oxygen_1 - oxygen_2 <dbl>,
#> #   oxygen_1 - oxygen_3 <dbl>, oxygen_2 - oxygen_3 <dbl>

Created on 2021-12-14 by the reprex package (v2.0.1)

  • Related