Variables with the same suffix multiply and sum-CodePudding

My dataframe has 22 variables. this is a simplified sample. the variables include x1,x2,y1_,y2_. i want to create a new variable. the variable values are x1*y1_ x2*y2_. the code is as follows:

df <- data.frame(x1=c(0,0,0,1),x2=c(0,0,0,1),y1_=c(3,0,2,1),y2_=c(1,0,0,1))
df$var <- df$x1*df$y1_ df$x2*df$y2_

if no. of variables is 22, the above code is unreasonable. so,how to get this variable?

CodePudding user response：

Split dataframe based on column names then multiply, then rowwise sum:

x <- colnames(df)
df$var <- rowSums(df[, grepl("^x", x)] * df[, grepl("^y", x)])
df
#   x1 x2 y1_ y2_ var
# 1  0  0   3   1   0
# 2  0  0   0   0   0
# 3  0  0   2   0   0
# 4  1  1   1   1   2

CodePudding user response：

Bottom line:

df$var <- do.call(` `, 
  lapply(split.default(df, gsub(".*([0-9] )_?$", "\\1", names(df))),
         function(z) apply(z, 1, prod)))
df
#   x1 x2 y1_ y2_ var
# 1  0  0   3   1   0
# 2  0  0   0   0   0
# 3  0  0   2   0   0
# 4  1  1   1   1   2

Initial steps:

gsub(".*([0-9] )_?$", "\\1", names(df))
# [1] "1" "2" "1" "2"
split.default(df, gsub(".*([0-9] )_?$", "\\1", names(df)))
# $`1`
#   x1 y1_
# 1  0   3
# 2  0   0
# 3  0   2
# 4  1   1
# $`2`
#   x2 y2_
# 1  0   1
# 2  0   0
# 3  0   0
# 4  1   1
lapply(split.default(df, gsub(".*([0-9] )_?$", "\\1", names(df))),
       function(z) apply(z, 1, prod))
# $`1`
# [1] 0 0 0 1
# $`2`
# [1] 0 0 0 1

CodePudding user response：

1) Using dplyr and assuming that the columns ending in digits are in the same order as the columns ending in _ and that the two groups end in digits and underscore respectively we can use across like this.

library(dplyr)

df %>% mutate(var = rowSums(across(matches("\\d$")) * across(ends_with("_"))))

giving

  x1 x2 y1_ y2_ var
1  0  0   3   1   0
2  0  0   0   0   0
3  0  0   2   0   0
4  1  1   1   1   2

2) A variation is to use rowwise:

df %>%
  rowwise %>%
  mutate(var = sum(c_across(matches("\\d$")) * c_across(ends_with("_")))) %>%
  ungroup

Note

df <- structure(list(x1 = c(0, 0, 0, 1), x2 = c(0, 0, 0, 1), y1_ = c(3, 
0, 2, 1), y2_ = c(1, 0, 0, 1)), class = "data.frame", row.names = c(NA, 
-4L))

df
##   x1 x2 y1_ y2_
## 1  0  0   3   1
## 2  0  0   0   0
## 3  0  0   2   0
## 4  1  1   1   1

CodePudding user response：

You can achieve this with a combination of various pivotting, mapping and mutating functions from the tidyverse

df <- data.frame(x1=c(1,2,3,4),x2=c(1,1,0,1),y1_=c(3,0,2,1),y2_=c(1,4,0,1))

library(tidyr)
library(dplyr)
library(purrr)

pivoted <-
  pivot_longer(
    df,
    cols = everything(),
    names_to = c("letter", "number"),
    names_pattern = "(.)(.)"
  )

pivoted
#> # A tibble: 16 × 3
#>    letter number value
#>    <chr>  <chr>  <dbl>
#>  1 x      1          1
#>  2 x      2          1
#>  3 y      1          3
#>  4 y      2          1
#>  5 x      1          2
#>  6 x      2          1
#>  7 y      1          0
#>  8 y      2          4
#>  9 x      1          3
#> 10 x      2          0
#> 11 y      1          2
#> 12 y      2          0
#> 13 x      1          4
#> 14 x      2          1
#> 15 y      1          1
#> 16 y      2          1

nested <- 
  pivoted |> 
  group_by(letter, number) |> 
  nest(num_data = value)

nested
#> # A tibble: 4 × 3
#> # Groups:   letter, number [4]
#>   letter number num_data        
#>   <chr>  <chr>  <list>          
#> 1 x      1      <tibble [4 × 1]>
#> 2 x      2      <tibble [4 × 1]>
#> 3 y      1      <tibble [4 × 1]>
#> 4 y      2      <tibble [4 × 1]>

summarised <-
  nested |>
  group_by(number) |> 
  summarise(across(num_data, pmap, list))

summarised
#> # A tibble: 2 × 2
#>   number num_data    
#>   <chr>  <named list>
#> 1 1      <list [2]>  
#> 2 2      <list [2]>

summarised <- rowwise(summarised)

summarised <- 
  transmute(
    summarised,
    products = list(
      pmap(num_data, prod)
    )
  )

summarised[["products"]]
#> [[1]]
#> [[1]][[1]]
#> [1] 3
#> 
#> [[1]][[2]]
#> [1] 0
#> 
#> [[1]][[3]]
#> [1] 6
#> 
#> [[1]][[4]]
#> [1] 4
#> 
#> 
#> [[2]]
#> [[2]][[1]]
#> [1] 1
#> 
#> [[2]][[2]]
#> [1] 4
#> 
#> [[2]][[3]]
#> [1] 0
#> 
#> [[2]][[4]]
#> [1] 1

df[["var"]] <- 
  summarised[["products"]] |> 
  pmap_dbl(sum)

df
#>   x1 x2 y1_ y2_ var
#> 1  1  1   3   1   4
#> 2  2  1   0   4   4
#> 3  3  0   2   0   6
#> 4  4  1   1   1   5

^{Created on 2022-11-18 with reprex v2.0.2}