Home > database >  Variables with the same suffix multiply and sum
Variables with the same suffix multiply and sum

Time:11-19

My dataframe has 22 variables. this is a simplified sample. the variables include x1,x2,y1_,y2_. i want to create a new variable. the variable values are x1*y1_ x2*y2_. the code is as follows:

df <- data.frame(x1=c(0,0,0,1),x2=c(0,0,0,1),y1_=c(3,0,2,1),y2_=c(1,0,0,1))
df$var <- df$x1*df$y1_ df$x2*df$y2_

if no. of variables is 22, the above code is unreasonable. so,how to get this variable?

CodePudding user response:

Split dataframe based on column names then multiply, then rowwise sum:

x <- colnames(df)
df$var <- rowSums(df[, grepl("^x", x)] * df[, grepl("^y", x)])
df
#   x1 x2 y1_ y2_ var
# 1  0  0   3   1   0
# 2  0  0   0   0   0
# 3  0  0   2   0   0
# 4  1  1   1   1   2

CodePudding user response:

Bottom line:

df$var <- do.call(` `, 
  lapply(split.default(df, gsub(".*([0-9] )_?$", "\\1", names(df))),
         function(z) apply(z, 1, prod)))
df
#   x1 x2 y1_ y2_ var
# 1  0  0   3   1   0
# 2  0  0   0   0   0
# 3  0  0   2   0   0
# 4  1  1   1   1   2

Initial steps:

gsub(".*([0-9] )_?$", "\\1", names(df))
# [1] "1" "2" "1" "2"
split.default(df, gsub(".*([0-9] )_?$", "\\1", names(df)))
# $`1`
#   x1 y1_
# 1  0   3
# 2  0   0
# 3  0   2
# 4  1   1
# $`2`
#   x2 y2_
# 1  0   1
# 2  0   0
# 3  0   0
# 4  1   1
lapply(split.default(df, gsub(".*([0-9] )_?$", "\\1", names(df))),
       function(z) apply(z, 1, prod))
# $`1`
# [1] 0 0 0 1
# $`2`
# [1] 0 0 0 1

CodePudding user response:

1) Using dplyr and assuming that the columns ending in digits are in the same order as the columns ending in _ and that the two groups end in digits and underscore respectively we can use across like this.

library(dplyr)

df %>% mutate(var = rowSums(across(matches("\\d$")) * across(ends_with("_"))))

giving

  x1 x2 y1_ y2_ var
1  0  0   3   1   0
2  0  0   0   0   0
3  0  0   2   0   0
4  1  1   1   1   2

2) A variation is to use rowwise:

df %>%
  rowwise %>%
  mutate(var = sum(c_across(matches("\\d$")) * c_across(ends_with("_")))) %>%
  ungroup

Note

df <- structure(list(x1 = c(0, 0, 0, 1), x2 = c(0, 0, 0, 1), y1_ = c(3, 
0, 2, 1), y2_ = c(1, 0, 0, 1)), class = "data.frame", row.names = c(NA, 
-4L))

df
##   x1 x2 y1_ y2_
## 1  0  0   3   1
## 2  0  0   0   0
## 3  0  0   2   0
## 4  1  1   1   1

CodePudding user response:

You can achieve this with a combination of various pivotting, mapping and mutating functions from the tidyverse

df <- data.frame(x1=c(1,2,3,4),x2=c(1,1,0,1),y1_=c(3,0,2,1),y2_=c(1,4,0,1))

library(tidyr)
library(dplyr)
library(purrr)

pivoted <-
  pivot_longer(
    df,
    cols = everything(),
    names_to = c("letter", "number"),
    names_pattern = "(.)(.)"
  )

pivoted
#> # A tibble: 16 × 3
#>    letter number value
#>    <chr>  <chr>  <dbl>
#>  1 x      1          1
#>  2 x      2          1
#>  3 y      1          3
#>  4 y      2          1
#>  5 x      1          2
#>  6 x      2          1
#>  7 y      1          0
#>  8 y      2          4
#>  9 x      1          3
#> 10 x      2          0
#> 11 y      1          2
#> 12 y      2          0
#> 13 x      1          4
#> 14 x      2          1
#> 15 y      1          1
#> 16 y      2          1

nested <- 
  pivoted |> 
  group_by(letter, number) |> 
  nest(num_data = value)

nested
#> # A tibble: 4 × 3
#> # Groups:   letter, number [4]
#>   letter number num_data        
#>   <chr>  <chr>  <list>          
#> 1 x      1      <tibble [4 × 1]>
#> 2 x      2      <tibble [4 × 1]>
#> 3 y      1      <tibble [4 × 1]>
#> 4 y      2      <tibble [4 × 1]>

summarised <-
  nested |>
  group_by(number) |> 
  summarise(across(num_data, pmap, list))

summarised
#> # A tibble: 2 × 2
#>   number num_data    
#>   <chr>  <named list>
#> 1 1      <list [2]>  
#> 2 2      <list [2]>

summarised <- rowwise(summarised)

summarised <- 
  transmute(
    summarised,
    products = list(
      pmap(num_data, prod)
    )
  )

summarised[["products"]]
#> [[1]]
#> [[1]][[1]]
#> [1] 3
#> 
#> [[1]][[2]]
#> [1] 0
#> 
#> [[1]][[3]]
#> [1] 6
#> 
#> [[1]][[4]]
#> [1] 4
#> 
#> 
#> [[2]]
#> [[2]][[1]]
#> [1] 1
#> 
#> [[2]][[2]]
#> [1] 4
#> 
#> [[2]][[3]]
#> [1] 0
#> 
#> [[2]][[4]]
#> [1] 1

df[["var"]] <- 
  summarised[["products"]] |> 
  pmap_dbl(sum)

df
#>   x1 x2 y1_ y2_ var
#> 1  1  1   3   1   4
#> 2  2  1   0   4   4
#> 3  3  0   2   0   6
#> 4  4  1   1   1   5

Created on 2022-11-18 with reprex v2.0.2

  • Related