Add lists as variables in a dataframe-CodePudding

I need an efficient way to add a series of variables together, preferably without a loop.

library(dplyr) 
avar_2000  <- c(2,5,1)
avar_2001  <- c(2,3,1)
avar_2002  <- c(7,2,5)
bvar_2000  <- c(9,1,1)
bvar_2001  <- c(5,5,3)
bvar_2002  <- c(3,8,NA)


df <- data.frame(avar_2000,avar_2001,avar_2002,bvar_2000,bvar_2001,bvar_2002)

print(df)

avar_2000	avar_2001	avar_2002	bvar_2000	bvar_2001	bvar_2002
2	2	7	9	5	3
5	3	2	1	5	8
1	1	5	1	3	NA

bvars <- c ("bvar_2000","bvar_2001","bvar_2002")

df2 <- df %>% mutate (avar_sum = avar_2000   avar_2001   avar_2002) %>%
  rowwise() %>%
  mutate (bvar_sum = sum(across(all_of(bvars))))

print(df2)

avar_2000	avar_2001	avar_2002	bvar_2000	bvar_2001	bvar_2002	avar_sum	bvar_sum
2	2	7	9	5	3	11	17
5	3	2	1	5	8	10	14
1	1	5	1	3	NA	7	NA

Two puzzles:

How can I sum up all the avars and bvars within one line? My project has many of these series, and it would be neat if I could say, "add up all the avars into avars_sum, bvars into bvars_sum, etc.". Is that possible?
How can I add avars and bvars together within their respective years into a new series? As in, avar2000 bvar2000 = cvar2000. Without writing it out and without writing a loop.

I wrote a loop and it works, but it's... well, there's a better way, right?

for (b in 2000:2003) {
  avar = paste0("avar_",b)
  bvar = paste0("bvar_",b)
  cvar = paste0("cvar_",b)
  
  df2<- df2%>% 
    mutate(
      !!cvar := get(avar)   get(bvar))   
}

CodePudding user response：

If the columns are in the same order, we could use

library(dplyr)
library(stringr)
df %>% 
  mutate(across(starts_with('avar'), 
   .names = "{str_replace(.col, 'avar', 'cvar')}")   
      across(starts_with('bvar')))

-output

 avar_2000 avar_2001 avar_2002 bvar_2000 bvar_2001 bvar_2002 cvar_2000 cvar_2001 cvar_2002
1         2         2         7         9         5         3        11         7        10
2         5         3         2         1         5         8         6         8        10
3         1         1         5         1         3        NA         2         4        NA

Or another option with dplyover

library(dplyover)
df %>%
   mutate(across2(starts_with('avar'), 
    starts_with('bvar'), ~ .x   .y, .names_fn = 
      ~ str_replace(.x, "avar_(\\d )_.*", "cvar_\\1")))

-output

avar_2000 avar_2001 avar_2002 bvar_2000 bvar_2001 bvar_2002 cvar_2000 cvar_2001 cvar_2002
1         2         2         7         9         5         3        11         7        10
2         5         3         2         1         5         8         6         8        10
3         1         1         5         1         3        NA         2         4        NA

Or in base R, use split.default

df[paste0("cvar_", 2000:2002)] <- lapply(split.default(df, 
      trimws(names(df), whitespace = ".*_")), rowSums)

CodePudding user response：

The following base R solution just makes it as a one-liner. I don't think you are missing an elegant dplyr solution here - the sensible way would be to pivot your data into longer format and keep it that way.

sapply(unique(substr(names(df), 1, 4)), \(x) rowSums(df[grepl(x, names(df))]))
#>      avar bvar
#> [1,]   11   17
#> [2,]   10   14
#> [3,]    7   NA

CodePudding user response：

Here's another base R solution which uses rowsum and group argument.

t(rowsum(t(df), group = sub("_.*", "_sum", colnames(df)), na.rm = F))

#>      avar_sum bvar_sum
#> [1,]       11       17
#> [2,]       10       14
#> [3,]        7       NA

t(rowsum(t(df), group = sub(".*_", "cvar_", colnames(df)), na.rm = F))

#>      cvar_2000 cvar_2001 cvar_2002
#> [1,]        11         7        10
#> [2,]         6         8        10
#> [3,]         2         4        NA