inorder to do a paired analysis i need to write a function that sums integer counts. The total number required to be summed is specified in the corresponding "Yrs_Before" and "Yrs_After" columns in "df". Is there a way i can avoid writing "fm_after" with just one function? Is this a variable masking for "Yrs_Before", "Yrs_After","Before.Yr_1...n 1" and "After.Yr_1...n 1" columns?
data frame
set.seed(123)
(df=data.frame(
Yrs_Before=sample(1:8, 3),
Yrs_After=sample(1:8, 3),
Before.Yr_1=sample(1:8, 3),
Before.Yr_2=sample(1:8, 3),
Before.Yr_3=sample(1:8, 3),
Before.Yr_4=sample(1:8, 3),
Before.Yr_5=sample(1:8, 3),
Before.Yr_6=sample(1:8, 3),
Before.Yr_7=sample(1:8, 3),
Before.Yr_8=sample(1:8, 3),
After.Yr_1=sample(1:8, 3),
After.Yr_2=sample(1:8, 3),
After.Yr_3=sample(1:8, 3),
After.Yr_4=sample(1:8, 3),
After.Yr_5=sample(1:8, 3),
After.Yr_6=sample(1:8, 3),
After.Yr_7=sample(1:8, 3),
After.Yr_8=sample(1:8, 3)
))
function sums the corresponding rows based on the number of years in the before period.
fm=function(data,Yrs_Before){
data |> dplyr::mutate(sums=
ifelse(
Yrs_Before == 1, rowSums(across(Before.Yr_1)),
ifelse(
Yrs_Before == 2, rowSums(across(Before.Yr_1:Before.Yr_2)),
ifelse(
Yrs_Before == 3, rowSums(across(Before.Yr_1:Before.Yr_3)),
ifelse(
Yrs_Before == 4, rowSums(across(Before.Yr_1:Before.Yr_4)),
ifelse(
Yrs_Before == 5, rowSums(across(Before.Yr_1:Before.Yr_5)),
ifelse(
Yrs_Before == 6, rowSums(across(Before.Yr_1:Before.Yr_6)),
ifelse(
Yrs_Before == 7, rowSums(across(Before.Yr_1:Before.Yr_7)),
ifelse(
Yrs_Before == 8, rowSums(across(Before.Yr_1:Before.Yr_8)),"")))))))))
}
output
fm(df,Yrs_Before)
Yrs_Before Yrs_After Before.Yr_1 Before.Yr_2 Before.Yr_3 Before.Yr_4 Before.Yr_5 Before.Yr_6 Before.Yr_7 Before.Yr_8 After.Yr_1 After.Yr_2 After.Yr_3 After.Yr_4 After.Yr_5 After.Yr_6 After.Yr_7 After.Yr_8 sums
1 7 6 2 5 6 3 3 1 3 1 4 3 4 5 1 4 3 2 23
2 8 3 6 4 1 5 1 8 2 6 6 7 7 7 2 5 6 5 33
3 3 2 3 6 2 8 4 5 7 3 1 5 2 1 3 7 1 7 11
duplicated function with variable name changed to "After"
fm_after=function(data,Yrs_After){
data |> dplyr::mutate(sums=
ifelse(
Yrs_After == 1, rowSums(across(After.Yr_1)),
ifelse(
Yrs_After == 2, rowSums(across(After.Yr_1:After.Yr_2)),
ifelse(
Yrs_After == 3, rowSums(across(After.Yr_1:After.Yr_3)),
ifelse(
Yrs_After == 4, rowSums(across(After.Yr_1:After.Yr_4)),
ifelse(
Yrs_After == 5, rowSums(across(After.Yr_1:After.Yr_5)),
ifelse(
Yrs_After == 6, rowSums(across(After.Yr_1:After.Yr_6)),
ifelse(
Yrs_After == 7, rowSums(across(After.Yr_1:After.Yr_7)),
ifelse(
Yrs_After == 8, rowSums(across(After.Yr_1:After.Yr_8)),"")))))))))
}
output
fm_after(df,Yrs_After)
Yrs_Before Yrs_After Before.Yr_1 Before.Yr_2 Before.Yr_3 Before.Yr_4 Before.Yr_5 Before.Yr_6 Before.Yr_7 Before.Yr_8 After.Yr_1 After.Yr_2 After.Yr_3 After.Yr_4 After.Yr_5 After.Yr_6 After.Yr_7 After.Yr_8 sums
1 7 6 2 5 6 3 3 1 3 1 4 3 4 5 1 4 3 2 21
2 8 3 6 4 1 5 1 8 2 6 6 7 7 7 2 5 6 5 20
3 3 2 3 6 2 8 4 5 7 3 1 5 2 1 3 7 1 7 6
CodePudding user response:
We may do this by pivoting to long format
library(dplyr)
library(tidyr)
library(stringr)
df %>%
mutate(rn = row_number()) %>%
pivot_longer(cols = -c(rn, Yrs_Before, Yrs_After)) %>%
mutate(yrs = as.numeric(str_extract(name, "\\d $"))) %>%
group_by(rn, grp = str_extract(name, "\\w ")) %>%
summarise(Sum = if(cur_group()$grp == 'Before')
sum(value[yrs <= Yrs_Before], na.rm = TRUE) else
sum(value[yrs <= Yrs_After], na.rm = TRUE), .groups = 'drop') %>%
pivot_wider(names_from = grp, values_from = Sum) %>%
select(-rn) %>%
bind_cols(df, .)
-output
Yrs_Before Yrs_After Before.Yr_1 Before.Yr_2 Before.Yr_3 Before.Yr_4 Before.Yr_5 Before.Yr_6 Before.Yr_7 Before.Yr_8 After.Yr_1
1 7 6 2 5 6 3 3 1 3 1 4
2 8 3 6 4 1 5 1 8 2 6 6
3 3 2 3 6 2 8 4 5 7 3 1
After.Yr_2 After.Yr_3 After.Yr_4 After.Yr_5 After.Yr_6 After.Yr_7 After.Yr_8 After Before
1 3 4 5 1 4 3 2 21 23
2 7 7 7 2 5 6 5 20 33
3 5 2 1 3 7 1 7 6 11
It can be wrapped in a function with just input data
fm <- function(data) {
data %>%
mutate(rn = row_number()) %>%
pivot_longer(cols = -c(rn, Yrs_Before, Yrs_After)) %>%
mutate(yrs = as.numeric(str_extract(name, "\\d $"))) %>%
group_by(rn, grp = str_extract(name, "\\w ")) %>%
summarise(Sum = if(cur_group()$grp == 'Before')
sum(value[yrs <= Yrs_Before], na.rm = TRUE) else
sum(value[yrs <= Yrs_After], na.rm = TRUE), .groups = 'drop') %>%
pivot_wider(names_from = grp, values_from = Sum) %>%
select(-rn) %>%
bind_cols(df, .)
}
-testing
> fm(df)
Yrs_Before Yrs_After Before.Yr_1 Before.Yr_2 Before.Yr_3 Before.Yr_4 Before.Yr_5 Before.Yr_6 Before.Yr_7 Before.Yr_8 After.Yr_1
1 7 6 2 5 6 3 3 1 3 1 4
2 8 3 6 4 1 5 1 8 2 6 6
3 3 2 3 6 2 8 4 5 7 3 1
After.Yr_2 After.Yr_3 After.Yr_4 After.Yr_5 After.Yr_6 After.Yr_7 After.Yr_8 After Before
1 3 4 5 1 4 3 2 21 23
2 7 7 7 2 5 6 5 20 33
3 5 2 1 3 7 1 7 6 11
CodePudding user response:
EDIT:
in base R you could do:
A <- split.default(df, sub('.*(After|Before).*', '\\1', names(df)))
sapply(A, \(x) rowSums(x[-1] * (col(x[-1]) <= x[[1]])))
After Before
[1,] 21 23
[2,] 20 33
[3,] 6 11
You can then cbind
this to the original dataframe
tidyverse:
use mutate
instead of transmute
in order to retain the original dataframe.
df %>%
rowwise() %>%
transmute(After = cumsum(c_across(matches('Before.Yr')))[Yrs_Before],
Before = cumsum(c_across(matches('After.Yr')))[Yrs_After])
# A tibble: 3 x 2
# Rowwise:
After Before
<int> <int>
1 23 21
2 33 20
3 11 6
CodePudding user response:
To avoid so much code you could use paste
.
f <- \(x, dat=df) sapply(seq_len(nrow(dat)), \(i, ...)
sum(dat[i, paste0(x, '.Yr_', 1:dat[i, paste0('Yrs_', x)])]))
res <- transform(df, sums_bef=f('Before'), sums_aft=f('After'))
res
# Yrs_Before Yrs_After Before.Yr_1 Before.Yr_2 Before.Yr_3
# 1 7 6 2 5 6
# 2 8 3 6 4 1
# 3 3 2 3 6 2
# Before.Yr_4 Before.Yr_5 Before.Yr_6 Before.Yr_7 Before.Yr_8
# 1 3 3 1 3 1
# 2 5 1 8 2 6
# 3 8 4 5 7 3
# After.Yr_1 After.Yr_2 After.Yr_3 After.Yr_4 After.Yr_5
# 1 4 3 4 5 1
# 2 6 7 7 7 2
# 3 1 5 2 1 3
# After.Yr_6 After.Yr_7 After.Yr_8 sums_bef sums_aft
# 1 4 3 2 23 21
# 2 5 6 5 33 20
# 3 7 1 7 11 6
Data:
df <- structure(list(Yrs_Before = c(7L, 8L, 3L), Yrs_After = c(6L,
3L, 2L), Before.Yr_1 = c(2L, 6L, 3L), Before.Yr_2 = c(5L, 4L,
6L), Before.Yr_3 = c(6L, 1L, 2L), Before.Yr_4 = c(3L, 5L, 8L),
Before.Yr_5 = c(3L, 1L, 4L), Before.Yr_6 = c(1L, 8L, 5L),
Before.Yr_7 = c(3L, 2L, 7L), Before.Yr_8 = c(1L, 6L, 3L),
After.Yr_1 = c(4L, 6L, 1L), After.Yr_2 = c(3L, 7L, 5L), After.Yr_3 = c(4L,
7L, 2L), After.Yr_4 = c(5L, 7L, 1L), After.Yr_5 = 1:3, After.Yr_6 = c(4L,
5L, 7L), After.Yr_7 = c(3L, 6L, 1L), After.Yr_8 = c(2L, 5L,
7L)), class = "data.frame", row.names = c(NA, -3L))