My Goal is to transform something like this:
df1 <- data.frame(
value1 = c(100, 100, 100, 100, 100, 100, 100),
# value2=c(a, b, c, d, e, f, g),
startgroup = c(1, 101, 351, 356, 401, 451, 451),
endgroup = c(100, 350, 355, 400, 450, 450, 500),
groupwidth = c(100, 250, 5, 40, 50, 0 , 50)
)
into something like this:
df2 <- data.frame(
value1 = c(100, 40, 40, 220, 300),
# value2=c(a, b*.4, b*.4, b*.2 c d, d e f),
startgroup = c(1, 101, 201, 301, 401),
endgroup = c(100, 200, 300, 400, 500),
groupwidth = c(100, 100, 100, 100, 100)
)
I already managed to do it with a for-loop, but it somehow takes about 5-10 minutes per variable. No idea why tough. and I am certain, that there is a simple way to achieve it.
CodePudding user response:
library(tidyverse)
df1 %>%
filter(groupwidth>0) %>%
rowwise() %>%
mutate(gr = list(c(rep(100, groupwidth %/0), groupwidth %0)),
ln = length(gr)) %>%
unnest(gr) %>%
group_by(st=cumsum(gr-1) %/0) %>%
summarise(val = sum(value1/groupwidth * gr),
startgroup = st[1] * 100 1,
endgroup = startgroup 99,
groupwidth = 100)
# A tibble: 5 x 5
st val startgroup endgroup groupwidth
<dbl> <dbl> <dbl> <dbl> <dbl>
1 0 100 1 100 100
2 1 40 101 200 100
3 2 40 201 300 100
4 3 220 301 400 100
5 4 200 401 500 100
CodePudding user response:
In case anyone wonders, I found a working solution, that works (on the real data) in less than 5 seconds. The solution ended up being a for loop as well.
df1 <- data.frame(
value1 = c(100, 100, 100, 100, 100, 100, 100),
# value2=c(a, b, c, d, e, f, g),
startgroup = c(1, 101, 351, 356, 401, 451, 451),
endgroup = c(100, 350, 355, 400, 450, 450, 500),
groupwidth = c(100, 250, 5, 40, 50, 0 , 50)) %>%
mutate(startgroup = startgroup - 1)
rm <- max(df1$endgroup)%/0 1
for (i in 1:rm){
df1 <- df1 %>%
mutate(
a = 0,
a = ifelse(startgroup <= 100*i-100 & endgroup > 100*i,
a value1/(endgroup-startgroup)*100,
a),
a = ifelse(startgroup <= 100*i-100 & endgroup <= 100*i & endgroup > 100*i-100,
a value1/(endgroup-startgroup)*(endgroup - (i-1)*100),
a),
a = ifelse(startgroup > 100*i-100 & endgroup <= 100*i,
a value1,
a),
a = ifelse(startgroup > 100*i-100 & startgroup <= 100*i & endgroup > 100*i,
a value1/(endgroup-startgroup)*(i*100 - startgroup),
a),
!!paste0(i*100) := a
)
}
df1 <- df1 %>%
pivot_longer(
cols = contains("00"),
names_to = "upper_bound",
values_to = "value",
values_drop_na = TRUE) %>%
group_by(upper_bound) %>%
mutate(upper_bound = as.integer(upper_bound)) %>%
summarize(
value = sum(value)
)