I have a data frame like below:
df <- data.frame(id = c(12345,12345,12345,221,221,221),
range_key = c('2022 Q2','2022 Q3','2023 Q4','2022 Q2','2023 Q3','2023 Q4'),
country = c('US','US','US','CA','CA','CA'),
value = c(10,8,NA,5,NA,NA),
pct = c(-0.2,-0.2,-0.2,-0.12,-0.12,-0.12))
In each group I want to calculate values which are NA with the previous row value * pct
--> value = value (value * ((pct))
CodePudding user response:
This is a recursive computation. You could use accumulate2()
from purrr
.
library(dplyr)
library(purrr)
df %>%
group_by(id, country) %>%
mutate(value = accumulate2(value, pct[-n()],
~ if(is.na(..2)) ..1 * (1 ..3) else ..2) %>% flatten_dbl) %>%
ungroup()
# A tibble: 6 × 5
id range_key country value pct
<dbl> <chr> <chr> <dbl> <dbl>
1 12345 2022 Q2 US 10 -0.2
2 12345 2022 Q3 US 8 -0.2
3 12345 2023 Q4 US 6.4 -0.2
4 221 2022 Q2 CA 5 -0.12
5 221 2023 Q3 CA 4.4 -0.12
6 221 2023 Q4 CA 3.87 -0.12
The formula-like syntax is converted to a function under the hood
function(x1, x2, y) if(is.na(x2)) x1 * (1 y) else x2
CodePudding user response:
Does this work for you?
library(dplyr)
df <- data.frame(
stringsAsFactors = FALSE,
id = c(1, 1, 1, 2, 2, 2),
range_key = c("2022 Q2","2022 Q3","2023 Q4",
"2022 Q2","2023 Q3","2023 Q4"),
country = c("US", "US", "US", "CA", "CA", "CA"),
value = c(10, 8, NA, 5, NA, NA),
pct = c(-0.2, -0.2, -0.2, -0.12, -0.12, -0.12)
)
df %>%
group_by(id, country) %>%
mutate(
value = if_else(is.na(value), lag(value) lag(value) * lag(pct), value)
)
#> # A tibble: 6 × 5
#> # Groups: id, country [2]
#> id range_key country value pct
#> <dbl> <chr> <chr> <dbl> <dbl>
#> 1 1 2022 Q2 US 10 -0.2
#> 2 1 2022 Q3 US 8 -0.2
#> 3 1 2023 Q4 US 6.4 -0.2
#> 4 2 2022 Q2 CA 5 -0.12
#> 5 2 2023 Q3 CA 4.4 -0.12
#> 6 2 2023 Q4 CA NA -0.12
Created on 2022-07-08 by the reprex package (v2.0.1)
CodePudding user response:
Another possible solution:
library(dplyr)
fill.na <- function(x, y)
{
for (i in 1:length(x))
if (is.na(x[i]))
x[i] <- x[i-1] *(1 y[i-1])
x
}
df %>%
group_by(id, country) %>%
mutate(value = fill.na(value, pct)) %>%
ungroup
#> # A tibble: 6 × 5
#> id range_key country value pct
#> <dbl> <chr> <chr> <dbl> <dbl>
#> 1 12345 2022 Q2 US 10 -0.2
#> 2 12345 2022 Q3 US 8 -0.2
#> 3 12345 2023 Q4 US 6.4 -0.2
#> 4 221 2022 Q2 CA 5 -0.12
#> 5 221 2023 Q3 CA 4.4 -0.12
#> 6 221 2023 Q4 CA 3.87 -0.12
CodePudding user response:
Here is another approach
df <- plyr::ldply(by(df,df$id,subset), function(ID_subset){
temp_subset <- plyr::ldply(by(ID_subset, ID_subset$country, subset), function(country_subset){
country_subset
for (i in 1:nrow(country_subset)) {
if (is.na(country_subset$value[i])) {
country_subset$value[i] <- country_subset$value[i - 1] (country_subset$value[i - 1] * country_subset$pct[i - 1])
}
}}}