Home > Software design >  Calculate rows based on previous rows value
Calculate rows based on previous rows value

Time:07-09

I have a data frame like below:

df <- data.frame(id = c(12345,12345,12345,221,221,221),
                 range_key = c('2022 Q2','2022 Q3','2023 Q4','2022 Q2','2023 Q3','2023 Q4'),
                 country = c('US','US','US','CA','CA','CA'),
                 value = c(10,8,NA,5,NA,NA), 
                 pct = c(-0.2,-0.2,-0.2,-0.12,-0.12,-0.12))

In each group I want to calculate values which are NA with the previous row value * pct

--> value = value   (value * ((pct))

CodePudding user response:

This is a recursive computation. You could use accumulate2() from purrr.

library(dplyr)
library(purrr)

df %>%
  group_by(id, country) %>%
  mutate(value = accumulate2(value, pct[-n()],
                   ~ if(is.na(..2)) ..1 * (1   ..3) else ..2) %>% flatten_dbl) %>%
  ungroup()

# A tibble: 6 × 5
     id range_key country value   pct
  <dbl> <chr>     <chr>   <dbl> <dbl>
1 12345 2022 Q2   US      10    -0.2 
2 12345 2022 Q3   US       8    -0.2
3 12345 2023 Q4   US       6.4  -0.2
4   221 2022 Q2   CA       5    -0.12
5   221 2023 Q3   CA       4.4  -0.12
6   221 2023 Q4   CA       3.87 -0.12

The formula-like syntax is converted to a function under the hood

function(x1, x2, y) if(is.na(x2)) x1 * (1   y) else x2

CodePudding user response:

Does this work for you?

library(dplyr)

df <- data.frame(
  stringsAsFactors = FALSE,
                id = c(1, 1, 1, 2, 2, 2),
         range_key = c("2022 Q2","2022 Q3","2023 Q4",
                       "2022 Q2","2023 Q3","2023 Q4"),
           country = c("US", "US", "US", "CA", "CA", "CA"),
             value = c(10, 8, NA, 5, NA, NA),
               pct = c(-0.2, -0.2, -0.2, -0.12, -0.12, -0.12)
)

df %>% 
  group_by(id, country) %>% 
  mutate(
    value = if_else(is.na(value), lag(value)   lag(value) * lag(pct), value)
  )

#> # A tibble: 6 × 5
#> # Groups:   id, country [2]
#>      id range_key country value   pct
#>   <dbl> <chr>     <chr>   <dbl> <dbl>
#> 1     1 2022 Q2   US       10   -0.2 
#> 2     1 2022 Q3   US        8   -0.2 
#> 3     1 2023 Q4   US        6.4 -0.2 
#> 4     2 2022 Q2   CA        5   -0.12
#> 5     2 2023 Q3   CA        4.4 -0.12
#> 6     2 2023 Q4   CA       NA   -0.12

Created on 2022-07-08 by the reprex package (v2.0.1)

CodePudding user response:

Another possible solution:

library(dplyr)

fill.na <- function(x, y)
{
  for (i in 1:length(x))
    if (is.na(x[i]))
      x[i] <- x[i-1] *(1 y[i-1])
  x
}

df %>%
  group_by(id, country) %>%
  mutate(value = fill.na(value, pct)) %>%
  ungroup

#> # A tibble: 6 × 5
#>      id range_key country value   pct
#>   <dbl> <chr>     <chr>   <dbl> <dbl>
#> 1 12345 2022 Q2   US      10    -0.2 
#> 2 12345 2022 Q3   US       8    -0.2 
#> 3 12345 2023 Q4   US       6.4  -0.2 
#> 4   221 2022 Q2   CA       5    -0.12
#> 5   221 2023 Q3   CA       4.4  -0.12
#> 6   221 2023 Q4   CA       3.87 -0.12

CodePudding user response:

Here is another approach

df <- plyr::ldply(by(df,df$id,subset), function(ID_subset){
    temp_subset <- plyr::ldply(by(ID_subset, ID_subset$country, subset), function(country_subset){
        country_subset
        for (i in 1:nrow(country_subset)) {
          if (is.na(country_subset$value[i])) {
            country_subset$value[i] <- country_subset$value[i - 1]   (country_subset$value[i - 1] * country_subset$pct[i - 1])
          }
        }}}
  • Related