In my dataframe named combined I want to do a cumulative difference for multiple columns in one go. Here is the sample code:
#The column names of variables where i want to do cumdiff is
#stored in a vector
stock_vars<-c("bank_s_l", "bank_u_l", "ofi_s_l", "ofi_u_l",
"pcs_s_l", "pcs_u_l", "govt_s_l","govt_u_l",
"rotw_s_l", "rotw_u_l","hh_s_l", "hh_u_l",
"total_s_l", "total_u_l")
#Trying to do cumdiff across columns
combined<-combined%>%mutate_at(stock_vars, ~accumulate(.x-.y))
accumulate did not work for multiple columns. Here is the desired output for first 5 observations of one column
bank_s_l desired output
-555497.2351 -303573.8477
-251923.3874 -256895.1516
4971.764211 -63187.78958
68159.55379 231826.2753
-163666.7216
CodePudding user response:
Here is a solution. Write a function, newdiff
to compute the differences. It accepts 3 arguments:
x
the input vector;fill
the differences vector has one less element, this argument tells what value will make the output complete;last
is thefill
value placed last, at the end or at the beginning.
x <- c(-555497.235114064, -251923.387378569, 4971.76421076874, 68159.5537924978,
-163666.721553141, 105308.587380903, -398216.766893477, -88543.641285372,
-51652.6806000001, -39545.33742, -6600, 72500, -296795, 67737,
-54438, -100555, -91767, -26238, -40343, -29728)
newdiff <- function(x, fill = NA, last = TRUE){
y <- x[-length(x)] - x[-1]
if(last)
c(y, fill)
else
c(fill, y)
}
res <- cbind(x, y = newdiff(x))
head(res)
#> x y
#> [1,] -555497.235 -303573.85
#> [2,] -251923.387 -256895.15
#> [3,] 4971.764 -63187.79
#> [4,] 68159.554 231826.28
#> [5,] -163666.722 -268975.31
#> [6,] 105308.587 503525.35
Created on 2022-03-13 by the reprex package (v2.0.1)
Now to compute the differences of several columns, use a standard mutate(across(.))
.
suppressPackageStartupMessages(library(dplyr))
newdiff <- function(x, fill = NA, last = TRUE){
y <- x[-length(x)] - x[-1]
if(last)
c(y, fill)
else
c(fill, y)
}
iris %>%
mutate(across(Sepal.Length:Petal.Width, newdiff)) %>% head()
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> 1 0.2 0.5 0.0 0.0 setosa
#> 2 0.2 -0.2 0.1 0.0 setosa
#> 3 0.1 0.1 -0.2 0.0 setosa
#> 4 -0.4 -0.5 0.1 0.0 setosa
#> 5 -0.4 -0.3 -0.3 -0.2 setosa
#> 6 0.8 0.5 0.3 0.1 setosa
Created on 2022-03-13 by the reprex package (v2.0.1)
CodePudding user response:
This is the inverse of cumsum:
my_func <- function(x){
c( x[1], diff(x,1) )
}
library(dplyr)
df %>%
mutate(x = my_func(value))
# applying it to iris dataset: (idea from Rui Barradas)
iris %>%
mutate(across(Sepal.Length:Petal.Width, my_func)) %>% head()
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 5.1 3.5 1.4 0.2 setosa
2 -0.2 -0.5 0.0 0.0 setosa
3 -0.2 0.2 -0.1 0.0 setosa
4 -0.1 -0.1 0.2 0.0 setosa
5 0.4 0.5 -0.1 0.0 setosa
6 0.4 0.3 0.3 0.2 setosa
value x
<dbl> <dbl>
1 -555497. -555497.
2 -251923. 303574.
3 4972. 256895.
4 68160. 63188.
5 -163667. -231826.
6 105309. 268975.
7 -398217. -503525.
8 -88544. 309673.
9 -51653. 36891.
10 -39545. 12107.
11 -6600 32945.
12 72500 79100
13 -296795 -369295
14 67737 364532
15 -54438 -122175
16 -100555 -46117
17 -91767 8788
18 -26238 65529
19 -40343 -14105
20 -29728 10615
df <- as_tibble(c(-555497.235114064, -251923.387378569, 4971.76421076874, 68159.5537924978,
-163666.721553141, 105308.587380903, -398216.766893477, -88543.641285372,
-51652.6806000001, -39545.33742, -6600, 72500, -296795, 67737,
-54438, -100555, -91767, -26238, -40343, -29728))