I have the following dataframe:
dat <- tibble::tribble( ~date , ~pct_monthly_return,
2021-01-31, 0.0023,
2021-02-28, 0.01,
2021-03-31, 0.035)
I would like to create a new column called index
which starts at 100 a month before the dataframe starts (i.e. 2020-12-31 in my example). The index of 100 must then be compounded by pct_monthly_return
variable so that I can see how the index grows over time.
The result should produce the following dataframe:
date pct_monthly_return index
2020-12-31 0 100
2021-01-31 0.0023 100.23
2021-02-28 0.01 101.2323
2021-03-31 0.035 104.7754
CodePudding user response:
We can take the cumulative product of the returns expressed as coefficients, i.e. 1 pct_monthly_return
dat$index = 100 * cumprod(1 dat$pct_monthly_return)
Then to add the header row you described:
rbind(data.frame(date = "2020-12-31", index = 100, pct_monthly_return = 0),
dat)
date index pct_monthly_return
1 2020-12-31 100.0000 0.0000
2 2021-01-31 100.2300 0.0023
3 2021-02-28 101.2323 0.0100
4 2021-03-31 104.7754 0.0350
Sample data
dat <- tibble::tribble( ~date , ~pct_monthly_return,
"2021-01-31", 0.0023,
"2021-02-28", 0.01,
"2021-03-31", 0.035)
CodePudding user response:
Base R:
Use sapply
and cumprod
:
> cbind(dat, index=as.vector(sapply(dat[,-1] 1, cumprod) * 100))
date pct_monthly_return index
1 2021-01-31 0.0023 100.2300
2 2021-02-28 0.0100 101.2323
3 2021-03-31 0.0350 104.7754
>
Or even better:
> cbind(dat, index=cumprod(1 dat$pct_monthly_return) * 100)
date pct_monthly_return index
1 2021-01-31 0.0023 100.2300
2 2021-02-28 0.0100 101.2323
3 2021-03-31 0.0350 104.7754
>
CodePudding user response:
Base R solution:
# Import data: df => data.frame
df <- structure(list(date = structure(c(18658L, 18686L, 18717L), class = c(
"Date")), pct_monthly_return = c(0.0023, 0.01, 0.035)), class = "data.frame", row.names = c(NA,
-3L))
# Add new row calculate index: res => data.frame
res <- transform(
rbind(
data.frame(
date = as.Date("2020-12-31"),
pct_monthly_return = 0.000
),
df
),
index = 100 * cumprod(1 pct_monthly_return)
)
Tidyverse solution:
# Import and initialise package:
library(tidyverse)
# Import tibble: df1 => tibble
df1 <- structure(list(date = structure(c(18658L, 18686L, 18717L), class = c("Date")), pct_monthly_return = c(0.0023, 0.01, 0.035)), row.names = c(NA, -3L),
class = c("tbl_df", "tbl", "data.frame"))
# Calculate answer: res1 => tibble
res1 <- tibble(date = as.Date("2020-12-31"),
pct_monthly_return = 0.000) %>%
bind_rows(., df1) %>%
mutate(index = 100 * cumprod(1 pct_monthly_return))
data.table solution:
# Import and intialise data.table package:
library(data.table)
# Import data: dat => data.table
dat <- structure(list(date = structure(c(18658, 18686, 18717), class = "Date"),
pct_monthly_return = c(0.0023, 0.01, 0.035)), row.names = c(NA,
-3L), class = c("data.table", "data.frame"),
.internal.selfref = <pointer: 0x000002704fbd1ef0>)
# Add row and calculate index: dat_res => data.table
dat_res <- rbindlist(
list(
dat1 = data.table(
date = as.Date("2020-12-31"),
pct_monthly_return = 0.000
),
dat
)
)[,index := 100 * cumprod(1 pct_monthly_return),]
CodePudding user response:
This can be done even with a simple while
loop:
#Assuming you'll always want the first date to be the first row of the new data frame
rbind(data.frame(date=dat$date[1], pct_monthly_return = 0), dat)-> dat
index <- c(100)
i <- 2
while (i <=nrow(dat)) {
index[i] <- (1 dat$pct_monthly_return[i])* index[i-1]
i<- i 1
}
dat$index <- index
> dat
date pct_monthly_return index
1 2021-01-31 0.0000 100.0000
2 2021-01-31 0.0023 100.2300
3 2021-02-28 0.0100 101.2323
4 2021-03-31 0.0350 104.7754
Sample data:
dat <- tibble::tribble( ~date , ~pct_monthly_return,
"2021-01-31", 0.0023,
"2021-02-28", 0.01,
"2021-03-31", 0.035)