I am trying to calculate a monthly price index for multiple countries which have different base date.
Here is a sample dataframe.
Date <- as.Date(c("2004-01-01","2004-02-01", "2004-03-01","2004-04-01","2004-05-01","2004-06-01","2004-07-01", "2004-08-01","2004-09-01","2004-10-01","2004-11-01","2004-12-01","2005-01-01","2005-02-01","2005-03-01","2005-04-01","2005-05-01","2005-06-01",
"2009-01-01","2009-02-01", "2009-03-01","2009-04-01","2009-05-01","2009-06-01","2009-07-01","2009-08-01","2009-09-01","2009-10-01","2009-11-01","2009-12-01","2010-01-01","2010-02-01","2010-03-01","2010-04-01","2010-05-12","2010-06-01","2006-06-01","2006-07-01","2006-08-01","2006-09-01","2006-10-01","2006-11-01","2006-12-01","2007-01-01","2007-02-01","2007-03-01","2007-04-01","2007-05-01","2007-06-01"))
Country <- c("Netherlands", "Netherlands", "Netherlands", "Netherlands", "Netherlands", "Netherlands","Netherlands", "Netherlands", "Netherlands", "Netherlands", "Netherlands", "Netherlands","Netherlands", "Netherlands", "Netherlands", "Netherlands", "Netherlands", "Netherlands","Belgium", "Belgium", "Belgium", "Belgium", "Belgium", "Belgium",
"Belgium", "Belgium", "Belgium", "Belgium", "Belgium", "Belgium",
"Belgium", "Belgium", "Belgium", "Belgium", "Belgium", "Belgium",
"France", "France", "France","France", "France", "France","France", "France", "France","France", "France", "France","France")
Amount <- as.numeric(c(100,50,40,3.50,14.60,11.60,140.20,140.30,147.30,151.20,159.60,165.60,173.20,177.30,185.30,199.30,217.10,234.90,10.20,3.10,16.00,4.90,17.60,5.30,10.90,12.80,8.40,140.20,140.30,147.30,151.20,159.60,165.60,173.20,177,40,50,.30,185.30,199.30,217.10,234.9,500,300,0,8.40,140.20,140.30,3))
df <- data.frame(Date,Country, Amount)
I tried to find the earliest date for each country but following code gives me only the earliest date per row.
df <- df %>%
group_by(Country,Date)%>%
mutate(Base_date = pmin(Date))
I would want to have a dataframe like this. The column Base_Date shows the earliest date for each country and PriceIndex takes into consideration Base_Date which is different for each country.
Base_Date <- as.Date(c("1.1.04","1.1.04","1.1.04","1.1.04","1.1.04","1.1.04","1.1.04", "1.1.04", "1.1.04","1.1.04","1.1.04","1.1.04","1.1.04","1.1.04","1.1.04","1.1.04", "1.1.04","1.1.04","1.1.09","1.1.09","1.1.09","1.1.09","1.1.09","1.1.09","1.1.09","1.1.09","1.1.09","1.1.09","1.1.09","1.1.09","1.1.09","1.1.09","1.1.09","1.1.09", "1.1.09","1.1.09","1.1.09","1.1.09","1.8.06","1.8.06","1.8.06",
"1.8.06","1.8.06","1.8.06","1.8.06","1.8.06","1.8.06","1.8.06","1.8.06"))
PriceIndex <- c(100,-50,-60,-96.5,-85.4,-88.4,40.2,40.3,47.3,51.2,59.6,65.6,73.2,77.3,85.3,99.3,117.1,134.9,100,-69.61,56.86,-51.96,72.55,-48.04,6.86,25.49,-17.65,1274.51,1275.49,1344.12,1382.35,1464.71,1523.53,1598.04,1635.29,292.16,100,-99.4,270.6,298.6,334.2,369.8,900,500,-100,-83.2,180.4,180.6,-94)
df <- data.frame(Date,Country, Amount,Base_Date,PriceIndex)
Thank you for your suggestions.
CodePudding user response:
Maybe you want something like this:
library(dplyr)
df %>%
group_by(Country) %>%
mutate(Base_date = min(Date),
PriceIndex = Amount - first(Amount))
Output:
# A tibble: 49 × 5
# Groups: Country [3]
Date Country Amount Base_date PriceIndex
<date> <chr> <dbl> <date> <dbl>
1 2004-01-01 Netherlands 100 2004-01-01 0
2 2004-02-01 Netherlands 50 2004-01-01 -50
3 2004-03-01 Netherlands 40 2004-01-01 -60
4 2004-04-01 Netherlands 3.5 2004-01-01 -96.5
5 2004-05-01 Netherlands 14.6 2004-01-01 -85.4
6 2004-06-01 Netherlands 11.6 2004-01-01 -88.4
7 2004-07-01 Netherlands 140. 2004-01-01 40.2
8 2004-08-01 Netherlands 140. 2004-01-01 40.3
9 2004-09-01 Netherlands 147. 2004-01-01 47.3
10 2004-10-01 Netherlands 151. 2004-01-01 51.2
# … with 39 more rows