Data Frame example:
# Example data: yearly goat and sheep populations (2000-2002) for two
# countries, one row per country/year/species combination.
Country <- rep(c("Angola", "Algeria"), each = 6)
Year <- rep(rep(c(2000, 2001, 2002), each = 2), times = 2)
# Species labels must be spelled identically in every row: a stray trailing
# space ("Sheep ") would be treated as a separate species and produce empty
# or single-row country/species groups when the data is split.
Species <- rep(c("Goats", "Sheep"), times = 6)
Pop <- c(20, 30, 22, 34, 18, 35, 24, 26, 40, 30, 23, 43)
data <- data.frame(Country, Year, Species, Pop)
The loop to predict the animal population for the next 10 years, per country and per species:
# Forecast the population 10 years ahead for every country/species pair and
# collect the forecasts in `newdata` (columns: Country, Year, Species,
# Predicted_Pop).
n_ahead <- 10
# Per-group results are gathered in a list and bound once at the end; this
# avoids both repeated rbind() calls and picking up a stale `newdata` left
# over from an earlier run (which the exists("newdata") check would do).
forecasts <- list()
for (country in unique(data$Country)) {
  for (species in unique(data$Species)) {
    # Filter the data frame by country and species
    temp <- subset(data, Country == country & Species == species)
    # Not every country/species combination has data; a model cannot be
    # fitted to an empty or single-observation series, so skip those.
    if (nrow(temp) < 2) {
      next
    }
    # Fit the ARIMA model
    model <- forecast::auto.arima(temp$Pop)
    # predict() returns a list of two time series: `pred` (point forecasts)
    # and `se` (standard errors).
    pred <- predict(model, n.ahead = n_ahead)
    forecasts[[length(forecasts) + 1L]] <- data.frame(
      Country = country,
      Year = max(temp$Year) + seq_len(n_ahead),
      Species = species,
      # Keep only the point forecasts, as a plain numeric vector: rbind()
      # fails on data frames whose columns are `ts` objects.
      Predicted_Pop = as.numeric(pred$pred)
    )
  }
}
newdata <- do.call(rbind, forecasts)
The code works for a single country but does not work when looping; I get the following error:
Error in `[<-.ts`(`*tmp*`, ri, value = c(7990484, 7990484, 7990484, 7990484, : only replacement of elements is allowed
As I understand it, the ARIMA prediction returns a list of two time series objects (pred and se), and rbind is the step that is unable to combine them into a data frame? Any pointers will be appreciated. I tried the replace() function but still got the same error. Thanks!
I expect the loop to produce a list of animal population for each country and each species for future 10 years.
SOLVED: Thanks to @Parfait's suggestion, I used his approach within my initial code:
library(forecast)
# Forecast the population 10 years ahead for every country/species pair and
# collect the point forecasts in `results` (columns: Country, Year, Species,
# Predicted_Pop). `results` stays an empty data frame if no group can be
# modelled.
results <- data.frame()
# Per-group forecasts are gathered in a list and bound once after the loops
# instead of calling rbind() on every iteration.
group_forecasts <- list()
for (country in unique(data$Country)) {
  for (species in unique(data$Species)) {
    # Filter the same data frame the loops iterate over
    temp <- subset(data, Country == country & Species == species)
    # Skip combinations with fewer than two observations
    if (nrow(temp) > 1) {
      # Fit the ARIMA model
      model <- auto.arima(temp$Pop)
      # forecast() takes the horizon as `h` (an `n.ahead` argument would be
      # silently ignored).
      pred <- forecast(model, h = 10)
      group_forecasts[[length(group_forecasts) + 1L]] <- data.frame(
        Country = country,
        Year = max(temp$Year) + seq_len(10),
        Species = species,
        # `pred$mean` holds the point forecasts; passing the whole forecast
        # object would expand into point-forecast and interval columns.
        Predicted_Pop = as.numeric(pred$mean)
      )
    }
  }
}
if (length(group_forecasts) > 0) {
  results <- do.call(rbind, group_forecasts)
}
I have also replaced the function predict() with the function forecast().
CodePudding user response:
The issue is likely due to an empty temp when slicing the data frame by unique values of Country and Species. Instead of nested for loops, consider generalizing your process into a single function, then split your data frame into those groups with by or split, and process each split through your defined method. This will have you working with a list of data frames that you can then rbind once at the end.
#' Forecast the population of a single country/species group.
#'
#' Fits an ARIMA model (via `auto.arima()`) to the `Pop` column of `temp` and
#' predicts the following years.
#'
#' @param temp Data frame holding one group, with columns `Country`, `Year`,
#'   `Species` and `Pop`.
#' @param n_ahead Number of future years to predict (default 10).
#' @return A data frame with columns `Country`, `Year`, `Species` and
#'   `Predicted_Pop` (one row per predicted year), or `NULL` when `temp` has
#'   fewer than two rows.
predict_population <- function(temp, n_ahead = 10) {
  # Splitting by Country x Species can yield empty or single-row groups;
  # return NULL for those so they drop out of the final rbind().
  if (NROW(temp) < 2) {
    return(NULL)
  }
  # Fit the ARIMA model
  model <- auto.arima(temp$Pop)
  # predict() returns a list of two time series: `pred` (point forecasts)
  # and `se` (standard errors).
  pred <- predict(model, n.ahead = n_ahead)
  data.frame(
    Country = temp$Country[1],
    Year = max(temp$Year) + seq_len(n_ahead),
    Species = temp$Species[1],
    # Store plain numbers rather than a `ts` object so the per-group data
    # frames can be combined with rbind().
    Predicted_Pop = as.numeric(pred$pred)
  )
}
# OPTION 1:
# Let `by` partition the rows by Country x Species and apply the
# forecasting function to each partition.
df_list <- by(
  data,
  INDICES = data[c("Country", "Species")],
  FUN = predict_population
)
# OPTION 2:
# Partition explicitly with `split`, then map the forecasting function over
# the resulting pieces with `lapply`.
groups <- split(data, data[c("Country", "Species")])
df_list <- lapply(groups, predict_population)
# Stack every per-group data frame into one master data frame.
new_data <- do.call(rbind, unname(df_list))