I have the following loop that runs 5 different Random Forest Models on 5 different datasets (these datasets are in a list object):
# Base Loop : Works Fine
# Fit one random forest per training set in X, then score the shared test set
# with each fitted model.
results_1 <- vector("list", length(X))  # fitted models, one per data set
results_2 <- vector("list", length(X))  # class-probability predictions
for (i in seq_along(X)) {
  # Predictors in an R formula must be joined with `+`.
  model_i <- randomForest(class ~ height + weight + salary, data = X[[i]])
  predict_i <- data.frame(predict(model_i, test_set, type = "prob"))
  # Row identifier; seq_len() stays empty when there are zero rows.
  predict_i$id <- seq_len(nrow(predict_i))
  results_1[[i]] <- model_i
  results_2[[i]] <- predict_i
}
I would like to save each of these models (in "my documents") using the "model_i.RDS" naming format. If this worked, 5 RDS files (model_1.RDS, model_2.RDS, model_3.RDS, model_4.RDS, model_5.RDS) would be created in "my documents".
I thought R would pick up on this by itself seeing as I have defined the "index i" already:
# Method 1: Does Not Work
# Same loop as before, with an attempt to save each fitted model to disk.
results_1 <- vector("list", length(X))  # fitted models, one per data set
results_2 <- vector("list", length(X))  # class-probability predictions
for (i in seq_along(X)) {
  # Predictors in an R formula must be joined with `+`.
  model_i <- randomForest(class ~ height + weight + salary, data = X[[i]])
  # NOTE: "model_i.RDS" is a fixed string; R does not substitute the loop
  # index into it, so every iteration overwrites the same file. This is the
  # behaviour being asked about and is left unchanged here.
  saveRDS(model_i, "model_i.RDS")
  predict_i <- data.frame(predict(model_i, test_set, type = "prob"))
  # Row identifier; seq_len() stays empty when there are zero rows.
  predict_i$id <- seq_len(nrow(predict_i))
  results_1[[i]] <- model_i
  results_2[[i]] <- predict_i
}
But this is only saving a single "RDS" file.
I then tried to be more explicit with the saving command:
# Method 2: Also Not Working
# Same loop again, now trying to build the output path explicitly.
wd <- getwd()
results_1 <- vector("list", length(X))  # fitted models, one per data set
results_2 <- vector("list", length(X))  # class-probability predictions
for (i in seq_along(X)) {
  # Predictors in an R formula must be joined with `+`.
  model_i <- randomForest(class ~ height + weight + salary, data = X[[i]])
  # NOTE: the quoted "wd" is a literal string, not the `wd` variable, and
  # paste() separates its arguments with a space by default, so the name
  # built here is e.g. "wdmodel_ 1 .RDS" rather than "<wd>/model_1.RDS".
  # This is the attempt being asked about and is left unchanged here.
  saveRDS(model_i, paste0("wd", paste("model_", i, ".RDS")))
  predict_i <- data.frame(predict(model_i, test_set, type = "prob"))
  # Row identifier; seq_len() stays empty when there are zero rows.
  predict_i$id <- seq_len(nrow(predict_i))
  results_1[[i]] <- model_i
  results_2[[i]] <- predict_i
}
But this still isn't working (a single file is being saved instead of 5 files).
Can someone please show me how to fix this problem?
Note : Sample Data for Problem:
library(randomForest)
# Build one simulated data set: a binary factor outcome `class` and three
# numeric predictors drawn from a normal distribution (mean 100, sd 100).
# Columns are generated in the order class, height, weight, salary.
simulate_sample <- function(n_rows = 100) {
  data.frame(
    class = as.factor(sample(c(0, 1), replace = TRUE, size = n_rows)),
    height = rnorm(n_rows, mean = 100, sd = 100),
    weight = rnorm(n_rows, mean = 100, sd = 100),
    salary = rnorm(n_rows, mean = 100, sd = 100)
  )
}

# Held-out data scored by every model.
test_set <- simulate_sample()

# Five independent training sets, drawn one after another.
train_data_1 <- simulate_sample()
train_data_2 <- simulate_sample()
train_data_3 <- simulate_sample()
train_data_4 <- simulate_sample()
train_data_5 <- simulate_sample()

# data used in question
X <- list(train_data_1, train_data_2, train_data_3, train_data_4, train_data_5)
CodePudding user response:
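In the last attempt, the OP used `paste`, but there was no `/` between the working directory and the file name, so it is safer to use `file.path`. In addition, the `wd` used was `"wd"`, which is taken literally as a string instead of the value stored in the object. Note also that `paste` separates its arguments with a space by default, so `paste0` should be used to build the file name. Instead, it can be `paste0(wd, "/model_", i, ".RDS")`, or equivalently with `file.path`: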
# Fit one random forest per training set in X, save each model to its own
# RDS file (model_1.RDS, model_2.RDS, ...) in the working directory, and score
# the shared test set with each fitted model.
wd <- getwd()  # directory the model files are written to
results_1 <- vector("list", length(X))  # fitted models, one per data set
results_2 <- vector("list", length(X))  # class-probability predictions
for (i in seq_along(X)) {
  # Predictors in an R formula must be joined with `+`.
  model_i <- randomForest(class ~ height + weight + salary, data = X[[i]])
  # paste0() splices the loop index into the file name without spaces;
  # file.path() adds the separator between directory and file name.
  saveRDS(model_i, file.path(wd, paste0("model_", i, ".RDS")))
  predict_i <- data.frame(predict(model_i, test_set, type = "prob"))
  # Row identifier; seq_len() stays empty when there are zero rows.
  predict_i$id <- seq_len(nrow(predict_i))
  results_1[[i]] <- model_i
  results_2[[i]] <- predict_i
}
-output