I'm trying to get a simple `%dopar%` example working so I can parallelize some very expensive cross-validation calculations. When I run the following code, my result array `r` is filled with `NA`:
# Reproduction script from the question: fills a 3 x 3 array with per-vector
# means computed through foreach/%dopar% on a local cluster.
library(doParallel)
library(parallel)
library(foreach)
# Nine small integer vectors; the trailing comment on each line gives its mean.
data1.1 = 1:9 # mean(data1.1) = 5
data1.2 = 2:10 # mean(data1.2) = 6
data1.3 = 3:11 # mean(data1.3) = 7
data2.1 = 4:12 # mean(data2.1) = 8
data2.2 = 5:13 # mean(data2.2) = 9
data2.3 = 6:14 # mean(data2.3) = 10
data3.1 = 7:15 # mean(data3.1) = 11
data3.2 = 8:16 # mean(data3.2) = 12
data3.3 = 9:17 # mean(data3.3) = 13
# Nested list: the outer names "1".."3" pick a group, the inner names "1".."3"
# pick one vector inside that group.
data_list = list("1" = list("1" = data1.1, "2" = data1.2, "3" = data1.3),
"2" = list("1" = data2.1, "2" = data2.2, "3" = data2.3),
"3" = list("1" = data3.1, "2" = data3.2, "3" = data3.3))
# parallel::detectCores() # 16 on Ryzen 7 system
# Start a cluster with two fewer workers than the detected core count.
# NOTE(review): the documented makeCluster option for redirecting worker
# output is `outfile`; `output` looks like a typo for it -- confirm.
cl = parallel::makeCluster(detectCores() - 2, output = "")
# activate cluster for foreach library
doParallel::registerDoParallel(cl)
# 3 x 3 result array pre-filled with 0; row i receives the results for
# data_list[[i]].
r = array(0, c(3, 3))
for (i in 1:3) {
# One task per inner element j; the per-task results are combined with rbind
# and assigned into row i of r.
r[i, ] = foreach::foreach(j = 1:length(data_list[[i]]),
.combine = rbind) %dopar% {
# NOTE: single-bracket `[j]` on a list returns a one-element list, not the
# integer vector inside it, and mean() of a list returns NA (with a warning).
# Double-bracket `[[j]]` would extract the vector itself.
mean(data_list[[i]][j])
}
}
# stop cluster to free up resources
parallel::stopCluster(cl)
The result I'm expecting in the `r` array is:
[,1] [,2] [,3]
[1,] 5 6 7
[2,] 8 9 10
[3,] 11 12 13
CodePudding user response:
If you create a helper function and run that in `foreach`, it might simplify things. You can let `foreach` be the iterator over the list elements, so you can skip the `for` loop. Then each worker will have a clear set of data (one list element) and a function (`mean_inner_list`) to run.
# Nine small integer vectors; the trailing comment on each line gives its mean.
data1.1 <- 1:9 # mean(data1.1) = 5
data1.2 <- 2:10 # mean(data1.2) = 6
data1.3 <- 3:11 # mean(data1.3) = 7
data2.1 <- 4:12 # mean(data2.1) = 8
data2.2 <- 5:13 # mean(data2.2) = 9
data2.3 <- 6:14 # mean(data2.3) = 10
data3.1 <- 7:15 # mean(data3.1) = 11
data3.2 <- 8:16 # mean(data3.2) = 12
data3.3 <- 9:17 # mean(data3.3) = 13
# Nested list: one inner list of three vectors per outer element.
data_list <- list(
  "1" = list("1" = data1.1, "2" = data1.2, "3" = data1.3),
  "2" = list("1" = data2.1, "2" = data2.2, "3" = data2.3),
  "3" = list("1" = data3.1, "2" = data3.2, "3" = data3.3)
)
#----------
library(foreach)

# Mean of every vector in one inner list.
#
# x: a list of numeric vectors.
# Returns a numeric vector with one mean per element of x, carrying x's names.
# vapply() is used instead of lapply() so each task yields a numeric vector;
# rbind-ing lists would produce a list-matrix that prints the same but cannot
# be used in arithmetic.
mean_inner_list <- function(x) vapply(x, mean, numeric(1))

# foreach hands one inner list to each task; rbind stacks the per-task vectors
# into a numeric matrix with one row per outer element of data_list.
foreach(x = data_list, .combine = rbind) %dopar% mean_inner_list(x)
#> Warning: executing %dopar% sequentially: no parallel backend registered
#> 1 2 3
#> result.1 5 6 7
#> result.2 8 9 10
#> result.3 11 12 13
Created on 2022-12-07 by the reprex package (v2.0.0)
CodePudding user response:
It looks like you have an extra . in front of dopar in your foreach statement. Try removing that and see if it fixes the problem.
Here is the updated code with the . removed:
library(doParallel)
library(parallel)
library(foreach)

# Nine small integer vectors; the trailing comment on each line gives its mean.
data1.1 <- 1:9 # mean(data1.1) = 5
data1.2 <- 2:10 # mean(data1.2) = 6
data1.3 <- 3:11 # mean(data1.3) = 7
data2.1 <- 4:12 # mean(data2.1) = 8
data2.2 <- 5:13 # mean(data2.2) = 9
data2.3 <- 6:14 # mean(data2.3) = 10
data3.1 <- 7:15 # mean(data3.1) = 11
data3.2 <- 8:16 # mean(data3.2) = 12
data3.3 <- 9:17 # mean(data3.3) = 13
# Nested list: one inner list of three vectors per outer element.
data_list <- list(
  "1" = list("1" = data1.1, "2" = data1.2, "3" = data1.3),
  "2" = list("1" = data2.1, "2" = data2.2, "3" = data2.3),
  "3" = list("1" = data3.1, "2" = data3.2, "3" = data3.3)
)

# parallel::detectCores() # 16 on Ryzen 7 system
# Leave two cores free, but always start at least one worker: detectCores()
# can return NA or a value <= 2, which would otherwise make makeCluster() fail.
n_workers <- max(1L, parallel::detectCores() - 2L, na.rm = TRUE)
# `outfile = ""` is the documented option that sends worker output to the
# master console (the original `output = ""` is not a recognised option).
cl <- parallel::makeCluster(n_workers, outfile = "")
# activate cluster for foreach library
doParallel::registerDoParallel(cl)

# Result array: one row per outer list element, one column per inner vector.
r <- array(0, c(length(data_list), length(data_list[[1]])))
tryCatch(
  {
    for (i in seq_along(data_list)) {
      # One task per inner vector; `.combine = c` returns a plain numeric
      # vector that fills row i.
      r[i, ] <- foreach::foreach(
        j = seq_along(data_list[[i]]),
        .combine = c
      ) %dopar% {
        # `[[j]]` extracts the numeric vector itself. Single-bracket `[j]`
        # returns a one-element list, and mean() of a list is NA -- that was
        # the cause of the all-NA result.
        mean(data_list[[i]][[j]])
      }
    }
  },
  finally = {
    # stop cluster to free up resources, even if a task failed
    parallel::stopCluster(cl)
  }
)