Home > Enterprise >  Matrix error in code converted from Matlab to R
Matrix error in code converted from Matlab to R

Time:08-31

To perform cross-validation on CCLE (Cancer Cell Line Encyclopedia) drug data, I tried to convert the following code from Matlab to R. However, I was unsuccessful. The Matlab code works fine and creates both a *cross.mat file, which holds the 10-fold CV groups for each data set, and a *data.mat file, which holds the grouped data from 10 repetitions of CV for each data set.

I would appreciate it if you could help me find my mistake.

#This function is about 10-fold cross-validation data grouping
getcrossMatrixs <- function(MM){
  library(pracma)
  N <- nnz(MM)
  zeroM <- matrix(0L, nrow = dim(MM)[1], ncol = dim(MM)[2])
  D <- randperm(N)
  first <- floor(N/10) 
  
  w = which(MM != 0, arr.ind=TRUE);
  nrows=w[,1]; ncols=w[,2]
  crossdata  <-  list() 
  
  for (i in 1:10) {
    crossdata[[i]] <- zeroM
  }
  
  for (i in 1:10){
    for (j in (1 (i-1)*first):(i*first)){
      crossdata[[i]][c(nrows[D[j]]),c(ncols[D[j]]) ] <- MM[c(nrows[D[j]]),c(ncols[D[j]])]
      
    }
  }
  k <- (N-(10*first))
  i <- 10*first 1
  for (j in 1:k){
    crossdata[[j]][c(nrows[D[i]]),c(ncols[D[i]]) ] <- MM[c(nrows[D[i]]),c(ncols[D[i]])]
    
    i <- i 1
  }
}

#The following lines is the main for calling above function. 
library(foreach)
n.cores <- parallel::detectCores()
my.cluster <- parallel::makeCluster(
  n.cores, 
  type = "PSOCK"
)
print(my.cluster)
#> socket cluster with 16 nodes on host 'localhost'
doParallel::registerDoParallel(cl = my.cluster)
foreach::getDoParRegistered()
#> [1] TRUE
CCLEdata <- list()
#MM<-matrix(read_csv("MM.csv", col_names = FALSE, show_col_types = FALSE), rownames.force = NA)
MM <- matrix(seq(0, 4.5, length.out = 11784), nrow = 491) #datamatrix like CCLE drug activity area sensitivity matrrix(491*24)
foreach(i = 1:10) %dopar% {
  CCLEcross <- getcrossMatrixs(MM)
  CCLEdata[[i]] <- CCLEcross
}
#> [[1]]
#> NULL
#> 
#> [[2]]
#> NULL
#> 
#> [[3]]
#> NULL
#> 
#> [[4]]
#> NULL
#> 
#> [[5]]
#> NULL
#> 
#> [[6]]
#> NULL
#> 
#> [[7]]
#> NULL
#> 
#> [[8]]
#> NULL
#> 
#> [[9]]
#> NULL
#> 
#> [[10]]
#> NULL

Created on 2022-08-29 with reprex v2.0.2

Actually, when I use the original CCLE dataset, the error raised in main.R changes to:

Error in { : task 1 failed - "is.numeric(x) || is.complex(x) is not TRUE"
or
Error in { : 
  task 1 failed - "attempt to select less than one element in integerOneIndex"
%These are from Matlab
function [crossdata] = getcrossMatrixs(MM)
N = nnz(MM(:)); 
zeroM = zeros(size(MM));
D = randperm(N); 
first = floor(N/10); 
[nrows,ncols] = find(MM); 
crossdata = {};
for i = 1:10
    crossdata{i} = zeroM;
end
for i = 1:10
    for j = 1 (i-1)*first:i*first
        crossdata{i}(nrows(D(j)),ncols(D(j))) = MM(nrows(D(j)),ncols(D(j)));
    end
end
 k=N -10*first ;
 i=10*first 1;
  for j=1:k
 crossdata{j}(nrows(D(i)),ncols(D(i))) = MM(nrows(D(i)),ncols(D(i)));
  i=i 1;
  end
end

load('MM.mat')
parfor i=1:10
    [CCLEcross] = getcrossMatrixs(MM);
    CCLEdata{i}=CCLEcross;
end

CodePudding user response:

I didn't look too closely to figure out what was wrong. I based this function on the Matlab function supplied. Note that for this particular example, going parallel is more expensive due to overhead. Parallel execution will only pay off with large enough matrices and/or more samples.

library(parallel)

MM <- matrix(seq(0, 4.5, length.out = 11784), nrow = 491)

getcrossMatrixs <- function(MM, parts = 10L) {
  D <- sample(which(MM != 0))
  first <- length(D) %/% parts
  last <- length(D) %% parts
  idx <- c(0L, cumsum(c(rep(first   1L, last), rep(first, parts - last))))
  mZero <- matrix(0, nrow(MM), ncol(MM))
  lapply(1:parts, function(i, m) {m[D[(idx[i]   1L):idx[i   1L]]] <- MM[D[(idx[i]   1L):idx[i   1L]]]; m}, mZero)
}

reps <- 10L
clust <- makeCluster(min(detectCores() - 1L, reps))
clusterExport(clust, c("getcrossMatrixs", "MM"))
CCLEdata <- parLapply(clust, 1:reps, function(x) getcrossMatrixs(MM))
stopCluster(clust)

# check that each set of matrices returned has all elements of MM
identical(rep(list(MM), reps), lapply(1:reps, function(i) Reduce(" ", CCLEdata[[i]], matrix(0, nrow(MM), ncol(MM)))))
#> [1] TRUE

And here's a cleaned-up version of the Matlab function:

function [crossdata] = getcrossMatrixs(MM)
    idx = find(MM);
    N = length(nrows); 
    zeroM = zeros(size(MM));
    idx = idx(randperm(N)); 
    first = floor(N/10); 
    crossdata = cell(10, 1);
    for i = 1:10
        crossdata{i} = zeroM;
    end
    for i = 1:10
        j = 1   (i - 1)*first:i*first;
        crossdata{i}(idx(j)) = MM(idx(j));
    end
    k = N - 10*first;
    j = 10*first   1;
    for i = 1:k
        crossdata{i}(idx(j)) = MM(idx(j));
        j = j   1;
    end
end
  • Related