Extracting dataframe values using indices in R-CodePudding

I have 100 files and have starting and ending coordinates for each file. So based on starting and ending coordinates, I want to extract the regions from all data sets and want to store in file. I have used following approach but its not giving me the expected out put. startco have the starting indices of 1st 2nd 3rd file respectively and endco have ending indices of 1st 2nd 3rd file respectively. And if the indices is going beyond the files indices want to put NA

Example:
startco<-c(3,4,1)
endco<-c(5,6,2)
ctc<-c(1,2,3)

for (hm0 in 1:length(ctc)) {
  for (hm1 in 1:length(startco)) {
    for (hm2 in 1:length(endco)) {
  
  methd1<-read.table( paste0("path/to folder/","file_",ctc[hm0],".txt"))
  methd2<- methd1[,startco[hm1]:endco[hm2]]
  
}
  }
}

File_1.txt

V1   V2   V3   V4   V5
41   42    43   45    46
0.31 0.21 0.87 0.65 0.54    
0.32 0.28 0.74 0.87 0.65 
0.19 0.12 0.99 0.99 0.89 

File_2.txt

V1   V2   V3   V4   V5
12    24   13    14   16
0.89 0.78 0.50 0.22 0.34
0.54 0.78 0.50 0.34 0.41
0.78 0.54 0.66 0.26 0.14

File_3.txt

V1   V2   V3   V4   V5
1    2     3    5    6
0.20 0.40 0.50 0.49 0.52 


Expected output :
43   45    46
0.87 0.65 0.54
0.74 0.87 0.65 
0.99 0.99 0.89 
0.22 0.34 NA
0.34 0.41 NA
0.99 0.89 NA
 1 2
 0.20 0.40

CodePudding user response：

in Base R you could do:

 fun <- function(path, start, end){
  id <- basename(path)
  dat <- read.table(path, header = TRUE)
  p <- ncol(dat)
  n <- nrow(dat)
  neg <- if(start<0) -start else 0 
  add <- matrix(nrow = n, ncol = neg)
  if (start < 1) start <- 1
  if (end > p) end <- p
  d <- cbind(add, dat[, start:end])
  names(d) <- paste0('X', seq(ncol(d)))
  cbind(id,r = seq(nrow(d)),  d)
}

startco<-c(3,4,-2) # TAKES NEGATIVE INDICES
endco<-c(5,6,2)
ctc<-c(1,2,3)

files <- file.path('path/to/folder', ctc)
A <- Map(fun, files, startco, endco)
Reduce(function(x, y)merge(x,y, all =TRUE), A)[, -(1:2)]

     X1    X2    X3  X4
1  43.00 45.00 46.00  NA
2   0.87  0.65  0.54  NA
3   0.74  0.87  0.65  NA
4   0.99  0.99  0.89  NA
5  14.00 16.00    NA  NA
6   0.22  0.34    NA  NA
7   0.34  0.41    NA  NA
8   0.26  0.14    NA  NA
9     NA    NA  1.00 2.0
10    NA    NA  0.20 0.4

The one with no negatives

startco<-c(3,4,1)
B <- Map(fun, files, startco, endco)
Reduce(function(x, y)merge(x,y, all =TRUE), B)[, -(1:2)]
     X1    X2    X3
1  43.00 45.00 46.00
2   0.87  0.65  0.54
3   0.74  0.87  0.65
4   0.99  0.99  0.89
5  14.00 16.00    NA
6   0.22  0.34    NA
7   0.34  0.41    NA
8   0.26  0.14    NA
9   1.00  2.00    NA
10  0.20  0.40    NA

CodePudding user response：

I would use a readfun,

readfun <- \(i, s, e) {
  stopifnot(s != 0)
  r <- read.table(paste0("foo1/", "file_", i, ".txt"), header=TRUE)
  if (e > ncol(r)) {  ## inserts cols to the right if e > ncol
    e1 <- e - ncol(r)
    nm <- paste0('V', as.numeric(substring(colnames(r), 2)[ncol(r)])   seq_len(e1))
    m <- matrix(NA_real_, nrow(r), e1, dimnames=list(NULL, nm))
    r <- cbind(r, m)
  }
  if (s < 0) {  ## inserts cols to the left if s < 0
    m <- matrix(NA_real_, nrow(r), -s)
    r <- cbind(m, r)
    e <- e   -s
    s <- 1
  }
  out <- r[, s:e]
  unname(as.matrix(out))
}

in Map.

ctc <- c(1, 2, 3); startco <- c(3, 4, -2); endco <- c(5, 6, 2)

Map(readfun, ctc, startco, endco)
# [[1]]
#       [,1]  [,2]  [,3]
# [1,] 43.00 45.00 46.00
# [2,]  0.87  0.65  0.54
# [3,]  0.74  0.87  0.65
# [4,]  0.99  0.99  0.89
# 
# [[2]]
#       [,1]  [,2] [,3]
# [1,] 14.00 16.00   NA
# [2,]  0.22  0.34   NA
# [3,]  0.34  0.41   NA
# [4,]  0.26  0.14   NA
# 
# [[3]]
#      [,1] [,2] [,3] [,4]
# [1,]   NA   NA  1.0  2.0
# [2,]   NA   NA  0.2  0.4

Data:

dir.create('foo1')

write.table(read.table(header=TRUE, text='
V1   V2   V3   V4   V5
41   42    43   45    46
0.31 0.21 0.87 0.65 0.54    
0.32 0.28 0.74 0.87 0.65 
0.19 0.12 0.99 0.99 0.89'), './foo1/file_1.txt', row.names=F)

write.table(read.table(header=TRUE, text='
V1   V2   V3   V4   V5
12    24   13    14   16
0.89 0.78 0.50 0.22 0.34
0.54 0.78 0.50 0.34 0.41
0.78 0.54 0.66 0.26 0.14'), './foo1/file_2.txt', row.names=F)

write.table(read.table(header=TRUE, text='
V1   V2   V3   V4   V5
1    2     3    5    6
0.20 0.40 0.50 0.49 0.52 '), './foo1/file_3.txt', row.names=F)