I have 100 files, and I have starting and ending coordinates for each file. Based on these coordinates, I want to extract the corresponding regions from all data sets and store them in a file. I have used the following approach, but it is not giving me the expected output.
`startco` holds the starting indices for the 1st, 2nd and 3rd file respectively, and `endco` holds the ending indices for the 1st, 2nd and 3rd file respectively. If an index goes beyond the columns available in a file, I want to put NA in that position.
Example:
# Example start/end column indices (one pair per file) and the file numbers.
startco<-c(3,4,1)
endco<-c(5,6,2)
ctc<-c(1,2,3)
# NOTE(review): the three nested loops visit every (file, start, end)
# combination instead of pairing ctc[k] with startco[k] and endco[k].
for (hm0 in 1:length(ctc)) {
for (hm1 in 1:length(startco)) {
for (hm2 in 1:length(endco)) {
# Reads file_<ctc[hm0]>.txt; no header argument is passed to read.table here.
methd1<-read.table( paste0("path/to folder/","file_",ctc[hm0],".txt"))
# methd2 is reassigned on every iteration, so only the slice from the last
# iteration is still available once the loops finish.
methd2<- methd1[,startco[hm1]:endco[hm2]]
}
}
}
File_1.txt
V1 V2 V3 V4 V5
41 42 43 45 46
0.31 0.21 0.87 0.65 0.54
0.32 0.28 0.74 0.87 0.65
0.19 0.12 0.99 0.99 0.89
File_2.txt
V1 V2 V3 V4 V5
12 24 13 14 16
0.89 0.78 0.50 0.22 0.34
0.54 0.78 0.50 0.34 0.41
0.78 0.54 0.66 0.26 0.14
File_3.txt
V1 V2 V3 V4 V5
1 2 3 5 6
0.20 0.40 0.50 0.49 0.52
Expected output :
43 45 46
0.87 0.65 0.54
0.74 0.87 0.65
0.99 0.99 0.89
0.22 0.34 NA
0.34 0.41 NA
0.99 0.89 NA
1 2
0.20 0.40
CodePudding user response:
in Base R you could do:
# Read one table and return the requested column window as a data frame.
#
# path:  path to a whitespace-delimited file that has a header row.
# start: first column index. A negative value prepends `-start` all-NA
#        columns and the window then begins at column 1.
# end:   last column index. Values past the last column are clamped to it.
#
# Returns a data frame with `id` (the file name), `r` (the row number) and the
# selected columns renamed X1, X2, ..., so results from several files share
# column names and can be combined with merge(all = TRUE).
fun <- function(path, start, end) {
  id <- basename(path)
  dat <- read.table(path, header = TRUE)
  p <- ncol(dat)
  n <- nrow(dat)
  neg <- if (start < 0) -start else 0
  # matrix() with no data is filled with NA: this is the left-hand padding.
  add <- matrix(nrow = n, ncol = neg)
  if (start < 1) start <- 1
  if (end > p) end <- p
  # drop = FALSE keeps a data frame even when start == end; without it the
  # single column collapses to a vector and the naming/cbind below break.
  d <- cbind(add, dat[, start:end, drop = FALSE])
  names(d) <- paste0('X', seq_len(ncol(d)))
  cbind(id, r = seq_len(nrow(d)), d)
}
# Per-file start/end column indices; negative starts are allowed and add
# NA columns on the left (see fun).
startco <- c(3, 4, -2) # TAKES NEGATIVE INDICES
endco <- c(5, 6, 2)
ctc <- c(1, 2, 3)
# Build the full file names (file_1.txt, file_2.txt, ...); passing only the
# bare numbers to file.path() would point at 'path/to/folder/1' instead.
files <- file.path('path/to/folder', paste0('file_', ctc, '.txt'))
# One data frame per file, pairing each file with its own start and end.
A <- Map(fun, files, startco, endco)
# merge(all = TRUE) on the shared columns stacks the pieces and fills missing
# X columns with NA; the first two columns (id, r) are then dropped.
Reduce(function(x, y) merge(x, y, all = TRUE), A)[, -(1:2)]
X1 X2 X3 X4
1 43.00 45.00 46.00 NA
2 0.87 0.65 0.54 NA
3 0.74 0.87 0.65 NA
4 0.99 0.99 0.89 NA
5 14.00 16.00 NA NA
6 0.22 0.34 NA NA
7 0.34 0.41 NA NA
8 0.26 0.14 NA NA
9 NA NA 1.00 2.0
10 NA NA 0.20 0.4
The one with no negatives
# Same extraction, this time with only positive start indices.
startco <- c(3, 4, 1)
B <- Map(fun, files, startco, endco)
# Fold the per-file data frames together with a full outer merge, then drop
# the first two columns before printing.
Reduce(
  function(x, y) merge(x, y, all = TRUE),
  B
)[, -(1:2)]
X1 X2 X3
1 43.00 45.00 46.00
2 0.87 0.65 0.54
3 0.74 0.87 0.65
4 0.99 0.99 0.89
5 14.00 16.00 NA
6 0.22 0.34 NA
7 0.34 0.41 NA
8 0.26 0.14 NA
9 1.00 2.00 NA
10 0.20 0.40 NA
CodePudding user response:
I would use a `readfun`,
# Read file_<i>.txt from `dir` and return columns s..e as an unnamed matrix,
# padding with NA columns where the requested window leaves the table.
#
# i:   file number, used to build the name "file_<i>.txt".
# s:   start column; must not be 0. A negative value inserts -s NA columns on
#      the left and the window then starts at the first padded column.
# e:   end column; if larger than the number of columns, NA columns are
#      appended on the right.
# dir: directory holding the files (defaults to "foo1", the original
#      hard-coded location).
readfun <- \(i, s, e, dir = "foo1") {
  stopifnot(s != 0)
  r <- read.table(file.path(dir, paste0("file_", i, ".txt")), header=TRUE)
  if (e > ncol(r)) {  ## inserts cols to the right if e > ncol
    e1 <- e - ncol(r)
    # Continue the V<n> numbering after the last existing column; the `+`
    # is required here (number of last column + 1, + 2, ...).
    nm <- paste0('V', as.numeric(substring(colnames(r), 2)[ncol(r)]) + seq_len(e1))
    m <- matrix(NA_real_, nrow(r), e1, dimnames=list(NULL, nm))
    r <- cbind(r, m)
  }
  if (s < 0) {  ## inserts cols to the left if s < 0
    m <- matrix(NA_real_, nrow(r), -s)
    r <- cbind(m, r)
    # Shift the end index by the number of columns just inserted.
    e <- e - s
    s <- 1
  }
  out <- r[, s:e]
  unname(as.matrix(out))
}
in `Map`.
# File numbers with their matching start and end column indices.
ctc <- c(1, 2, 3)
startco <- c(3, 4, -2)
endco <- c(5, 6, 2)
# Call readfun once per file, pairing the k-th file with the k-th start/end.
Map(readfun, ctc, startco, endco)
# [[1]]
# [,1] [,2] [,3]
# [1,] 43.00 45.00 46.00
# [2,] 0.87 0.65 0.54
# [3,] 0.74 0.87 0.65
# [4,] 0.99 0.99 0.89
#
# [[2]]
# [,1] [,2] [,3]
# [1,] 14.00 16.00 NA
# [2,] 0.22 0.34 NA
# [3,] 0.34 0.41 NA
# [4,] 0.26 0.14 NA
#
# [[3]]
# [,1] [,2] [,3] [,4]
# [1,] NA NA 1.0 2.0
# [2,] NA NA 0.2 0.4
Data:
# Recreate the three example input files under ./foo1 so the snippets above
# can be run as-is. showWarnings = FALSE keeps re-runs quiet when the
# directory already exists.
dir.create('foo1', showWarnings = FALSE)
write.table(read.table(header=TRUE, text='
V1 V2 V3 V4 V5
41 42 43 45 46
0.31 0.21 0.87 0.65 0.54
0.32 0.28 0.74 0.87 0.65
0.19 0.12 0.99 0.99 0.89'), './foo1/file_1.txt', row.names=FALSE)
write.table(read.table(header=TRUE, text='
V1 V2 V3 V4 V5
12 24 13 14 16
0.89 0.78 0.50 0.22 0.34
0.54 0.78 0.50 0.34 0.41
0.78 0.54 0.66 0.26 0.14'), './foo1/file_2.txt', row.names=FALSE)
write.table(read.table(header=TRUE, text='
V1 V2 V3 V4 V5
1 2 3 5 6
0.20 0.40 0.50 0.49 0.52 '), './foo1/file_3.txt', row.names=FALSE)