I need to load a lot of files into R, all of these files have a date hidden in their name as follows: SD07_TWK_20190822_003004
I would like to select the files that are loaded based on these dates.
I load the files into R like this:
filenames = list.files(path=path, pattern=".txt")
colnamesfull = c("time","v","a","t1","t2","t3","t4","t5","t6","t7","t8")
for(i in filenames){
filepath = file.path(path, paste(i, sep=""))
assign(i, read.table(filepath,
skip= 20,
col.names= colnamesfull,
sep=",")
)}
To filter on dates, I assume I need to add a date range in the 'pattern' within the list.files function. However, I can't get that to work.
Say I have the following dates:
date_start = "20190822"
date_end = "20190823"
How would I add a filter for these dates into the code above?
Example of code and files:
#path = "C:/path"
filenames = list.files(path=path, pattern=".txt")
names = substr(filenames,10,17)
date_start = "20190822"
date_end = "20190822"
for(i in filenames){
filepath = file.path(path, paste(i, sep=""))
if( (date_start <= substr(filepath, 10, 17)) &
(substr(filepath, 10, 17) <= date_end )){
assign(i, read.table(filepath,
skip= 20,
col.names= colnamesfull,
sep=","))}}
Some files:
> dput(file1)
structure(list(time = c(2, 3.9, 5.8, 7.8, 9.7, 11.7, 13.6, 15.5,
17.5, 19.4), v = c(14.82, 14.804, 14.82, 14.82, 14.804, 14.82,
14.812, 14.804, 14.8, 14.808), a = c(1.5, 1.476, 1.5, 1.491,
1.452, 1.476, 1.478, 1.44, 1.454, 1.438), t1 = c(14.61, 14.61,
14.61, 14.61, 14.61, 14.61, 14.61, 14.62, 14.62, 14.63), t2 = c(14.63,
14.62, 14.62, 14.62, 14.62, 14.62, 14.62, 14.63, 14.63, 14.64
), t3 = c(14.63, 14.63, 14.63, 14.63, 14.63, 14.63, 14.63, 14.63,
14.64, 14.65), t4 = c(14.65, 14.65, 14.65, 14.65, 14.64, 14.64,
14.65, 14.65, 14.66, 14.67), t5 = c(14.65, 14.65, 14.65, 14.65,
14.65, 14.65, 14.66, 14.66, 14.67, 14.69), t6 = c(14.63, 14.63,
14.63, 14.63, 14.63, 14.63, 14.63, 14.64, 14.65, 14.66), t7 = c(14.64,
14.64, 14.64, 14.64, 14.64, 14.64, 14.64, 14.64, 14.65, 14.66
), t8 = c(14.6, 14.6, 14.6, 14.6, 14.6, 14.6, 14.61, 14.61, 14.62,
14.63)), row.names = c(NA, 10L), class = "data.frame")
> dput(file2)
structure(list(time = c(2, 3.9, 5.9, 7.8, 9.7, 11.7, 13.6, 15.6,
17.5, 19.5), v = c(14.808, 14.808, 14.816, 14.816, 14.808, 14.804,
14.816, 14.8, 14.808, 14.808), a = c(1.478, 1.472, 1.491, 1.49,
1.47, 1.484, 1.452, 1.473, 1.465, 1.482), t1 = c(14.63, 14.63,
14.63, 14.63, 14.62, 14.62, 14.63, 14.63, 14.63, 14.64), t2 = c(14.64,
14.64, 14.64, 14.64, 14.64, 14.64, 14.64, 14.64, 14.65, 14.65
), t3 = c(14.65, 14.65, 14.65, 14.65, 14.65, 14.64, 14.65, 14.65,
14.65, 14.66), t4 = c(14.66, 14.66, 14.66, 14.66, 14.66, 14.66,
14.66, 14.67, 14.67, 14.68), t5 = c(14.67, 14.67, 14.67, 14.67,
14.67, 14.67, 14.67, 14.68, 14.69, 14.7), t6 = c(14.65, 14.65,
14.65, 14.65, 14.65, 14.65, 14.65, 14.65, 14.66, 14.67), t7 = c(14.65,
14.65, 14.65, 14.65, 14.65, 14.65, 14.65, 14.66, 14.66, 14.67
), t8 = c(14.62, 14.62, 14.62, 14.62, 14.62, 14.62, 14.62, 14.62,
14.63, 14.64)), row.names = c(NA, 10L), class = "data.frame")
CodePudding user response:
Starting with a filename following the pattern of "SD07_TWK_20190822_003004"
you can extract the date as characters 10 through 17:
> substr("SD07_TWK_20190822_003004", 10, 17)
[1] "20190822"
Now this is a character string. But as someone clever ordered the year, month and date in a sensible order, you can compare these strings as if they were dates:
example <- substr("SD07_TWK_20190822_003004", 10, 17)
date_start = "20190822"
date_end = "20190823"
example >= date_start
example < date_end
Now if you want to see, whether a date is in between date_start
and date_end
this becomes something like
> date_start <= example & example <= date_end
[1] TRUE
Let's try it with a date a month early and a year late to see if we get the desired FALSE
:
> example = "20190722"
> date_start <= example & example <= date_end
[1] FALSE
> example = "20200822"
> date_start <= example & example <= date_end
[1] FALSE
Great, now all you have to do is condition the assign
in your loop with an if
statement, something along the lines of
if( (start_date <= substr(filepath, 10, 17)) &
(substr(filepath, 10, 17) <= end_date ) ){
assign(....)
}
I cannot close without giving the advice to reconsider, whether assign
is a good choice here. You should probably rather store those data.frames
in a list, but that is not the topic of this question.