How to grep files based on words from a file?-CodePudding

I have this text file:

 # Word pairs to look for: each row of `res` is one (l, h) pair that must
 # appear together in a file name.
 l <- c("ced", "nad")
 h <- c("SAF", "EYR")
 res <- cbind(l = l, h = h)

and this list of files:

 # Full paths of the .txt files under "path".
 # `pattern` is a regular expression, not a shell glob: the dot is escaped and
 # the expression is anchored with `$` so only names ending in ".txt" are kept
 # (an unanchored ".txt" would also accept names that merely contain "txt").
 dirf <- list.files("path", pattern = "\\.txt$", full.names = TRUE)

example of files

 ced_SAF_jkh_2020.txt
 ced_EYR_jkh_2001.txt
 nad_SAF_jkh_200.txt
 nad_EYR_jkh_200.txt

I want to grep the files whose names contain both words from the same row of the two columns, so the files I need are:

 ced_SAF_jkh_2020.txt
 nad_EYR_jkh_200.txt

CodePudding user response：

You can construct the name from the matrix and use that, i.e.

# Join the words of each row of `res` with "_" to get one name stem per row.
apply(res, 1, paste, collapse = "_")
#[1] "ced_SAF" "nad_EYR"

To grep them you can do,

# One "<l>_<h>" stem per row of `res`.
ptrn <- apply(res, 1, paste, collapse = "_")

# Keep the elements of `x` that match any stem (stems OR-ed into one regex).
x[grepl(paste(ptrn, collapse = "|"), x)]
#[1] "ced_SAF_jkh_2020.txt" "nad_EYR_jkh_200.txt"

where x,

# `x` is the character vector of example file names passed to grep() above;
# dput() prints it in a form that can be pasted back into R.
dput(x)
c("ced_SAF_jkh_2020.txt", "ced_EYR_jkh_2001.txt", "nad_SAF_jkh_200.txt", 
"nad_EYR_jkh_200.txt")

CodePudding user response：

Another possible solution, based on tidyverse:

library(tidyverse)

# Word pairs: each row of `res` is one (l, h) pair to look for.
l <- c("ced", "nad")
h <- c("SAF", "EYR")
res <- cbind(l = l, h = h)

# Example file names, held in a one-column data frame.
df <- data.frame(
  files = c(
    "ced_SAF_jkh_2020.txt",
    "ced_EYR_jkh_2001.txt",
    "nad_SAF_jkh_200.txt",
    "nad_EYR_jkh_200.txt"
  )
)

# One logical mask per row of `res`: TRUE where the file name contains both
# words of that row. Building the masks from the matrix (instead of spelling
# out rows 1 and 2 by hand) keeps this working for any number of rows.
row_hits <- map2(
  res[, 1], res[, 2],
  function(word_l, word_h) {
    str_detect(df$files, word_l) & str_detect(df$files, word_h)
  }
)

# A file is kept when it matches at least one row; `.init = FALSE` makes an
# empty `res` select no files instead of failing.
df %>%
  filter(reduce(row_hits, `|`, .init = FALSE))

#>                  files
#> 1 ced_SAF_jkh_2020.txt
#> 2  nad_EYR_jkh_200.txt

Or, more compactly, with purrr::map2_dfr:

library(tidyverse)

# For each (l, h) pair keep the rows of `df` whose file name contains both
# words, then stack the per-pair results into a single data frame.
map2(
  res[, 1], res[, 2],
  function(word_l, word_h) {
    filter(df, str_detect(files, word_l) & str_detect(files, word_h))
  }
) %>%
  bind_rows()

#>                  files
#> 1 ced_SAF_jkh_2020.txt
#> 2  nad_EYR_jkh_200.txt

CodePudding user response：

You can use sprintf() together with paste(collapse = '|') to build the regular expression and pass it to list.files() directly:

# One "<l>_<h>" alternative per word pair, OR-ed into a single regex.
regex <- paste(
  sprintf("%s_%s", l, h),
  collapse = "|"
)
# [1] "ced_SAF|nad_EYR"

# Full paths of the files under "path_to_file" whose names match the regex.
list.files(path = "path_to_file", pattern = regex, full.names = TRUE)

Then all the file names which match the regular expression will be returned.