I am trying to split excel file with multiple sheets to a new file based on prefix in sheet name.
I would like to output all sheets with prefix a_ to a.xslx , b_ to b.xslx and so on
Here is the code so far:
library(readxl)
library(readr)
library(writexl)
#get sheet names
sheets <- excel_sheets("/path/to/excel/file")
filenames <- paste0(sheets, ".xslx")
#sheet prefix
m <- regexpr("^.*_", filenames)
file_prefix <- unique(regmatches(filenames, m))
prefnames <- paste0(file_prefix, ".xslx")
#read_excel - sheets with same prefix
datspref <- lapply(file_prefix, read_excel, path = "/path/to/excel/file")
#save sheets with same prefix to a new excel file
lapply(seq_along(dats), function(i) write_xlsx(dats[[i]], prefnames[i]))
Appreciate any help!
CodePudding user response:
Setup: I made sample.xlsx
with the following sheets (and the same 2x2 table on each sheet):
xlsxfile <- "sample.xlsx"
sheetnames <- readxl::excel_sheets(xlsxfile)
sheetnames
# [1] "a_1" "a_2" "a_3" "b_1" "b_2" "c_1" "c_3"
newxlsxfiles <- sub("_.*", "", sheetnames)
newxlsxfiles
# [1] "a" "a" "a" "b" "b" "c" "c"
dats <- lapply(setNames(nm = sheetnames), readxl::read_xlsx, path = xlsxfile)
str(dats)
# List of 7
# $ a_1: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# ..$ a: num 1
# ..$ b: num 2
# $ a_2: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# ..$ a: num 1
# ..$ b: num 2
# $ a_3: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# ..$ a: num 1
# ..$ b: num 2
# $ b_1: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# ..$ a: num 1
# ..$ b: num 2
# $ b_2: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# ..$ a: num 1
# ..$ b: num 2
# $ c_1: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# ..$ a: num 1
# ..$ b: num 2
# $ c_3: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# ..$ a: num 1
# ..$ b: num 2
datspl <- split(dats, newxlsxfiles)
str(datspl)
# List of 3
# $ a:List of 3
# ..$ a_1: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# .. ..$ a: num 1
# .. ..$ b: num 2
# ..$ a_2: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# .. ..$ a: num 1
# .. ..$ b: num 2
# ..$ a_3: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# .. ..$ a: num 1
# .. ..$ b: num 2
# $ b:List of 2
# ..$ b_1: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# .. ..$ a: num 1
# .. ..$ b: num 2
# ..$ b_2: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# .. ..$ a: num 1
# .. ..$ b: num 2
# $ c:List of 2
# ..$ c_1: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# .. ..$ a: num 1
# .. ..$ b: num 2
# ..$ c_3: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# .. ..$ a: num 1
# .. ..$ b: num 2
invisible( Map(writexl::write_xlsx, datspl, paste0(names(datspl), ".xlsx")) )
list.files(pattern = "xlsx$")
# [1] "a.xlsx" "b.xlsx" "c.xlsx"
readxl::excel_sheets("a.xlsx")
# [1] "a_1" "a_2" "a_3"
FYI: if you need to remove the leading a_
from the sheet names, just change the name of dats
before splitting it, perhaps
# ...
names(dats) <- sub("^[^_]*_", "", sheetnames)
datspl <- split(dats, newxlsxfiles)
# ...