Suppose we have files 2021-02.csv, 2021-2.csv, ... , and 2021-12.csv in directory C:/R/month_data and we want to read them all into separate data frames but replacing original names with months' names (eg. 2021-01.csv should turn into "jan", 2021-02.csv into "feb", ..., 2021-12.csv into "dec").
The aim of the code is to have a total of 12 dfs.
I've written the following code:
filenames = list.files(path = "C:/R/month_data",
pattern = "2021- .*csv")
names = substr(filenames,1,7)
months = c("jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec")
for(i in names){
for (j in months){
filepath = file.path("C:/R/month_data",paste(i,".csv",sep=""))
assign(j, read.csv(filepath))
}
}
The code technically runs but it doesn't associate the month number (eg. 2021-04) with the rigth month name (eg. "apr")
What should I do?
CodePudding user response:
If you want to name each data frame you can read it into a list with lapply
and rename the elements of the list. Now you can access the data frames in the list. Also, you can use the three-letter abbreviations for the English month names, month.abb
.:
filenames = list.files(path = "C:/R/month_data/",
pattern = "2021- .*csv")
filepath = paste0("C:/R/month_data/", filenames)
dfs <- lapply(filepath, read.csv)
names(dfs) <- month.abb
View(dfs$Jan)
CodePudding user response:
here is a possible approach
library(data.table)
# get files to read
f <- list.files("c:/R/month_data/", pattern = "^2021-.*\\.csv$", full.names = TRUE)
# destill months from filename
f.month <- month.abb[as.numeric(gsub("2021-(.*)\\.csv$", "\\1", basename(f)))]
#if you want month names based on a locale, you can use
# lubridate::month(
# as.numeric(gsub("2021-(.*)\\.csv$", "\\1", basename(f))),
# label = TRUE, abbr = TRUE)
# read the csv files
L <- lapply(f, data.table::fread)
# pass names to the list
names(L) <- f.month
# pass list's contents to the global environment
list2env(L, envir = .GlobalEnv)
CodePudding user response:
Here's a solution with lubridate::month()
and tidyr::nest()
.
library(tidyverse)
library(lubridate, warn.conflicts = FALSE)
# this is how I would read the file data, but because I don't have files, I will comment out
# filenames <- set_names(filenames, filenames)
# df <- map_dfr(filenames, read_csv, .id = "filename")
# and instead create simulated data at this point
df <- expand_grid(filename = c("2020-02.csv", "2021-02.csv", "2021-03.csv"), x = 1:5)
# parse the filename text to get a string for month name
df <- df %>%
mutate(
dttm = filename %>%
str_remove(".csv") %>%
str_split("-", simplify = FALSE) %>%
map_chr(~str_c(.[2], "1", .[1], sep = "-")) %>%
mdy(),
mnth = month(dttm, label = TRUE),
new_filename = str_c(mnth, ".csv")
)
print(df)
#> # A tibble: 15 x 5
#> filename x dttm mnth new_filename
#> <chr> <int> <date> <ord> <chr>
#> 1 2020-02.csv 1 2020-02-01 Feb Feb.csv
#> 2 2020-02.csv 2 2020-02-01 Feb Feb.csv
#> 3 2020-02.csv 3 2020-02-01 Feb Feb.csv
#> 4 2020-02.csv 4 2020-02-01 Feb Feb.csv
#> 5 2020-02.csv 5 2020-02-01 Feb Feb.csv
#> 6 2021-02.csv 1 2021-02-01 Feb Feb.csv
#> 7 2021-02.csv 2 2021-02-01 Feb Feb.csv
#> 8 2021-02.csv 3 2021-02-01 Feb Feb.csv
#> 9 2021-02.csv 4 2021-02-01 Feb Feb.csv
#> 10 2021-02.csv 5 2021-02-01 Feb Feb.csv
#> 11 2021-03.csv 1 2021-03-01 Mar Mar.csv
#> 12 2021-03.csv 2 2021-03-01 Mar Mar.csv
#> 13 2021-03.csv 3 2021-03-01 Mar Mar.csv
#> 14 2021-03.csv 4 2021-03-01 Mar Mar.csv
#> 15 2021-03.csv 5 2021-03-01 Mar Mar.csv
# now, one row per month with columns new_filename and data (list of tibbles)
df <- df %>%
select(-c(filename, dttm, mnth)) %>%
nest(data = -new_filename)
print(df)
#> # A tibble: 2 x 2
#> new_filename data
#> <chr> <list>
#> 1 Feb.csv <tibble [10 x 1]>
#> 2 Mar.csv <tibble [5 x 1]>
# write files
df %>%
with(
walk2(data, new_filename, write_csv)
)
Created on 2022-03-21 by the reprex package (v2.0.1)