I have a folder with many files:
C:/1/vyd
The names of these files are:
/new_vydel_1
/new_vydel_2
/new_vydel_n...
/new_vydel_725
Here is a short dput() of two such datasets:
new_vydel_1=structure(list(date = c("08.01.2018", "08.01.2018", "08.01.2018",
"08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018",
"08.01.2018", "08.01.2018"), row = c(3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L, 4L), col = c(49L, 50L, 51L, 43L, 44L, 45L, 46L, 47L,
48L, 49L), B1 = c(6914L, 6914L, 6914L, 6958L, 6958L, 6958L, 6958L,
6914L, 6914L, 6914L), B2 = c(5560L, 5380L, 5644L, 5088L, 5280L,
5200L, 5472L, 5568L, 5560L, 5424L), B3 = c(4768L, 4840L, 4936L,
4320L, 4388L, 4572L, 4640L, 4704L, 4696L, 4488L), B4 = c(4960L,
4964L, 4540L, 4164L, 4412L, 4608L, 4628L, 4588L, 4416L, 4312L
), B5 = c(5554L, 5554L, 4782L, 4736L, 4736L, 5018L, 5018L, 4968L,
4968L, 4677L), B6 = c(5249L, 5249L, 4428L, 4553L, 4553L, 4832L,
4832L, 4741L, 4741L, 4428L), B7 = c(4893L, 4893L, 4138L, 4527L,
4527L, 4681L, 4681L, 4505L, 4505L, 4170L), B8 = c(4836L, 4840L,
5044L, 4074L, 4236L, 4404L, 4592L, 4668L, 4796L, 4628L), B8A = c(4679L,
4679L, 4098L, 4524L, 4524L, 4643L, 4643L, 4460L, 4460L, 3987L
), B9 = c(6752L, 6752L, 6752L, 7098L, 7098L, 7098L, 7098L, 6752L,
6752L, 6752L), B10 = c(4170L, 4170L, 3407L, 3301L, 3301L, 3612L,
3612L, 3600L, 3600L, 3352L), B11 = c(3124L, 3124L, 2514L, 2969L,
2969L, 3137L, 3137L, 2922L, 2922L, 2487L), B12 = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), COCта = c("2B2OC2OLS3C1Е", "2B2OC2OLS3C1Е",
"2B2OC2OLS3C1Е", "2B2OC2OLS3C1Е", "2B2OC2OLS3C1Е", "2B2OC2OLS3C1Е",
"2B2OC2OLS3C1Е", "2B2OC2OLS3C1Е", "2B2OC2OLS3C1Е", "2B2OC2OLS3C1Е"
)), class = "data.frame", row.names = c(NA, -10L))
and
new_vydel_2=structure(list(date = c("08.01.2018", "08.01.2018", "08.01.2018",
"08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018",
"08.01.2018", "08.01.2018"), row = c(4L, 4L, 5L, 5L, 5L, 6L,
6L, 6L, 6L, 7L), col = c(5L, 6L, 4L, 5L, 6L, 4L, 5L, 6L, 7L,
3L), B1 = c(8136L, 8136L, 7369L, 8136L, 8136L, 7369L, 8136L,
8136L, 8136L, 7369L), B2 = c(5796L, 5756L, 5912L, 5956L, 6000L,
6196L, 6044L, 6164L, 6268L, 6588L), B3 = c(4840L, 4936L, 5096L,
5040L, 5096L, 5292L, 5280L, 5360L, 5480L, 5584L), B4 = c(4868L,
4996L, 4908L, 4932L, 5060L, 5136L, 5280L, 5444L, 5492L, 5500L
), B5 = c(5327L, 5327L, 5301L, 5471L, 5471L, 5301L, 5471L, 5471L,
5846L, 5977L), B6 = c(5207L, 5207L, 5087L, 5290L, 5290L, 5087L,
5290L, 5290L, 5777L, 5721L), B7 = c(5235L, 5235L, 5001L, 5241L,
5241L, 5001L, 5241L, 5241L, 5688L, 5539L), B8 = c(4876L, 4828L,
4936L, 4992L, 5052L, 5236L, 5236L, 5336L, 5380L, 5708L), B8A = c(5209L,
5209L, 4889L, 5233L, 5233L, 4889L, 5233L, 5233L, 5787L, 5324L
), B9 = c(8407L, 8407L, 7048L, 8407L, 8407L, 7048L, 8407L, 8407L,
8407L, 7048L), B10 = c(3343L, 3343L, 3188L, 3438L, 3438L, 3188L,
3438L, 3438L, 3774L, 3543L), B11 = c(2740L, 2740L, 2443L, 2800L,
2800L, 2443L, 2800L, 2800L, 3228L, 2669L), B12 = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), COCта = c("5OC3B1C1Е", "5OC3B1C1Е",
"5OC3B1C1Е", "5OC3B1C1Е", "5OC3B1C1Е", "5OC3B1C1Е", "5OC3B1C1Е",
"5OC3B1C1Е", "5OC3B1C1Е", "5OC3B1C1Е")), class = "data.frame", row.names = c(NA,
-10L))
I must preprocess each of these datasets with a script like this one, for example:
# Cut the composition code at fixed character positions into five pieces,
# strip the capital letters A-Z from each piece, convert what is left to
# numeric and multiply it by 10. The original code column is kept.
treespring2 <- new_vydel_1 %>%
  separate(
    COCта,
    into = c("B", "E", "C", "OC", "OLS"),
    sep = c(1, 4, 6, 8),
    remove = FALSE
  ) %>%
  mutate(
    across(
      c(C, B, E, OC, OLS),
      function(x) as.numeric(stringr::str_remove_all(x, "[A-Z]")) * 10
    )
  )
# Inspect the structure of the preprocessed data frame.
str(treespring2)
But if I have 1000 datasets, I have to manually change the dataset name in this script every time. That is very slow and cumbersome.
Is there a way to automatically take all these datasets, preprocess them one by one with the script, and then bind them with rbind
into one data frame, as in the example below?
Here is a dput() example of the final result.
final=structure(list(date = c("08.01.2018", "08.01.2018", "08.01.2018",
"08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018",
"08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018",
"08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018",
"08.01.2018", "08.01.2018"), row = c(3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L), col = c(49L,
50L, 51L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 5L, 6L, 4L, 5L,
6L, 4L, 5L, 6L, 7L, 3L), B1 = c(6914L, 6914L, 6914L, 6958L, 6958L,
6958L, 6958L, 6914L, 6914L, 6914L, 8136L, 8136L, 7369L, 8136L,
8136L, 7369L, 8136L, 8136L, 8136L, 7369L), B2 = c(5560L, 5380L,
5644L, 5088L, 5280L, 5200L, 5472L, 5568L, 5560L, 5424L, 5796L,
5756L, 5912L, 5956L, 6000L, 6196L, 6044L, 6164L, 6268L, 6588L
), B3 = c(4768L, 4840L, 4936L, 4320L, 4388L, 4572L, 4640L, 4704L,
4696L, 4488L, 4840L, 4936L, 5096L, 5040L, 5096L, 5292L, 5280L,
5360L, 5480L, 5584L), B4 = c(4960L, 4964L, 4540L, 4164L, 4412L,
4608L, 4628L, 4588L, 4416L, 4312L, 4868L, 4996L, 4908L, 4932L,
5060L, 5136L, 5280L, 5444L, 5492L, 5500L), B5 = c(5554L, 5554L,
4782L, 4736L, 4736L, 5018L, 5018L, 4968L, 4968L, 4677L, 5327L,
5327L, 5301L, 5471L, 5471L, 5301L, 5471L, 5471L, 5846L, 5977L
), B6 = c(5249L, 5249L, 4428L, 4553L, 4553L, 4832L, 4832L, 4741L,
4741L, 4428L, 5207L, 5207L, 5087L, 5290L, 5290L, 5087L, 5290L,
5290L, 5777L, 5721L), B7 = c(4893L, 4893L, 4138L, 4527L, 4527L,
4681L, 4681L, 4505L, 4505L, 4170L, 5235L, 5235L, 5001L, 5241L,
5241L, 5001L, 5241L, 5241L, 5688L, 5539L), B8 = c(4836L, 4840L,
5044L, 4074L, 4236L, 4404L, 4592L, 4668L, 4796L, 4628L, 4876L,
4828L, 4936L, 4992L, 5052L, 5236L, 5236L, 5336L, 5380L, 5708L
), B8A = c(4679L, 4679L, 4098L, 4524L, 4524L, 4643L, 4643L, 4460L,
4460L, 3987L, 5209L, 5209L, 4889L, 5233L, 5233L, 4889L, 5233L,
5233L, 5787L, 5324L), B9 = c(6752L, 6752L, 6752L, 7098L, 7098L,
7098L, 7098L, 6752L, 6752L, 6752L, 8407L, 8407L, 7048L, 8407L,
8407L, 7048L, 8407L, 8407L, 8407L, 7048L), B10 = c(4170L, 4170L,
3407L, 3301L, 3301L, 3612L, 3612L, 3600L, 3600L, 3352L, 3343L,
3343L, 3188L, 3438L, 3438L, 3188L, 3438L, 3438L, 3774L, 3543L
), B11 = c(3124L, 3124L, 2514L, 2969L, 2969L, 3137L, 3137L, 2922L,
2922L, 2487L, 2740L, 2740L, 2443L, 2800L, 2800L, 2443L, 2800L,
2800L, 3228L, 2669L), B12 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), COCта = c("2B2OC2OLS3C1Е",
"2B2OC2OLS3C1Е", "2B2OC2OLS3C1Е", "2B2OC2OLS3C1Е", "2B2OC2OLS3C1Е",
"2B2OC2OLS3C1Е", "2B2OC2OLS3C1Е", "2B2OC2OLS3C1Е", "2B2OC2OLS3C1Е",
"2B2OC2OLS3C1Е", "5OC3B1C1Е", "5OC3B1C1Е", "5OC3B1C1Е", "5OC3B1C1Е",
"5OC3B1C1Е", "5OC3B1C1Е", "5OC3B1C1Е", "5OC3B1C1Е", "5OC3B1C1Е",
"5OC3B1C1Е"), B = c(20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L,
20L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L), Е = c(20L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L), C = c(20L, 20L, 20L, 20L, 20L,
20L, 20L, 20L, 20L, 20L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L), OC = c(30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L), OLS = c(10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA)), class = "data.frame", row.names = c(NA,
-20L))
All NA values
must be replaced with 0.
How can I perform these manipulations? Any help is valuable. Thank you.
CodePudding user response:
Maybe this works for you:
I'm going to assume your files are .csv files. Build the list of file paths (list_dfs), then import the files into a list of data frames (my_data):
# Collect the full path of every file in the folder whose name ends in ".csv".
# The dot is escaped so that names merely ending in "csv" are not matched.
list_dfs <- list.files("C:/1/vyd", pattern = "\\.csv$", full.names = TRUE)
# Read each file into a data frame. read.csv() splits fields on commas, whereas
# read.delim() defaults to tab-separated input and would load a comma-separated
# file as a single column.
# NOTE(review): if the files are semicolon-separated (common where the decimal
# mark is a comma), use read.csv2 instead -- confirm against the actual files.
my_data <- lapply(list_dfs, read.csv)
Now, for the preprocessing, create a function that performs the necessary tasks (preprocess the data and replace the NAs):
# Preprocess one data frame: split the COCта code at fixed character positions
# into the columns B, E, C, OC and OLS (keeping the original column), strip the
# capital letters A-Z from each piece, convert the remainder to numeric and
# multiply it by 10, then replace every NA in the data frame with 0.
# Returns the resulting data frame.
preprocess <- function(dat) {
  parsed <- dat %>%
    separate(
      COCта,
      into = c("B", "E", "C", "OC", "OLS"),
      sep = c(1, 4, 6, 8),
      remove = FALSE
    ) %>%
    mutate(
      across(
        c(C, B, E, OC, OLS),
        function(x) as.numeric(stringr::str_remove_all(x, "[A-Z]")) * 10
      )
    )
  # Zero-fill every missing value in the whole frame, not only the new columns.
  replace(parsed, is.na(parsed), 0)
}
Then apply this function to the list of data frames (my_data):
# Run preprocess() on every imported data frame; the result is a list.
dat.prep <- lapply(X = my_data, FUN = preprocess)
Finally, use do.call() to rbind the list of data frames:
# Stack all preprocessed data frames row-wise with a single rbind() call.
dat.bind <- do.call(what = rbind, args = dat.prep)
This should give you the desired output.