Home > Software design >  What is a good way to write a function to load multiple years of data - each in its own file - as a
What is a good way to write a function to load multiple years of data - each in its own file - as a

Time:02-16

The code I have works, but I can't help thinking there is a much more efficient way to write it. Essentially, I want a function that returns the combined `df` built below.

#' Read the raw-scores file for one season.
#'
#' Builds the file name "raw_scores_<y1>_<y2>.txt" and parses it with
#' `read.csv()` using that function's defaults. The path is relative, so it
#' is resolved against the current working directory.
#'
#' @param y1 First year label used in the file name (e.g. 12).
#' @param y2 Second year label used in the file name (e.g. 13).
#' @return The data frame returned by `read.csv()`.
read_data_raw <- function(y1, y2) {
  file_name <- paste0("raw_scores_", y1, "_", y2, ".txt")
  read.csv(file_name)
}


#' Read the Vegas file for one season.
#'
#' Builds the file name "vegas_<y1>_<y2>.txt" and parses it with
#' `read.csv()` using that function's defaults. The path is relative, so it
#' is resolved against the current working directory.
#'
#' @param y1 First year label used in the file name (e.g. 12).
#' @param y2 Second year label used in the file name (e.g. 13).
#' @return The data frame returned by `read.csv()`.
read_data_vegas <- function(y1, y2) {
  file_name <- paste0("vegas_", y1, "_", y2, ".txt")
  read.csv(file_name)
}



# Season start years to load. Each season's files are named with the pair
# (start, start + 1), e.g. "raw_scores_12_13.txt" and "vegas_12_13.txt".
season_starts <- 12:18

# Read both files for one season and keep only the rows present in both,
# matching raw-score TEAM_ID/GAME_ID against Vegas TeamId/GameId.
load_season <- function(start_year) {
  inner_join(
    read_data_raw(start_year, start_year + 1),
    read_data_vegas(start_year, start_year + 1),
    by = c("TEAM_ID" = "TeamId", "GAME_ID" = "GameId")
  )
}

# Stack every season with a single rbind() call instead of growing `df` one
# season at a time, which re-copies the accumulated rows on each step.
# Only `df` is produced; no per-season temporaries are left in the workspace.
df <- do.call(rbind, lapply(season_starts, load_season))

CodePudding user response:

You can use purrr::map_df to perform the inner join for each season and row-bind the results into a single data frame.

library(dplyr)
library(purrr)

# Start year of each season; the matching files use the pair (.x, .x + 1).
vals <- 12:18

# For every season, join the raw scores to the Vegas data on team and game
# id; map_df() row-binds the per-season results into one data frame.
df <- map_df(
  vals,
  ~ inner_join(
    read_data_raw(.x, .x + 1),
    read_data_vegas(.x, .x + 1),
    by = c("TEAM_ID" = "TeamId", "GAME_ID" = "GameId")
  )
)
  • Related