Home > Blockchain >  Loop R merge multiple data frames together
Loop R merge multiple data frames together

Time:02-10

I am trying to merge multiple dataset (left_join) together inside a loop.

Here is what the data looks like:

Fr1 <- data.frame (v1  = c("a", "b", "c"),
                  period1 = c("Yes", "Yes", "Yes")
)

Fr2 <- data.frame (v1  = c("b", "d"),
                   period2 = c("Yes", "Yes")
)

Fr3 <- data.frame (v1  = c("c"),
                   period3 = c("Yes")
)


Be1 <- data.frame (v1  = c("a", "b", "c"),
                   period1 = c("Yes", "Yes", "Yes")
)

Be2 <- data.frame (v1  = c("b", "c"),
                   period2 = c("Yes", "Yes")
)

Be3 <- data.frame (v1  = c("d"),
                   period3 = c("Yes")
)

table_Fr <- data.frame (v1 = c("a", "b", "c", "d"))

table_Be <- data.frame (v1 = c("a", "b", "c", "d"))

The idea is simple: Fr1, Fr2, and Fr3 go with table_Fr and Be1, Be2 and Be3 go with table_Be.

A simple way to do it dataframe by dataframe goes like this:

table_Fr <- left_join(table_Fr, Fr1, by="v1") 
table_Fr <- left_join(table_Fr, Fr2, by="v1") 
table_Fr <- left_join(table_Fr, Fr3, by="v1") 
table_Fr <- table_Fr %>%
  mutate(period1 = ifelse(is.na(period1), "No", period1)) %>%
  mutate(period2 = ifelse(is.na(period2), "No", period2)) %>%
  mutate(period3 = ifelse(is.na(period3), "No", period3)) 

However, I have a large number of data frames to merge together, hence I want to use a loop (I know loop are not best in R but I have to do it through a loop...). Unfortunately, I can't get it to work, does anyone can help?

countries <- c("Fr", "Be")

for(c in countries) {
  for(i in 1:6) {
p <- paste0("period", i)
cp <- paste0(c, i)
t <- paste0("table_", c)
a <- left_join(t, cp, by="v1") %>%
  mutate(!!p := ifelse(is.na(!!p), "No", !!p)) 
  assign(paste0("table_",c), a)      
}
}

CodePudding user response:

You can use base R Reduce or purrr::reduce.

base R

ll <- mget(ls(pattern = "Fr"))
table_Fr <- Reduce(function(x, y) merge(x, y, all=TRUE), ll)
table_Fr[is.na(table_Fr)] <- "No"

#   v1 period1 period2 period3
# 1  a     Yes      No      No
# 2  b     Yes     Yes      No
# 3  c     Yes      No     Yes
# 4  d      No     Yes      No

tidyverse

library(tidyverse)
reduce(ll, full_join, by = 'v1') %>% 
  mutate(across(everything(), ~ replace_na(., 'No')))

#   v1 period1 period2 period3
# 1  a     Yes      No      No
# 2  b     Yes     Yes      No
# 3  c     Yes      No     Yes
# 4  d      No     Yes      No

CodePudding user response:

Using base r libraries

for(table in list(Fr1,Fr2,Fr3)) table_Fr <- merge(table_Fr, table, all.x=T)
for(table in list(Be1,Be2,Be3)) table_Be <- merge(table_Be, table, all.x=T)

Or if you want to cycle through many countries and tables in a loop:

countries <- c('Fr','Be')
for(c in countries) {
  master_table <- get(paste0('table_',c))
  for(i in 1:3){
    master_table <- merge(master_table, get(paste0(c,i)), all.x=T)
  }
  assign(paste0('table_',c),master_table)
}

CodePudding user response:

lapply(countries, function(cntry) {
  env=globalenv()
  Reduce(dplyr::full_join, mget(ls(pattern=cntry,env=env), env=env))
})

Output:

[[1]]
  v1 period1 period2 period3
1  a     Yes    <NA>    <NA>
2  b     Yes     Yes    <NA>
3  c     Yes    <NA>     Yes
4  d    <NA>     Yes    <NA>

[[2]]
  v1 period1 period2 period3
1  a     Yes    <NA>    <NA>
2  b     Yes     Yes    <NA>
3  c     Yes     Yes    <NA>
4  d    <NA>    <NA>     Yes
  • Related