merge two dataset based on one column and one row-CodePudding

I have two datasets, I want to merge them according to country and date, but the problem is that date in data2 is a row! How can I merge them? Do you have any idea?

# Create first data frame
data1 <- data.frame(ID1 = 1:5,                                 
                    country = letters[1:5],
                    date = c(2011, 2013, 2013, 2014, 2015),
                    x2 = 9)

# Create second data frame
data2 <- data.frame(ID1 = 3:7,                                 
                    country = letters[3:7],
                    2011 = c(4, 4, 5, 1, 1),
                    2012 = 5)

CodePudding user response：

We could use full_join after pivoting data2

library(dplyr)
library(tidyr)

data2 %>% 
  pivot_longer(
    cols = -c(ID1, country),
    names_to = "date",
    values_to = "value",
    names_transform = list(date = as.double)
  ) %>% 
  full_join(data1, by=c("ID1", "country", "date", "value"="x2"))

     ID1 country  date value
   <int> <chr>   <dbl> <dbl>
 1     3 c        2011     4
 2     3 c        2012     5
 3     4 d        2011     4
 4     4 d        2012     5
 5     5 e        2011     5
 6     5 e        2012     5
 7     6 f        2011     1
 8     6 f        2012     5
 9     7 g        2011     1
10     7 g        2012     5
11     1 a        2011     9
12     2 b        2013     9
13     3 c        2013     9
14     4 d        2014     9
15     5 e        2015     9

CodePudding user response：

Here an idea how you could merge them. But as written I don't see a country/date combination that fits both df.

library(data.table)

data1<- data.table(ID1 = 1:5,                                 
                     country = letters[1:5],
                     date = c(2011, 2013, 2013, 2014, 2015),
                     x2 = 9)

data2 <- data.table(ID1 = 3:7,                      
                     country = letters[3:7], 
                    "2011" = c(4, 4, 5, 1, 1), 
                    "2012" = 5)


data2 = melt(data2, id=c("ID1","country"), variable.name="date", value.name="x2") 
data2[, date:=as.numeric(as.character(date))]

data_final = merge(data1, data2, by=c("country", "date"), all=TRUE)

Output:

> data_final
    country date ID1.x x2.x ID1.y x2.y
 1:       a 2011     1    9    NA   NA
 2:       b 2013     2    9    NA   NA
 3:       c 2011    NA   NA     3    4
 4:       c 2012    NA   NA     3    5
 5:       c 2013     3    9    NA   NA
 6:       d 2011    NA   NA     4    4
 7:       d 2012    NA   NA     4    5
 8:       d 2014     4    9    NA   NA
 9:       e 2011    NA   NA     5    5
10:       e 2012    NA   NA     5    5
11:       e 2015     5    9    NA   NA
12:       f 2011    NA   NA     6    1
13:       f 2012    NA   NA     6    5
14:       g 2011    NA   NA     7    1
15:       g 2012    NA   NA     7    5

Append Data

As the data looks to me like you might want to append them, here with rbind()

> rbind(data1, data2)
    ID1 country date x2
 1:   1       a 2011  9
 2:   2       b 2013  9
 3:   3       c 2013  9
 4:   4       d 2014  9
 5:   5       e 2015  9
 6:   3       c 2011  4
 7:   4       d 2011  4
 8:   5       e 2011  5
 9:   6       f 2011  1
10:   7       g 2011  1
11:   3       c 2012  5
12:   4       d 2012  5
13:   5       e 2012  5
14:   6       f 2012  5
15:   7       g 2012  5

CodePudding user response：

Using reshape and merge.

data2 |> 
  reshape(idvar=c('ID1', 'country'), varying=3:4, v.names='x2', times=c(2011, 2012),
          timevar='date', direction='long') |>
  merge(data1, all=TRUE)
#    ID1 country date x2
# 1    1       a 2011  9
# 2    2       b 2013  9
# 3    3       c 2011  4
# 4    3       c 2012  5
# 5    3       c 2013  9
# 6    4       d 2011  4
# 7    4       d 2012  5
# 8    4       d 2014  9
# 9    5       e 2011  5
# 10   5       e 2012  5
# 11   5       e 2015  9
# 12   6       f 2011  1
# 13   6       f 2012  5
# 14   7       g 2011  1
# 15   7       g 2012  5

Note: R version 4.1.2 (2021-11-01)

data1 <- structure(list(ID1 = 1:5, country = c("a", "b", "c", "d", "e"
), date = c(2011, 2013, 2013, 2014, 2015), x2 = c(9, 9, 9, 9, 
9)), class = "data.frame", row.names = c(NA, -5L))

data2 <- structure(list(ID1 = 3:7, country = c("c", "d", "e", "f", "g"
), X2011 = c(4, 4, 5, 1, 1), X2012 = c(5, 5, 5, 5, 5)), class = "data.frame", row.names = c(NA, 
-5L))