Home > database >  How to join two datasets where each row of a dataset matches with multiple rows in the other
How to join two datasets where each row of a dataset matches with multiple rows in the other

Time:09-30

I have to join two dataframe of this form:

  1. (110x4) Each row corresponds to a firm, identified with a unique code in the column "idstd".
new("SpatialPointsDataFrame", data = structure(list(country = c("Benin", 
"Benin", "Benin", "Benin", "Benin", "Benin"), idstd = c(606872, 
606969, 606898, 606978, 606937, 606847), oc_lat = c(4.203033, 
4.203033, 4.203033, 4.203033, 4.203033, 4.203033), oc_lng = c(10.0563057, 
10.0563057, 10.0563057, 10.0563057, 10.0563057, 10.0563057)), row.names = 938:943, class = "data.frame"), 
    coords.nrs = numeric(0), coords = structure(c(10.0563057, 
    10.0563057, 10.0563057, 10.0563057, 10.0563057, 10.0563057, 
    4.203033, 4.203033, 4.203033, 4.203033, 4.203033, 4.203033
    ), .Dim = c(6L, 2L), .Dimnames = list(NULL, c("coords.x1", 
    "coords.x2"))), bbox = structure(c(10.0563057, 4.203033, 
    10.0563057, 4.203033), .Dim = c(2L, 2L), .Dimnames = list(
        c("coords.x1", "coords.x2"), c("min", "max"))), proj4string = new("CRS", 
        projargs = " proj=longlat  datum=WGS84  no_defs"))
  1. (17x110) Each row corresponds to a specific investment projects and each column corresponds to the same firms of 1). This is a distance matrix, so the cells are filled with distances in km.
structure(c(879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938, 879.060754982938, 879.060754982938, 879.060754982938, 
879.060754982938), .Dim = c(6L, 110L))

My task is to join these two dataframes, where the expected result is a dataframe where each row is the combination of a firm and a project (110x17) and the columns are the sum of the two dataframes (4 110).

How do I join them to form a 1870x114 dataframe?

CodePudding user response:

Assuming that df.companies is the first data frame and df.distances the second one, I would add a dummy variable id to join each row in the 1st df with each row in the 2nd:

library(tidyverse)
left_join(
  as_tibble(df.companies) %>%
    mutate(dummy_id = 1),
  as_tibble(df.distances) %>%
    mutate(dummy_id = 1)
) %>%
  select(-dummy_id)


# A tibble: 36 x 116
   country  idstd oc_lat oc_lng coords.x1 coords.x2    V1    V2    V3    V4    V5    V6    V7    V8    V9   V10   V11   V12   V13   V14   V15   V16   V17   V18   V19   V20
   <chr>    <dbl>  <dbl>  <dbl>     <dbl>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1 Benin   606872   4.20   10.1      10.1      4.20  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.
 2 Benin   606872   4.20   10.1      10.1      4.20  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.
 3 Benin   606872   4.20   10.1      10.1      4.20  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.
 4 Benin   606872   4.20   10.1      10.1      4.20  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.
 5 Benin   606872   4.20   10.1      10.1      4.20  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.
 6 Benin   606872   4.20   10.1      10.1      4.20  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.
 7 Benin   606969   4.20   10.1      10.1      4.20  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.
 8 Benin   606969   4.20   10.1      10.1      4.20  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.
 9 Benin   606969   4.20   10.1      10.1      4.20  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.
10 Benin   606969   4.20   10.1      10.1      4.20  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.  879.
# ... with 26 more rows, and 90 more variables: V21 <dbl>, V22 <dbl>, V23 <dbl>, V24 <dbl>, V25 <dbl>, V26 <dbl>, V27 <dbl>, V28 <dbl>, V29 <dbl>, V30 <dbl>, V31 <dbl>,
#   V32 <dbl>, V33 <dbl>, V34 <dbl>, V35 <dbl>, V36 <dbl>, V37 <dbl>, V38 <dbl>, V39 <dbl>, V40 <dbl>, V41 <dbl>, V42 <dbl>, V43 <dbl>, V44 <dbl>, V45 <dbl>, V46 <dbl>,
#   V47 <dbl>, V48 <dbl>, V49 <dbl>, V50 <dbl>, V51 <dbl>, V52 <dbl>, V53 <dbl>, V54 <dbl>, V55 <dbl>, V56 <dbl>, V57 <dbl>, V58 <dbl>, V59 <dbl>, V60 <dbl>, V61 <dbl>,
#   V62 <dbl>, V63 <dbl>, V64 <dbl>, V65 <dbl>, V66 <dbl>, V67 <dbl>, V68 <dbl>, V69 <dbl>, V70 <dbl>, V71 <dbl>, V72 <dbl>, V73 <dbl>, V74 <dbl>, V75 <dbl>, V76 <dbl>,
#   V77 <dbl>, V78 <dbl>, V79 <dbl>, V80 <dbl>, V81 <dbl>, V82 <dbl>, V83 <dbl>, V84 <dbl>, V85 <dbl>, V86 <dbl>, V87 <dbl>, V88 <dbl>, V89 <dbl>, V90 <dbl>, V91 <dbl>,
#   V92 <dbl>, V93 <dbl>, V94 <dbl>, V95 <dbl>, V96 <dbl>, V97 <dbl>, V98 <dbl>, V99 <dbl>, V100 <dbl>, V101 <dbl>, V102 <dbl>, V103 <dbl>, V104 <dbl>, V105 <dbl>,
#   V106 <dbl>, V107 <dbl>, V108 <dbl>, V109 <dbl>, V110 <dbl>

On sample data I get 6x6 rows and 116 cols (as there are 6 firms 6 projects and 6 cols for firms and 110 cols for distances)

CodePudding user response:

I wrote a function that answers this question.

Using pivot_longer on the distance matrix made the work.

countrywave <- function(WBES_country_wave, FDI_country) {

# Calculate spatial distance matrix between WBES country/wave and all FDI projects
dist_country_wave <- as.data.frame(distm(WBES_country_wave,FDI_country, fun = distGeo)/1000)

# Add firm ID to distance matrix
WBES_country_wave <- st_drop_geometry(st_as_sf(WBES_country_wave))

dist_country_wave$idstd <- WBES_country_wave$idstd

# Make distance matrix long
dist_country_wave <- pivot_longer(
 dist_country_wave,
 cols = V1:V236,
 names_to = "project",
 values_to = "distance"
)

# Add distances to country/wave dataset
country_wave <- left_join(WBES_country_wave,dist_country_wave)

# Add FDI attributes to country/wave dataset
FDI_country_df <- st_drop_geometry(st_as_sf(FDI_country[1:44]))
FDI_country_df <- mutate(FDI_country_df, ID = row_number())
FDI_country_df$ID <- sub("^", "V", FDI_country_df$ID )

country_wave <- left_join(country_wave,FDI_country_df, by = c("project" ="ID"))

return(country_wave)

}
  • Related