Join three datasets with some specific columns in R-CodePudding

I have three datasets and I would like to create a new dataset, containing the nclusters column, and the Rank columns from both databases.

#databases
df1<-structure(list(nclusters = c(7, 8, 9, 10, 11, 12, 13, 14, 15, 
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 
32, 33, 34, 35), Distance = c(5248.99947051215, 64018.2500397613, 
63884.2473586514, 63822.8276312568, 63797.0409132363, 63797.0409132363, 
63794.4869445083, 63792.0363232768, 63792.0363232768, 63792.0363232768, 
63792.0363232768, 63766.907179171, 63763.0472418499, 63772.2020429773, 
63772.2020429773, 63772.2020429773, 63771.3399204139, 63766.8444809863, 
63766.8444809863, 63760.4694766072, 63760.4694766072, 63760.4694766072, 
63760.4694766072, 63760.4694766072, 63760.4694766072, 63760.4694766072, 
63759.7505492485, 63758.58565, 63758.58565), Rank = c(1L, 29L, 
28L, 27L, 25L, 26L, 24L, 20L, 21L, 22L, 23L, 15L, 12L, 17L, 18L, 
19L, 16L, 13L, 14L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 4L, 2L, 3L)), row.names = c("1", 
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", 
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", 
"25", "26", "27", "28", "29"), class = "data.frame")


df2<-structure(list(nclusters = c(7, 8, 9, 10, 11, 12, 13, 14, 15, 
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 
32, 33, 34, 35), Coverage = c(0.375505373587409, 0.353979450349411, 
0.440737487316408, 0.524365528918811, 0.587640812323337, 0.587640812323337, 
0.595943762399558, 0.604428674828288, 0.604428674828288, 0.604428674828288, 
0.604428674828288, 0.753926389756351, 0.807111474295117, 0.705458712214102, 
0.705458712214102, 0.705458712214102, 0.712208937171404, 0.754627308916128, 
0.754627308916128, 0.865583389298128, 0.865583389298128, 0.865583389298128, 
0.865583389298128, 0.865583389298128, 0.865583389298128, 0.865583389298128, 
0.891174812227392, 1, 1), Production = c(1, 0.0146770379360228, 0.0146770379360228, 
0.0146770379360228, 0.0146770379360228, 0.0146770379360228, 0.0146770379360228, 
0.0146770379360228, 0.0146770379360228, 0.0146770379360228, 0.0146770379360228, 
0.0146770379360228, 0.0146770379360228, 0.0146770379360228, 0.0146770379360228, 
0.0146770379360228, 0.0146770379360228, 0.0146770379360228, 0.0146770379360228, 
0.0146770379360228, 0.0146770379360228, 0.0146770379360228, 0.0146770379360228, 
0.0146770379360228, 0.0146770379360228, 0.0146770379360228, 0.0146770379360228, 
0.0146770379360228, 0.0146770379360228), Performance = c(0.687752686793705, 
0.184328244142717, 0.227707262626215, 0.269521283427417, 0.30115892512968, 
0.30115892512968, 0.30531040016779, 0.309552856382155, 0.309552856382155, 
0.309552856382155, 0.309552856382155, 0.384301713846187, 0.41089425611557, 
0.360067875075062, 0.360067875075062, 0.360067875075062, 0.363442987553713, 
0.384652173426076, 0.384652173426076, 0.440130213617075, 0.440130213617075, 
0.440130213617075, 0.440130213617075, 0.440130213617075, 0.440130213617075, 
0.440130213617075, 0.452925925081707, 0.507338518968011, 0.507338518968011
), Rank = c(1, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 15, 12, 
19, 18, 17, 16, 14, 13, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2)), class = "data.frame", row.names = c(NA, 
-29L))

df3<-structure(list(nclusters = c("1", "2", "3", "4", "5", "6", "7", 
"8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", 
"19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29"
), score = c(0.832550756796408, 0, 0.0623422939077367, 0.0994025225880894, 
0.119244082570837, 0.119244082570837, 0.12148059799612, 0.123691601200231, 
0.123691601200231, 0.123691601200231, 0.123691601200231, 0.153356141263302, 
0.160910166815577, 0.145325231545442, 0.145325231545442, 0.145325231545442, 
0.146519242848501, 0.153463626026167, 0.153463626026167, 0.168008878164586, 
0.168008878164586, 0.168008878164586, 0.168008878164586, 0.168008878164586, 
0.168008878164586, 0.168008878164586, 0.170786494229362, 0.180835139451362, 
0.180835139451362), rank = c(1L, 29L, 28L, 27L, 25L, 26L, 24L, 
20L, 21L, 22L, 23L, 15L, 12L, 17L, 18L, 19L, 16L, 13L, 14L, 5L, 
6L, 7L, 8L, 9L, 10L, 11L, 4L, 2L, 3L)), row.names = c(NA, -29L
), class = "data.frame")

I did as follows:

   library(dplyr)
   df1$nclusters <- as.numeric(df1$nclusters)
   df4 <- dplyr::left_join(df1, df2, df3, by = "nclusters")
   df4%>%
   rename("df1" = Rank,"df2" = Rank,"df3" = rank) %>%
   select(nclusters, df1, df2, df3)

However, it didn't work, what am I doing wrong?

Expected output

  nclusters  df1  df2  df3    
     7      1     1     1
     8      29    29    29
     9      28    28    28
    10      27    27    27
    11      25    26    25
    12      26    25    26
    13      24    24    24
    14      20    23    20
    15      21    22    21
    16      22    21    22
    17      23    20    23
    18      15    15    15
    19      12    12    12
    20      17    19    17
    21      18    18    18
    22      19    17    19
    23      16    16    16
    24      13    14    13
    25      14    13    14
    26      5     11    5
    27     6      10    6
    28     7      9     7
    29     8      8     8
    30     9      7     9
    31     10     6     10
    32     11     5     11
    33     4      4     4
    34     2      3     2
    35     3      2     3

CodePudding user response：

Column df3 looks incorrect in your output. Do you want to join data frames by nclusters? If yes, see the solution below.

 df4 <- df1 %>% 
      left_join(df2, "nclusters") %>% 
      left_join(df3 %>% 
                  mutate(nclusters = as.numeric(nclusters))) %>% 
      select(nclusters, contains("rank")) %>% 
      rename_at(vars(contains("rank")), ~c("df1", "df2", "df3"))

CodePudding user response：

Try this:

> library(dplyr)

> df3$nclusters <- as.numeric(df3$nclusters)


> df4 <- df1 %>%
    left_join(df2, by = "nclusters") %>%
    left_join(df3, by = "nclusters")

> df4 %>%
    rename("df1" = Rank.x, "df2" = Rank.y, "df3" = rank) %>%
    select(nclusters, df1, df2, df3)

 nclusters df1 df2 df3
1          7   1   1  24
2          8  29  29  20
3          9  28  28  21
4         10  27  27  22
5         11  25  26  23
6         12  26  25  15