I have three datasets and I would like to create a new dataset, containing the nclusters
column, and the Rank
columns from both databases.
#databases
df1<-structure(list(nclusters = c(7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35), Distance = c(5248.99947051215, 64018.2500397613,
63884.2473586514, 63822.8276312568, 63797.0409132363, 63797.0409132363,
63794.4869445083, 63792.0363232768, 63792.0363232768, 63792.0363232768,
63792.0363232768, 63766.907179171, 63763.0472418499, 63772.2020429773,
63772.2020429773, 63772.2020429773, 63771.3399204139, 63766.8444809863,
63766.8444809863, 63760.4694766072, 63760.4694766072, 63760.4694766072,
63760.4694766072, 63760.4694766072, 63760.4694766072, 63760.4694766072,
63759.7505492485, 63758.58565, 63758.58565), Rank = c(1L, 29L,
28L, 27L, 25L, 26L, 24L, 20L, 21L, 22L, 23L, 15L, 12L, 17L, 18L,
19L, 16L, 13L, 14L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 4L, 2L, 3L)), row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25", "26", "27", "28", "29"), class = "data.frame")
df2<-structure(list(nclusters = c(7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35), Coverage = c(0.375505373587409, 0.353979450349411,
0.440737487316408, 0.524365528918811, 0.587640812323337, 0.587640812323337,
0.595943762399558, 0.604428674828288, 0.604428674828288, 0.604428674828288,
0.604428674828288, 0.753926389756351, 0.807111474295117, 0.705458712214102,
0.705458712214102, 0.705458712214102, 0.712208937171404, 0.754627308916128,
0.754627308916128, 0.865583389298128, 0.865583389298128, 0.865583389298128,
0.865583389298128, 0.865583389298128, 0.865583389298128, 0.865583389298128,
0.891174812227392, 1, 1), Production = c(1, 0.0146770379360228, 0.0146770379360228,
0.0146770379360228, 0.0146770379360228, 0.0146770379360228, 0.0146770379360228,
0.0146770379360228, 0.0146770379360228, 0.0146770379360228, 0.0146770379360228,
0.0146770379360228, 0.0146770379360228, 0.0146770379360228, 0.0146770379360228,
0.0146770379360228, 0.0146770379360228, 0.0146770379360228, 0.0146770379360228,
0.0146770379360228, 0.0146770379360228, 0.0146770379360228, 0.0146770379360228,
0.0146770379360228, 0.0146770379360228, 0.0146770379360228, 0.0146770379360228,
0.0146770379360228, 0.0146770379360228), Performance = c(0.687752686793705,
0.184328244142717, 0.227707262626215, 0.269521283427417, 0.30115892512968,
0.30115892512968, 0.30531040016779, 0.309552856382155, 0.309552856382155,
0.309552856382155, 0.309552856382155, 0.384301713846187, 0.41089425611557,
0.360067875075062, 0.360067875075062, 0.360067875075062, 0.363442987553713,
0.384652173426076, 0.384652173426076, 0.440130213617075, 0.440130213617075,
0.440130213617075, 0.440130213617075, 0.440130213617075, 0.440130213617075,
0.440130213617075, 0.452925925081707, 0.507338518968011, 0.507338518968011
), Rank = c(1, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 15, 12,
19, 18, 17, 16, 14, 13, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2)), class = "data.frame", row.names = c(NA,
-29L))
df3<-structure(list(nclusters = c("1", "2", "3", "4", "5", "6", "7",
"8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18",
"19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29"
), score = c(0.832550756796408, 0, 0.0623422939077367, 0.0994025225880894,
0.119244082570837, 0.119244082570837, 0.12148059799612, 0.123691601200231,
0.123691601200231, 0.123691601200231, 0.123691601200231, 0.153356141263302,
0.160910166815577, 0.145325231545442, 0.145325231545442, 0.145325231545442,
0.146519242848501, 0.153463626026167, 0.153463626026167, 0.168008878164586,
0.168008878164586, 0.168008878164586, 0.168008878164586, 0.168008878164586,
0.168008878164586, 0.168008878164586, 0.170786494229362, 0.180835139451362,
0.180835139451362), rank = c(1L, 29L, 28L, 27L, 25L, 26L, 24L,
20L, 21L, 22L, 23L, 15L, 12L, 17L, 18L, 19L, 16L, 13L, 14L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 4L, 2L, 3L)), row.names = c(NA, -29L
), class = "data.frame")
I did as follows:
library(dplyr)
df1$nclusters <- as.numeric(df1$nclusters)
df4 <- dplyr::left_join(df1, df2, df3, by = "nclusters")
df4%>%
rename("df1" = Rank,"df2" = Rank,"df3" = rank) %>%
select(nclusters, df1, df2, df3)
However, it didn't work, what am I doing wrong?
Expected output
nclusters df1 df2 df3
7 1 1 1
8 29 29 29
9 28 28 28
10 27 27 27
11 25 26 25
12 26 25 26
13 24 24 24
14 20 23 20
15 21 22 21
16 22 21 22
17 23 20 23
18 15 15 15
19 12 12 12
20 17 19 17
21 18 18 18
22 19 17 19
23 16 16 16
24 13 14 13
25 14 13 14
26 5 11 5
27 6 10 6
28 7 9 7
29 8 8 8
30 9 7 9
31 10 6 10
32 11 5 11
33 4 4 4
34 2 3 2
35 3 2 3
CodePudding user response:
Column df3
looks incorrect in your output. Do you want to join data frames by nclusters
? If yes, see the solution below.
df4 <- df1 %>%
left_join(df2, "nclusters") %>%
left_join(df3 %>%
mutate(nclusters = as.numeric(nclusters))) %>%
select(nclusters, contains("rank")) %>%
rename_at(vars(contains("rank")), ~c("df1", "df2", "df3"))
CodePudding user response:
Try this:
> library(dplyr)
> df3$nclusters <- as.numeric(df3$nclusters)
> df4 <- df1 %>%
left_join(df2, by = "nclusters") %>%
left_join(df3, by = "nclusters")
> df4 %>%
rename("df1" = Rank.x, "df2" = Rank.y, "df3" = rank) %>%
select(nclusters, df1, df2, df3)
nclusters df1 df2 df3
1 7 1 1 24
2 8 29 29 20
3 9 28 28 21
4 10 27 27 22
5 11 25 26 23
6 12 26 25 15