Home > database >  How to match two columns with different frequency in different dataframes?
How to match two columns with different frequency in different dataframes?

Time:09-07

I have two dataframes:

df1 = data.frame(ID = c(1,1,1,1,1,1,1,2,2,2,2,3,3,3,4,4,5,5,5,5), stringsAsFactors = F)

df2 = data.frame(Date = c("2000-01-01","2001-01-1","2002-01-01","2003-01-01","2004-01-01"), stringsAsFactors = F)

Now, I would like to match the dates in df2 with the ID in df1 in the following way:


   ID  Date
1   1  2000-01-01
2   1  2000-01-01
3   1  2000-01-01
4   1  2000-01-01
5   1  2000-01-01
6   1  2000-01-01
7   1  2000-01-01
8   2  2001-01-01
9   2  2001-01-01
10  2  2001-01-01
11  2  2001-01-01
12  3  2002-01-01
13  3  2002-01-01
14  3  2002-01-01
15  4  2003-01-01
16  4  2003-01-01
17  5  2004-01-01
18  5  2004-01-01
19  5  2004-01-01
20  5  2004-01-01

Basically, I assume that "ID 1 == 2000-01-01" and want to repeat it for as many times 1 appears in ID. Same logic for the rest.

Can anyone help me?

Thanks!

CodePudding user response:

Using cbind:

cbind(df1, df2[df1$ID,])
#   ID       Date
#1   1 2000-01-01
#2   1 2000-01-01
#3   1 2000-01-01
#4   1 2000-01-01
#5   1 2000-01-01
#6   1 2000-01-01
#7   1 2000-01-01
#8   2  2001-01-1
#9   2  2001-01-1
#10  2  2001-01-1
#11  2  2001-01-1
#12  3 2002-01-01
#13  3 2002-01-01
#14  3 2002-01-01
#15  4 2003-01-01
#16  4 2003-01-01
#17  5 2004-01-01
#18  5 2004-01-01
#19  5 2004-01-01
#20  5 2004-01-01

or merge:

merge(df1, df2, by.x="ID", by.y=0)
#   ID       Date
#1   1 2000-01-01
#2   1 2000-01-01
#3   1 2000-01-01
#4   1 2000-01-01
#5   1 2000-01-01
#6   1 2000-01-01
#7   1 2000-01-01
#8   2  2001-01-1
#9   2  2001-01-1
#10  2  2001-01-1
#11  2  2001-01-1
#12  3 2002-01-01
#13  3 2002-01-01
#14  3 2002-01-01
#15  4 2003-01-01
#16  4 2003-01-01
#17  5 2004-01-01
#18  5 2004-01-01
#19  5 2004-01-01
#20  5 2004-01-01

CodePudding user response:

You could use a factor.

cbind(df1, Date=factor(df1$ID, labels=df2$Date))
#    ID       Date
# 1   1 2000-01-01
# 2   1 2000-01-01
# 3   1 2000-01-01
# 4   1 2000-01-01
# 5   1 2000-01-01
# 6   1 2000-01-01
# 7   1 2000-01-01
# 8   2 2001-01-01
# 9   2 2001-01-01
# 10  2 2001-01-01
# 11  2 2001-01-01
# 12  3 2002-01-01
# 13  3 2002-01-01
# 14  3 2002-01-01
# 15  4 2003-01-01
# 16  4 2003-01-01
# 17  5 2004-01-01
# 18  5 2004-01-01
# 19  5 2004-01-01
# 20  5 2004-01-01

You may do this also without df2:

transform(df1, Date=factor(ID, labels=seq.Date(as.Date('2000-01-01'), by='year', length.out=length(unique(ID)))))

Data:

df1 <- structure(list(ID = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 
3, 4, 4, 5, 5, 5, 5)), class = "data.frame", row.names = c(NA, 
-20L))

df2 <- structure(list(Date = c("2000-01-01", "2001-01-01", "2002-01-01", 
"2003-01-01", "2004-01-01")), class = "data.frame", row.names = c(NA, 
-5L))  ## corrected

CodePudding user response:

You can use rep() with table()

df1 = data.frame(ID = c(1,1,1,1,1,1,1,2,2,2,2,3,3,3,4,4,5,5,5,5), stringsAsFactors = F)

df2 = data.frame(Date = c("2000-01-01", "2001-01-01","2002-01-01","2003-01-01","2004-01-01"), stringsAsFactors = F)

df1$Date <- rep(x = df2$Date, times = table(df1$ID))
df1
#>    ID       Date
#> 1   1 2000-01-01
#> 2   1 2000-01-01
#> 3   1 2000-01-01
#> 4   1 2000-01-01
#> 5   1 2000-01-01
#> 6   1 2000-01-01
#> 7   1 2000-01-01
#> 8   2 2001-01-01
#> 9   2 2001-01-01
#> 10  2 2001-01-01
#> 11  2 2001-01-01
#> 12  3 2002-01-01
#> 13  3 2002-01-01
#> 14  3 2002-01-01
#> 15  4 2003-01-01
#> 16  4 2003-01-01
#> 17  5 2004-01-01
#> 18  5 2004-01-01
#> 19  5 2004-01-01
#> 20  5 2004-01-01

CodePudding user response:

You could use indexing:

transform(df1, Date = df2$Date[ID])

   ID       Date
1   1 2000-01-01
2   1 2000-01-01
3   1 2000-01-01
4   1 2000-01-01
5   1 2000-01-01
6   1 2000-01-01
7   1 2000-01-01
8   2 2001-01-01
9   2 2001-01-01
10  2 2001-01-01
11  2 2001-01-01
12  3 2002-01-01
13  3 2002-01-01
14  3 2002-01-01
15  4 2003-01-01
16  4 2003-01-01
17  5 2004-01-01
18  5 2004-01-01
19  5 2004-01-01
20  5 2004-01-01
  • Related