Home > OS >  Insert a value from a dataframe into another dataframe picked by ID and year
Insert a value from a dataframe into another dataframe picked by ID and year

Time:05-19

I have a dataframe df1 as below -

ID1 <- c(155,487,102,666,100)
year <- c(2002,2003,2002,2000,2004)
df1 <- data.frame(ID1, year)
df1
  ID1 year
1 155 2002
2 487 2003
3 102 2002
4 666 2000
5 100 2004

Then I have a dataframe df2 as below -

library(dplyr)
ID2 <- c(487,334,555,100,250,366,155,788)
a <- c(0.3,0.33,1.34,0.96,1.9,2.23,0.43,3.73)
b <- c(0.55,0.63,0.46,.26,3.1,0.4,2.46,4.34)
c <- c(0.78,0.83,0.34,0.95,0.12,0.72,0.52,0.03)
d <- c(2.3,1.23,2.35,0.96,0.1,0.78,0.49,2.03)
e <- c(1.3,0.53,1.54,1.36,2.34,0.34,0.25,1.33)
df2 <- data.frame(ID2,a,b,c,d,e)
df2 <- rename(df2, "2000"=a, "2001"=b, "2002"=c, "2003"=d, "2004"=e)
df2
  ID2 2000 2001 2002 2003 2004
1 487 0.30 0.55 0.78 2.30 1.30
2 334 0.33 0.63 0.83 1.23 0.53
3 555 1.34 0.46 0.34 2.35 1.54
4 100 0.96 0.26 0.95 0.96 1.36
5 250 1.90 3.10 0.12 0.10 2.34
6 366 2.23 0.40 0.72 0.78 0.34
7 155 0.43 2.46 0.52 0.49 0.25
8 788 3.73 4.34 0.03 2.03 1.33

I want to pick values from df2 and insert it into df1 if ID1 of df1 matches ID2 of df2. The column of df2 the value is to be picked from is named in column year of df1.

The result should be this -

  ID1 year value
1 155 2002  0.52
2 487 2003  2.30
3 102 2002    NA
4 666 2000    NA
5 100 2004  1.36

Any help will be great. Thank you.

CodePudding user response:

We may reshape the second data to 'long' format and then do a join

library(dplyr)
library(tidyr)
df2 %>% 
  mutate(ID2 = as.character(ID2)) %>%
  pivot_longer(cols = -ID2, names_to = 'year') %>% 
  left_join(df1 %>% 
             mutate(across(everything(), as.character)), .,
     by = c("ID1" = "ID2", "year"))

-output

 ID1 year value
1 155 2002  0.52
2 487 2003  2.30
3 102 2002    NA
4 666 2000    NA
5 100 2004  1.36

Or using base R with row/column indexing created with match

df1$value <- df2[-1][cbind(match(df1$ID1, df2$ID2), 
                      match(df1$year, names(df2)[-1]))]

data

df1 <- structure(list(ID1 = c(155L, 487L, 102L, 666L, 100L), year = c(2002L, 
2003L, 2002L, 2000L, 2004L)), class = "data.frame", row.names = c("1", 
"2", "3", "4", "5"))

df2 <- structure(list(ID2 = c(487L, 334L, 555L, 100L, 250L, 366L, 155L, 
788L), `2000` = c(0.3, 0.33, 1.34, 0.96, 1.9, 2.23, 0.43, 3.73
), `2001` = c(0.55, 0.63, 0.46, 0.26, 3.1, 0.4, 2.46, 4.34), 
    `2002` = c(0.78, 0.83, 0.34, 0.95, 0.12, 0.72, 0.52, 0.03
    ), `2003` = c(2.3, 1.23, 2.35, 0.96, 0.1, 0.78, 0.49, 2.03
    ), `2004` = c(1.3, 0.53, 1.54, 1.36, 2.34, 0.34, 0.25, 1.33
    )), class = "data.frame", row.names = c("1", "2", "3", "4", 
"5", "6", "7", "8"))

CodePudding user response:

Another option using lookup:

library(tidyverse)

df1 <- tribble(
  ~ID1, ~year,
  155, 2002,
  487, 2003,
  102, 2002,
  666, 2000,
  100, 2004
) |> 
  mutate(index = str_c(ID1, year))

df2 <- tribble(
  ~ID2, ~"2000", ~"2001", ~"2002", ~"2003", ~"2004",
  487, 0.30, 0.55, 0.78, 2.30, 1.30,
  334, 0.33, 0.63, 0.83, 1.23, 0.53,
  555, 1.34, 0.46, 0.34, 2.35, 1.54,
  100, 0.96, 0.26, 0.95, 0.96, 1.36,
  250, 1.90, 3.10, 0.12, 0.10, 2.34,
  366, 2.23, 0.40, 0.72, 0.78, 0.34,
  155, 0.43, 2.46, 0.52, 0.49, 0.25,
  788, 3.73, 4.34, 0.03, 2.03, 1.33
) |> 
  pivot_longer(-ID2, names_to = "year") |> 
  mutate(index = str_c(ID2, year))

df1 |> mutate(value = df2$value[match(index, df2$index)])
#> # A tibble: 5 × 4
#>     ID1  year index   value
#>   <dbl> <dbl> <chr>   <dbl>
#> 1   155  2002 1552002  0.52
#> 2   487  2003 4872003  2.3 
#> 3   102  2002 1022002 NA   
#> 4   666  2000 6662000 NA   
#> 5   100  2004 1002004  1.36

Created on 2022-05-18 by the reprex package (v2.0.1)

  • Related