Home > Mobile >  Add two new columns based on another data frame
Add two new columns based on another data frame

Time:12-01

Having a dataframe like this:

structure(list(id = c("id1", "id1", "id2", "id2", "id3", "id3"
), title_num = c(1, 2, 1, 2, 1, 2), title_name = c("amazon1", 
"yahoo2", "google1", NA, "yahoo1", "amazon2")), row.names = c(NA, 
-6L), class = c("tbl_df", "tbl", "data.frame"))

and another one like this:

    dfcheck <- structure(list(status = c("open/close", "close", "open"), stock = c("company energy", 
"goods and books", "other"), name = c("amazon1;google1", "google3;yahoo1", 
"yahoo2;amazon2;google2")), class = "data.frame", row.names = c(NA, 
-3L))

How is it possible to have an output like this:

id title_num title_name   stock           status
id1         1    amazon1 company energy open/close
id1         2     yahoo2 other          open
id2         1    google1 company energy open/close
id2         2       <NA> <NA>           <NA>
id3         1     yahoo1 goods and books close
id3         2    amazon2 other           open

CodePudding user response:

library(dplyr)

df <-
structure(list(id = c("id1", "id1", "id2", "id2", "id3", "id3"
), title_num = c(1, 2, 1, 2, 1, 2), title_name = c("amazon1", 
                                                   "yahoo2", "google1", NA, "yahoo1", "amazon2")), row.names = c(NA, 
                                                                                                                 -6L), class = c("tbl_df", "tbl", "data.frame"))

dfcheck <-
 structure(list(status = c("open/close", "close", "open"), stock = c("company energy", 
                                                                               "goods and books", "other"), name = c("amazon1;google1", "google3;yahoo1", 
                                                                                                                     "yahoo2;amazon2;google2")), class = "data.frame", row.names = c(NA, 
                                                                                                                                                                                     -3L))

df %>% 
  left_join(
    dfcheck %>% 
      separate_rows(name,sep = ";"),
    by = c("title_name" = "name")
  )

# A tibble: 6 x 5
  id    title_num title_name status     stock          
  <chr>     <dbl> <chr>      <chr>      <chr>          
1 id1           1 amazon1    open/close company energy 
2 id1           2 yahoo2     open       other          
3 id2           1 google1    open/close company energy 
4 id2           2 NA         NA         NA             
5 id3           1 yahoo1     close      goods and books
6 id3           2 amazon2    open       other   

CodePudding user response:

You can use left_join on a strsplit column of the second data set.

library(dplyr)
library(tidyr)

left_join(df1, dfcheck %>% 
  mutate(name = strsplit(name, ";")) %>% 
  unnest(name), c("title_name" = "name"))
# A tibble: 6 × 5
  id    title_num title_name status     stock
  <chr>     <dbl> <chr>      <chr>      <chr>
1 id1           1 amazon1    open/close company energy
2 id1           2 yahoo2     open       other
3 id2           1 google1    open/close company energy
4 id2           2 NA         NA         NA
5 id3           1 yahoo1     close      goods and books
6 id3           2 amazon2    open       other

Data

df1 <- structure(list(id = c("id1", "id1", "id2", "id2", "id3", "id3"
), title_num = c(1, 2, 1, 2, 1, 2), title_name = c("amazon1",
"yahoo2", "google1", NA, "yahoo1", "amazon2")), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"))

dfcheck <- structure(list(status = c("open/close", "close", "open"), stock = c("company energy",
"goods and books", "other"), name = c("amazon1;google1", "google3;yahoo1",
"yahoo2;amazon2;google2")), class = "data.frame", row.names = c(NA,
-3L))
  •  Tags:  
  • r
  • Related