Checking if each row has the column name appear in another data.frame, report TRUE and FALSE-CodePudding

I'd like to fill in df3 with TRUE or FALSE if the code is present in df2. I couldn't find straightforward words to describe it so I made toy examples below. I tried do.call(paste0, df3) %in% do.call(paste0, df2) but it's not reporting what I needed and I was not sure what was it reporting. Could someone direct me to a function that I could use?

 df0=data.frame(ID=paste0("id_",1:5), Code=paste0("00",seq(1,10,2)))
 df1=data.frame(ID=rep(paste0("id_",1:2)), Code=paste0("00",seq(1,10,3)))
 df2=rbind.data.frame(df0,df1)
 df3=data.frame(ID=unique(df2$ID))
 namevector <- as.character(unique(df2$Code))
 df3[ , namevector] <- NA

> df2
        ID Code
    1 id_1  001
    2 id_2  003
    3 id_3  005
    4 id_4  007
    5 id_5  009
    6 id_1  001
    7 id_2  004
    8 id_1  007
    9 id_2 0010

> df3
    ID 001 003 005 007 009 004 0010
1 id_1  NA  NA  NA  NA  NA  NA   NA
2 id_2  NA  NA  NA  NA  NA  NA   NA
3 id_3  NA  NA  NA  NA  NA  NA   NA
4 id_4  NA  NA  NA  NA  NA  NA   NA
5 id_5  NA  NA  NA  NA  NA  NA   NA

CodePudding user response：

In base R you could do this very simply as a one-liner:

table(df2$ID, df2$Code) > 0
      
#>         001  0010   003   004   005   007   009
#>  id_1  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE
#>  id_2 FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE
#>  id_3 FALSE FALSE FALSE FALSE  TRUE FALSE FALSE
#>  id_4 FALSE FALSE FALSE FALSE FALSE  TRUE FALSE
#>  id_5 FALSE FALSE FALSE FALSE FALSE FALSE  TRUE

Or, as akrun points out, even just:

table(df2) > 0
#>       Code
#> ID       001  0010   003   004   005   007   009
#>   id_1  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE
#>   id_2 FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE
#>   id_3 FALSE FALSE FALSE FALSE  TRUE FALSE FALSE
#>   id_4 FALSE FALSE FALSE FALSE FALSE  TRUE FALSE
#>   id_5 FALSE FALSE FALSE FALSE FALSE FALSE  TRUE

CodePudding user response：

Consider using pivot_wider to reshape the 'df2'

library(tidyr)
pivot_wider(df2, names_from = Code, values_from = Code, 
     values_fn = ~ length(.x) > 0, values_fill = FALSE)

-output

# A tibble: 5 × 8
  ID    `001` `003` `005` `007` `009` `004` `0010`
  <chr> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> 
1 id_1  TRUE  FALSE FALSE TRUE  FALSE FALSE FALSE 
2 id_2  FALSE TRUE  FALSE FALSE FALSE TRUE  TRUE  
3 id_3  FALSE FALSE TRUE  FALSE FALSE FALSE FALSE 
4 id_4  FALSE FALSE FALSE TRUE  FALSE FALSE FALSE 
5 id_5  FALSE FALSE FALSE FALSE TRUE  FALSE FALSE

CodePudding user response：

You can use pivot_wider() from tidyr with the arg set values_fn = Negate(is.null):

library(tidyr)

pivot_wider(
  df2,
  names_from = Code,
  values_from = Code,
  values_fn = Negate(is.null),
  values_fill = FALSE
)

# A tibble: 5 × 8
  ID    `001` `003` `005` `007` `009` `004` `0010`
  <chr> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> 
1 id_1  TRUE  FALSE FALSE TRUE  FALSE FALSE FALSE 
2 id_2  FALSE TRUE  FALSE FALSE FALSE TRUE  TRUE  
3 id_3  FALSE FALSE TRUE  FALSE FALSE FALSE FALSE 
4 id_4  FALSE FALSE FALSE TRUE  FALSE FALSE FALSE 
5 id_5  FALSE FALSE FALSE FALSE TRUE  FALSE FALSE