Home > database >  Checking if each row has the column name appear in another data.frame, report TRUE and FALSE
Checking if each row has the column name appear in another data.frame, report TRUE and FALSE

Time:04-26

I'd like to fill in df3 with TRUE or FALSE if the code is present in df2. I couldn't find straightforward words to describe it so I made toy examples below. I tried do.call(paste0, df3) %in% do.call(paste0, df2) but it's not reporting what I needed and I was not sure what was it reporting. Could someone direct me to a function that I could use?

 df0=data.frame(ID=paste0("id_",1:5), Code=paste0("00",seq(1,10,2)))
 df1=data.frame(ID=rep(paste0("id_",1:2)), Code=paste0("00",seq(1,10,3)))
 df2=rbind.data.frame(df0,df1)
 df3=data.frame(ID=unique(df2$ID))
 namevector <- as.character(unique(df2$Code))
 df3[ , namevector] <- NA

> df2
        ID Code
    1 id_1  001
    2 id_2  003
    3 id_3  005
    4 id_4  007
    5 id_5  009
    6 id_1  001
    7 id_2  004
    8 id_1  007
    9 id_2 0010

> df3
    ID 001 003 005 007 009 004 0010
1 id_1  NA  NA  NA  NA  NA  NA   NA
2 id_2  NA  NA  NA  NA  NA  NA   NA
3 id_3  NA  NA  NA  NA  NA  NA   NA
4 id_4  NA  NA  NA  NA  NA  NA   NA
5 id_5  NA  NA  NA  NA  NA  NA   NA

CodePudding user response:

In base R you could do this very simply as a one-liner:

table(df2$ID, df2$Code) > 0
      
#>         001  0010   003   004   005   007   009
#>  id_1  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE
#>  id_2 FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE
#>  id_3 FALSE FALSE FALSE FALSE  TRUE FALSE FALSE
#>  id_4 FALSE FALSE FALSE FALSE FALSE  TRUE FALSE
#>  id_5 FALSE FALSE FALSE FALSE FALSE FALSE  TRUE

Or, as akrun points out, even just:

table(df2) > 0
#>       Code
#> ID       001  0010   003   004   005   007   009
#>   id_1  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE
#>   id_2 FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE
#>   id_3 FALSE FALSE FALSE FALSE  TRUE FALSE FALSE
#>   id_4 FALSE FALSE FALSE FALSE FALSE  TRUE FALSE
#>   id_5 FALSE FALSE FALSE FALSE FALSE FALSE  TRUE

CodePudding user response:

Consider using pivot_wider to reshape the 'df2'

library(tidyr)
pivot_wider(df2, names_from = Code, values_from = Code, 
     values_fn = ~ length(.x) > 0, values_fill = FALSE)

-output

# A tibble: 5 × 8
  ID    `001` `003` `005` `007` `009` `004` `0010`
  <chr> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> 
1 id_1  TRUE  FALSE FALSE TRUE  FALSE FALSE FALSE 
2 id_2  FALSE TRUE  FALSE FALSE FALSE TRUE  TRUE  
3 id_3  FALSE FALSE TRUE  FALSE FALSE FALSE FALSE 
4 id_4  FALSE FALSE FALSE TRUE  FALSE FALSE FALSE 
5 id_5  FALSE FALSE FALSE FALSE TRUE  FALSE FALSE 

CodePudding user response:

You can use pivot_wider() from tidyr with the arg set values_fn = Negate(is.null):

library(tidyr)

pivot_wider(
  df2,
  names_from = Code,
  values_from = Code,
  values_fn = Negate(is.null),
  values_fill = FALSE
)

# A tibble: 5 × 8
  ID    `001` `003` `005` `007` `009` `004` `0010`
  <chr> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> 
1 id_1  TRUE  FALSE FALSE TRUE  FALSE FALSE FALSE 
2 id_2  FALSE TRUE  FALSE FALSE FALSE TRUE  TRUE  
3 id_3  FALSE FALSE TRUE  FALSE FALSE FALSE FALSE 
4 id_4  FALSE FALSE FALSE TRUE  FALSE FALSE FALSE 
5 id_5  FALSE FALSE FALSE FALSE TRUE  FALSE FALSE 
  •  Tags:  
  • r
  • Related