Home > Software engineering >  Keep only the rows which contain some entries in at least one of its columns
Keep only the rows which contain some entries in at least one of its columns

Time:03-24

I have the following dataframe df

structure(list(Names = c("James", "Diana", "Dr. Michel", "Mr. George", "Hello world", "Janis","JJK"), code = c("xx34-2111", "7789-2869", "dg67-5555", "1555-2uuy", "fhgj-2jj8", "2199-7kk8","1111-1111"), `13/A1` = c(NA, "‡(2017)", NA, "‡(2017)", "A", 
"A(2019)", "A(2016)"), `13/A2` = c(NA, "‡(2017)", NA, "‡(2017)", 
"A", "A(2019)", "A(2016)"), `13/A3` = c(NA, "‡(2017)", NA, "‡(2017)", 
"A", "A(2019)", "A(2016)"), `13/A4` = c(NA, "‡(2017)", NA, "‡(2017)", 
"A", "A(2019)", "A(2016)"), `13/A5` = c(NA, "‡(2017)", NA, "‡(2017)", 
"A", "A(2019)", "A(2016)"), `13/B1` = c("A", "A", "A(2017)", 
"‡(2017)", "A", "A(2019)", NA), `13/B2` = c("A", "A", "A(2017)", 
"‡(2017)", "A", "A(2019)", NA), `13/B3` = c("A", "A", "A(2017)", 
"‡(2017)", "A", "A(2019)", NA), `13/B4` = c("A", "A", "A(2017)", 
"‡(2017)", "A", "A(2019)", NA), `13/B5` = c("A", "A", "A(2017)", 
"‡(2017)", "A", "A(2019)", NA), `13/C1` = c(NA, "A", NA, "‡(2017)", 
"A", "A(2019)", NA), `13/D1` = c(NA, "‡(2017)", NA, "‡(2017)", 
NA, "A", NA), `13/D2` = c(NA, "‡(2017)", NA, "‡(2017)", NA, "A", 
NA), `13/D3` = c(NA, "‡(2017)", NA, "‡(2017)", NA, "A", "A(2017)"
), `13/D4` = c("A", "‡(2017)", NA, "‡(2017)", NA, "A", NA)), row.names = c(1L, 
6L, 11L, 39L, 101L, 483L, 667L), class = "data.frame")

I would like to keep only the rows of df which contain in at least one of the columns 3:17, any of the following entries: A, A(2016), A(2017), A(2019).

For the example above, this means to delete only the row containing Mr. George in the field Names.

My attempt

df[,3:17]%in% c("A","A(2016)","A(2017)","A(2019)")

does not give the desired result.

CodePudding user response:

Loop through columns - sapply, check if values matches - %in%, then check if any column TRUE per row - rowSums(...) > 0

df[ rowSums(sapply(df[, 3:17], function(i) i %in% c("A","A(2016)","A(2017)","A(2019)"))) > 0, ]

CodePudding user response:

Using grepl

> rowSums(matrix(grepl("^A$|A\\(2019\\)|A\\(2017\\)|A\\(2016\\)",unlist(df)),nrow(df)))>0

[1]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE

CodePudding user response:

With a fully dplyr approach:

library(dplyr)

df %>% 
  filter(if_any(3:17, ~ .x %in% c("A","A(2016)","A(2017)","A(2019)")))

#>         Names      code   13/A1   13/A2   13/A3   13/A4   13/A5   13/B1   13/B2
#> 1       James xx34-2111    <NA>    <NA>    <NA>    <NA>    <NA>       A       A
#> 2       Diana 7789-2869 ‡(2017) ‡(2017) ‡(2017) ‡(2017) ‡(2017)       A       A
#> 3  Dr. Michel dg67-5555    <NA>    <NA>    <NA>    <NA>    <NA> A(2017) A(2017)
#> 4 Hello world fhgj-2jj8       A       A       A       A       A       A       A
#> 5       Janis 2199-7kk8 A(2019) A(2019) A(2019) A(2019) A(2019) A(2019) A(2019)
#> 6         JJK 1111-1111 A(2016) A(2016) A(2016) A(2016) A(2016)    <NA>    <NA>
#>     13/B3   13/B4   13/B5   13/C1   13/D1   13/D2   13/D3   13/D4
#> 1       A       A       A    <NA>    <NA>    <NA>    <NA>       A
#> 2       A       A       A       A ‡(2017) ‡(2017) ‡(2017) ‡(2017)
#> 3 A(2017) A(2017) A(2017)    <NA>    <NA>    <NA>    <NA>    <NA>
#> 4       A       A       A       A    <NA>    <NA>    <NA>    <NA>
#> 5 A(2019) A(2019) A(2019) A(2019)       A       A       A       A
#> 6    <NA>    <NA>    <NA>    <NA>    <NA>    <NA> A(2017)    <NA>

Another possible solution:

library(magrittr)

df[((df[,3:17] == "A")   (df[,3:17] == "A(2016)")  
    (df[,3:17] == "A(2017)")   (df[,3:17] == "A(2019)")) %>% 
       apply(1, \(x) sum(x[3:17], na.rm = T)) != 0, ]

#>           Names      code   13/A1   13/A2   13/A3   13/A4   13/A5   13/B1
#> 1         James xx34-2111    <NA>    <NA>    <NA>    <NA>    <NA>       A
#> 6         Diana 7789-2869 ‡(2017) ‡(2017) ‡(2017) ‡(2017) ‡(2017)       A
#> 11   Dr. Michel dg67-5555    <NA>    <NA>    <NA>    <NA>    <NA> A(2017)
#> 101 Hello world fhgj-2jj8       A       A       A       A       A       A
#> 483       Janis 2199-7kk8 A(2019) A(2019) A(2019) A(2019) A(2019) A(2019)
#> 667         JJK 1111-1111 A(2016) A(2016) A(2016) A(2016) A(2016)    <NA>
#>       13/B2   13/B3   13/B4   13/B5   13/C1   13/D1   13/D2   13/D3   13/D4
#> 1         A       A       A       A    <NA>    <NA>    <NA>    <NA>       A
#> 6         A       A       A       A       A ‡(2017) ‡(2017) ‡(2017) ‡(2017)
#> 11  A(2017) A(2017) A(2017) A(2017)    <NA>    <NA>    <NA>    <NA>    <NA>
#> 101       A       A       A       A       A    <NA>    <NA>    <NA>    <NA>
#> 483 A(2019) A(2019) A(2019) A(2019) A(2019)       A       A       A       A
#> 667    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>    <NA> A(2017)    <NA>
  • Related