Home > Software design >  How to filter for rows across any columns for na and 0 but not with true value
How to filter for rows across any columns for na and 0 but not with true value

Time:09-04

Here's my dataframe

df1 = structure(list(item = c(NA, "polygon", NA, NA, NA, NA, NA, "polygon", 
"polygon", "0", NA, "polygon", "polygon"), identity = c("pol", 
"0", NA, NA, NA, NA, "0", "pol", "pol", "0", NA, "pol", "pol"
), unique = c("polx", "0", "0", "0", "0", "0", "0", "polx", "0", 
"0", NA, "polx", "0"), detect = c("ox", "0", "0", "0", "0", "0", 
NA, NA, "ox", "0", NA, "ox", NA), structure = c("hex", "0", "0", 
"0", "0", "0", NA, "hex", "hex", "0", NA, "hex", "hex"), key = c("IUGDG", 
"0", "0", "0", NA, "0", NA, "IUGDG", "IUGDG", "0", NA, "IUGDG", 
"IUGDG"), code = c("adg434kk", "0", "0", "0", NA, "0", NA, "adg434kk", 
"adg434kk", "0", NA, "adg434kk", "adg434kk")), class = "data.frame", row.names = c(NA, 
-13L))

      item identity unique detect structure   key     code
1     <NA>      pol   polx     ox       hex IUGDG adg434kk
2  polygon        0      0      0         0     0        0
3     <NA>     <NA>      0      0         0     0        0
4     <NA>     <NA>      0      0         0     0        0
5     <NA>     <NA>      0      0         0  <NA>     <NA>
6     <NA>     <NA>      0      0         0     0        0
7     <NA>        0      0   <NA>      <NA>  <NA>     <NA>
8  polygon      pol   polx   <NA>       hex IUGDG adg434kk
9  polygon      pol      0     ox       hex IUGDG adg434kk
10       0        0      0      0         0     0        0
11    <NA>     <NA>   <NA>   <NA>      <NA>  <NA>     <NA>
12 polygon      pol   polx     ox       hex IUGDG adg434kk
13 polygon      pol      0   <NA>       hex IUGDG adg434kk

Expected output:

     item identity unique detect structure   key     code
1    <NA>     <NA>      0      0         0     0        0
2    <NA>     <NA>      0      0         0     0        0
3    <NA>     <NA>      0      0         0  <NA>     <NA>
4    <NA>     <NA>      0      0         0     0        0
5    <NA>        0      0   <NA>      <NA>  <NA>     <NA>

I have tried with something similar to what i have posted earlier at How to filter across any columns for na and empty value but not 0 and changed to syntax as below but it's not working:

df1 %>%
  filter(if_any(item:code, ~ is.na(.x)),
         if_any(item:code, ~ .x == 0),
         !if_any(item:code, complete.cases))

Can someone help please? Thanks.

CodePudding user response:

# Extract rows that only consists of NA and zero
df2 <- df1[ rowSums(is.na(df1) | (df1 == 0)) == ncol(df1), ]

> df2
   item identity unique detect structure  key code
3  <NA>     <NA>      0      0         0    0    0
4  <NA>     <NA>      0      0         0    0    0
5  <NA>     <NA>      0      0         0 <NA> <NA>
6  <NA>     <NA>      0      0         0    0    0
7  <NA>        0      0   <NA>      <NA> <NA> <NA>
10    0        0      0      0         0    0    0
11 <NA>     <NA>   <NA>   <NA>      <NA> <NA> <NA>


# Exclude rows with only NA and zero
df3 <- df2[ rowSums(is.na(df2)) != ncol(df2) & rowSums(is.na(df2)) > 0, ]

> df3
  item identity unique detect structure  key code
3 <NA>     <NA>      0      0         0    0    0
4 <NA>     <NA>      0      0         0    0    0
5 <NA>     <NA>      0      0         0 <NA> <NA>
6 <NA>     <NA>      0      0         0    0    0
7 <NA>        0      0   <NA>      <NA> <NA> <NA>

CodePudding user response:

I managed to solve it myself:

df2 <- filter(df1, if_all(everything(), ~ is.na(.) | .x == 0)) %>%
          filter(if_any(item:code, ~ .x == 0), 
                 if_any(item:code, ~ is.na(.)))

  item identity unique detect structure  key code
1 <NA>     <NA>      0      0         0    0    0
2 <NA>     <NA>      0      0         0    0    0
3 <NA>     <NA>      0      0         0 <NA> <NA>
4 <NA>     <NA>      0      0         0    0    0
5 <NA>        0      0   <NA>      <NA> <NA> <NA>

filter(df1, if_all(everything(), ~ is.na(.) | .x == 0)) gives all rows with any columns that contain

  item identity unique detect structure  key code
1 <NA>     <NA>      0      0         0    0    0
2 <NA>     <NA>      0      0         0    0    0
3 <NA>     <NA>      0      0         0 <NA> <NA>
4 <NA>     <NA>      0      0         0    0    0
5 <NA>        0      0   <NA>      <NA> <NA> <NA>
6    0        0      0      0         0    0    0
7 <NA>     <NA>   <NA>   <NA>      <NA> <NA> <NA>

and perform

filter(if_any(item:code, ~ .x == 0), 
                 if_any(item:code, ~ is.na(.)))

to capture only rows with any columns that has both NA and 0

  • Related