I have a data frame and I would like to extract the row(s) in which at least one column contains a value between 0.5 and 1.0 (inclusive). Is there a way to filter across all columns by specifying multiple conditions?
Thank you,
Md
# Example data: the structure() call below is the output of dput(Data).
# It has one character column (Genes) and five numeric sample columns (S1-S5),
# some of which contain NA.
# NOTE(review): to recreate the data frame in a fresh session, assign the
# structure() expression to `Data` (Data <- structure(...)).
dput(Data)
structure(list(Genes = c("Gene_1", "Gene_2", "Gene_3",
"Gene_4", "Gene_5", "Gene_6", "Gene_7", "Gene_8",
"Gene_9", "Gene_10"), `S1` = c(0.883643926, 0.248614376,
0.518091486, 0.535221236, 0.415450436, -0.940323826, -0.723796576,
-0.824290276, NA, -0.806255146), `S2` = c(1.005757776, 1.005757776,
4.51601548, 3, 7.78620408, -0.706674058, -0.572657338, -0.686018538,
-0.514713298, -0.532390248), `S3` = c(7.798089, 9.2058061,
5.5408169, 1.52159119, 2.63042701, NA, 1.3857699, -0.152939869,
-0.050295909, -0.337659179), `S4` = c(1.41324408, 9.6038562,
1.71087962, 2.95921938, 4.82199712, 3.17140358, 1.15931318, NA,
1.58997338, 4.76858598), `S5` = c(-0.167945369, 1.41324408,
1.41324408, 0.741171721, 2.494610191, -0.532343489, -0.358607189,
-0.442774239, -0.103589789, 0.213156301)), row.names = c(NA,
10L), class = "data.frame")
#> Genes S1 S2 S3 S4 S5
#> 1 Gene_1 0.8836439 1.0057578 7.79808900 1.413244 -0.1679454
#> 2 Gene_2 0.2486144 1.0057578 9.20580610 9.603856 1.4132441
#> 3 Gene_3 0.5180915 4.5160155 5.54081690 1.710880 1.4132441
#> 4 Gene_4 0.5352212 3.0000000 1.52159119 2.959219 0.7411717
#> 5 Gene_5 0.4154504 7.7862041 2.63042701 4.821997 2.4946102
#> 6 Gene_6 -0.9403238 -0.7066741 NA 3.171404 -0.5323435
#> 7 Gene_7 -0.7237966 -0.5726573 1.38576990 1.159313 -0.3586072
#> 8 Gene_8 -0.8242903 -0.6860185 -0.15293987 NA -0.4427742
#> 9 Gene_9 NA -0.5147133 -0.05029591 1.589973 -0.1035898
#> 10 Gene_10 -0.8062551 -0.5323902 -0.33765918 4.768586 0.2131563
# For instance, filter by <= 0.1: keep rows where any sample column is <= 0.1.
library(dplyr)
Data %>%
  select(Genes, S1:S5) %>%
  # if_any() replaces the superseded filter_all(any_vars()). Restricting the
  # test to S1:S5 also avoids comparing the character Genes column to a number,
  # which would be a locale-dependent string comparison.
  filter(if_any(S1:S5, ~ .x <= 0.1))
Created on 2022-02-22 by the reprex package (v2.0.1)
CodePudding user response:
One way of doing it is to drop the first column (`Genes`) and use `apply()` row-wise:
# Logical matrix: TRUE where a sample value (every column except Genes) lies
# in the closed interval [0.5, 1]; NA cells stay NA.
in_range <- Data[, -1] >= 0.5 & Data[, -1] <= 1
# Keep rows with at least one hit; na.rm = TRUE makes NA cells count as
# "no hit", so rows containing NA are only kept if another cell matches.
Data[rowSums(in_range, na.rm = TRUE) > 0, ]
Genes S1 S2 S3 S4 S5
1 Gene_1 0.8836439 1.005758 7.798089 1.413244 -0.1679454
3 Gene_3 0.5180915 4.516015 5.540817 1.710880 1.4132441
4 Gene_4 0.5352212 3.000000 1.521591 2.959219 0.7411717
CodePudding user response:
Here's a `dplyr` approach. You can use `if_any()` within `filter()` to apply the condition to all columns except `Genes`.
library(dplyr)
# Keep rows where at least one column other than Genes holds a value in the
# closed interval [0.5, 1].
Data %>%
  filter(
    if_any(!Genes, function(col) between(col, left = 0.5, right = 1))
  )
Genes S1 S2 S3 S4 S5
1 Gene_1 0.8836439 1.005758 7.798089 1.413244 -0.1679454
2 Gene_3 0.5180915 4.516015 5.540817 1.710880 1.4132441
3 Gene_4 0.5352212 3.000000 1.521591 2.959219 0.7411717