I am trying to extract the features/compounds that lie in different percentiles (25th, 50th, 75th) of the distribution shown in the density plot, and then save these features in a new data.frame.
I will then use these new features and map them back to the original data.frame.
Identifying these features would help with further analysis and in-depth exploration. I have provided example data and the density/boxplot code (screenshot below).
# Example data: `dput(Delta)` prints the `structure(...)` expression below.
# Assigning that expression to `Delta` recreates the data frame: 27 rows with
# a numeric `PC1-PC2` column and a factor `Gene_Symbols` column; the row names
# carry the same feature/compound labels as `Gene_Symbols`.
dput(Delta)
structure(list(`PC1-PC2` = c(0.0161933528045602, 0.766612235998576,
-0.237724873642335, -0.0733015604900428, 0.400545815637124, 0.414481719044214,
0.208303811501068, 0.392408339922047, 0.336514581021898, -0.320322998122561,
0.36615463065484, -0.263557666645363, 0.180272570114807, 0.255255831254277,
0.0138502697450574, 0.23798933387042, -0.296936870921566, 0.206190306805568,
0.141038353337885, 0.167942308239497, 0.147174778368622, -0.0111611567646942,
-0.141468109519736, 0.11179112137823, 0.114216799808335, 0.0185917572079534,
0.0147028493400293), Gene_Symbols = structure(c(15L, 13L, 21L,
9L, 2L, 7L, 1L, 19L, 14L, 5L, 17L, 24L, 18L, 8L, 27L, 20L, 12L,
26L, 4L, 23L, 3L, 6L, 16L, 22L, 11L, 25L, 10L), .Label = c("Feature_1_Compound_2",
"Feature_1_Compound_3", "Feature_10_Compound_1", "Feature_10_Compound_2",
"Feature_10_Compound_3", "Feature_2_Compound_2", "Feature_2_Compound_3",
"Feature_3_Compound_1", "Feature_3_Compound_2", "Feature_4_Compound_1",
"Feature_4_Compound_2", "Feature_4_Compound_3", "Feature_5_Compound_1",
"Feature_5_Compound_2", "Feature_5_Compound_3", "Feature_6_Compound_1",
"Feature_6_Compound_2", "Feature_6_Compound_3", "Feature_7_Compound_1",
"Feature_7_Compound_2", "Feature_7_Compound_3", "Feature_8_Compound_1",
"Feature_8_Compound_2", "Feature_8_Compound_3", "Feature_9_Compound_1",
"Feature_9_Compound_2", "Feature_9_Compound_3"), class = "factor")), row.names = c("Feature_5_Compound_3",
"Feature_5_Compound_1", "Feature_7_Compound_3", "Feature_3_Compound_2",
"Feature_1_Compound_3", "Feature_2_Compound_3", "Feature_1_Compound_2",
"Feature_7_Compound_1", "Feature_5_Compound_2", "Feature_10_Compound_3",
"Feature_6_Compound_2", "Feature_8_Compound_3", "Feature_6_Compound_3",
"Feature_3_Compound_1", "Feature_9_Compound_3", "Feature_7_Compound_2",
"Feature_4_Compound_3", "Feature_9_Compound_2", "Feature_10_Compound_2",
"Feature_8_Compound_2", "Feature_10_Compound_1", "Feature_2_Compound_2",
"Feature_6_Compound_1", "Feature_8_Compound_1", "Feature_4_Compound_2",
"Feature_9_Compound_1", "Feature_4_Compound_1"), class = "data.frame")
#> PC1-PC2 Gene_Symbols
#> Feature_5_Compound_3 0.01619335 Feature_5_Compound_3
#> Feature_5_Compound_1 0.76661224 Feature_5_Compound_1
#> Feature_7_Compound_3 -0.23772487 Feature_7_Compound_3
#> Feature_3_Compound_2 -0.07330156 Feature_3_Compound_2
#> Feature_1_Compound_3 0.40054582 Feature_1_Compound_3
#> Feature_2_Compound_3 0.41448172 Feature_2_Compound_3
#> Feature_1_Compound_2 0.20830381 Feature_1_Compound_2
#> Feature_7_Compound_1 0.39240834 Feature_7_Compound_1
#> Feature_5_Compound_2 0.33651458 Feature_5_Compound_2
#> Feature_10_Compound_3 -0.32032300 Feature_10_Compound_3
#> Feature_6_Compound_2 0.36615463 Feature_6_Compound_2
#> Feature_8_Compound_3 -0.26355767 Feature_8_Compound_3
#> Feature_6_Compound_3 0.18027257 Feature_6_Compound_3
#> Feature_3_Compound_1 0.25525583 Feature_3_Compound_1
#> Feature_9_Compound_3 0.01385027 Feature_9_Compound_3
#> Feature_7_Compound_2 0.23798933 Feature_7_Compound_2
#> Feature_4_Compound_3 -0.29693687 Feature_4_Compound_3
#> Feature_9_Compound_2 0.20619031 Feature_9_Compound_2
#> Feature_10_Compound_2 0.14103835 Feature_10_Compound_2
#> Feature_8_Compound_2 0.16794231 Feature_8_Compound_2
#> Feature_10_Compound_1 0.14717478 Feature_10_Compound_1
#> Feature_2_Compound_2 -0.01116116 Feature_2_Compound_2
#> Feature_6_Compound_1 -0.14146811 Feature_6_Compound_1
#> Feature_8_Compound_1 0.11179112 Feature_8_Compound_1
#> Feature_4_Compound_2 0.11421680 Feature_4_Compound_2
#> Feature_9_Compound_1 0.01859176 Feature_9_Compound_1
#> Feature_4_Compound_1 0.01470285 Feature_4_Compound_1
# Density distribution of the PC1-PC2 deltas, with counts on the y axis and
# each point labelled by its Gene_Symbols value.
# `ggdensity()` comes from ggpubr; it is namespace-qualified here so the
# snippet does not depend on ggpubr having been attached beforehand.
plt2 <- ggpubr::ggdensity(
  Delta,
  x = "PC1-PC2",
  y = "..count..",
  xlab = "Delta (PC1-PC2)",
  ylab = "Number of genes",
  fill = "lightgray",
  color = "black",
  label = "Gene_Symbols",
  repel = TRUE,
  font.label = list(color = "PC1-PC2"),
  # NOTE(review): the example values span roughly -0.32 to 0.77, so a tick
  # step of 20 is probably far too coarse for this axis -- confirm.
  xticks.by = 20, # Break x ticks by 20
  gradient.cols = c("blue", "red"),
  legend = c(0.7, 0.6),
  legend.title = "" # Hide legend title
)
#
library(dplyr)
library(ggplot2)
# Horizontal boxplot of the PC1-PC2 deltas.
# The layers must be chained with `+`; without it `plt1` holds only the bare
# `ggplot()` object and the geom/coord/theme calls are evaluated and discarded
# as separate statements.
plt1 <- Delta %>%
  select(`PC1-PC2`) %>%
  ggplot(aes(x = "", y = `PC1-PC2`)) +
  geom_boxplot(fill = "lightblue", color = "black") +
  coord_flip() + # flip so the box runs along the horizontal axis
  theme_classic() +
  xlab("") +
  # Hide text and ticks on the (flipped) categorical axis.
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )
# Stack the density plot above the boxplot with a 2:1 height ratio.
# install.packages("egg", dependencies = TRUE)
egg::ggarrange(plots = list(plt2, plt1), heights = c(2L, 1L))
Thank You,
Toufiq
CodePudding user response:
Extract the features whose PC1-PC2 value lies between the 25th and 75th percentiles (inclusive):
# Keep the rows whose PC1-PC2 value lies within the interquartile range
# (25th to 75th percentile, both bounds inclusive). Conditions passed to
# filter() as separate arguments are combined with a logical AND.
Delta %>%
  filter(
    `PC1-PC2` >= quantile(`PC1-PC2`, .25),
    `PC1-PC2` <= quantile(`PC1-PC2`, .75)
  )
PC1-PC2 Gene_Symbols
Feature_5_Compound_3 0.01619335 Feature_5_Compound_3
Feature_1_Compound_2 0.20830381 Feature_1_Compound_2
Feature_6_Compound_3 0.18027257 Feature_6_Compound_3
Feature_9_Compound_3 0.01385027 Feature_9_Compound_3
Feature_7_Compound_2 0.23798933 Feature_7_Compound_2
Feature_9_Compound_2 0.20619031 Feature_9_Compound_2
Feature_10_Compound_2 0.14103835 Feature_10_Compound_2
Feature_8_Compound_2 0.16794231 Feature_8_Compound_2
Feature_10_Compound_1 0.14717478 Feature_10_Compound_1
Feature_8_Compound_1 0.11179112 Feature_8_Compound_1
Feature_4_Compound_2 0.11421680 Feature_4_Compound_2
Feature_9_Compound_1 0.01859176 Feature_9_Compound_1
Feature_4_Compound_1 0.01470285 Feature_4_Compound_1