For each column whose name starts with the `nM` prefix, I want to find each row with a value < 500 and store the corresponding `Peptide` value in `pep500`. My code below returns an empty `pep500` vector.
# Load the tab-separated NetMHC table. The first line of the file is skipped,
# so the second line supplies the column headers.
netmhc <- read.table(
  file = "31917_NetMHC.xls",
  header = TRUE,
  sep = "\t",
  row.names = NULL,
  skip = 1
)
# Find peptides with binding affinity < 500 nM in at least one nM* column.
#
# Done with a vectorised mask rather than nested loops: the threshold has to
# be compared against the cell values (not the row index), and the column
# test has to look at the column name itself.

# Affinity columns are those whose names start with "nM" (nM, nM.1, ...).
affinity_cols <- grep("^nM", colnames(netmhc), value = TRUE)

# One logical vector per affinity column, OR-ed together row-wise. Missing
# affinities count as non-binders, and the all-FALSE seed keeps the result
# well-defined when no column matches the prefix.
is_binder <- Reduce(
  `|`,
  lapply(netmhc[affinity_cols], function(x) !is.na(x) & x < 500),
  rep(FALSE, nrow(netmhc))
)

# Peptide of every row that passed the threshold (zero-length if none did).
pep500 <- netmhc[["Peptide"]][is_binder]
Data structure
# Sample data: a 15-row data.frame written as a structure() literal.
# Columns: Pos, Peptide, ID, nM, Rank, Core, nM.1, Rank.1, Core.1,
# H_Avg_Ranks, N_binders.
# Only row 7 (Peptide "SLALLELSV", nM = 166.5) has a value below 500 in an
# nM* column; every nM.1 value is above 500.
structure(list(Pos = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L), Peptide = c("VPVISPRIH", "SGRHNWEVG", "SGIHPGATT",
"TELAKHQAI", "QHTHAIESN", "QVALEELRR", "SLALLELSV", "KFHATEENR",
"STESEIPDY", "AALLQSKSN", "VEGLCYLTG", "RPGLWPAPN", "LNGLDQQMI",
"EELQWIFPS", "GASPVPNPS"), ID = 1:15, nM = c(46125.8, 43001.7,
30957.3, 11468.1, 45431.9, 46655.8, 166.5, 40224.7, 38301.1,
42644, 44350.7, 29476.3, 20441.6, 9224.3, 28582.4), Rank = c(75,
55, 23, 9.5, 70, 85, 1.5, 41, 35, 50, 60, 22, 14, 8, 21), Core = c("VPVISPRIH",
"SGRHNWEVG", "SGIHPGATT", "TELAKHQAI", "QHTHAIESN", "QVALEELRR",
"SLALLELSV", "KFHATEENR", "STESEIPDY", "AALLQSKSN", "VEGLCYLTG",
"RPGLWPAPN", "LNGLDQQMI", "EELQWIFPS", "GASPVPNPS"), nM.1 = c(32273.4,
24415.1, 37350.4, 1954.7, 39374.7, 39018.4, 8669.8, 42118.9,
37011.7, 26852.3, 32381.1, 25391.5, 19678.8, 32284.2, 28078.5
), Rank.1 = c(60, 33, 80, 2.5, 90, 90, 8.5, 99, 80, 40, 60, 36,
23, 60, 43), Core.1 = c("VPVISPRIH", "SGRHNWEVG", "SGIHPGATT",
"TELAKHQAI", "QHTHAIESN", "QVALEELRR", "SLALLELSV", "KFHATEENR",
"STESEIPDY", "AALLQSKSN", "VEGLCYLTG", "RPGLWPAPN", "LNGLDQQMI",
"EELQWIFPS", "GASPVPNPS"), H_Avg_Ranks = c(66.667, 41.25, 35.728,
3.958, 78.75, 87.429, 2.55, 57.986, 48.696, 44.444, 60, 27.31,
17.405, 14.118, 28.219), N_binders = c(0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)), class = "data.frame", row.names = c(NA,
-15L))
Expected output:
pep500 = SLALLELSV
CodePudding user response:
We may use `if_any` in `filter`, i.e. select the columns whose names start with the 'nM' prefix (via `starts_with`) and keep the rows where any of those columns has a value less than 500.
library(dplyr)

# Keep every row of `df` in which at least one column whose name starts with
# "nM" holds a value below 500.
pep500 <- filter(
  df,
  if_any(starts_with("nM"), function(x) x < 500)
)
-output
pep500
Pos Peptide ID nM Rank Core nM.1 Rank.1 Core.1 H_Avg_Ranks N_binders
1 0 SLALLELSV 7 166.5 1.5 SLALLELSV 8669.8 8.5 SLALLELSV 2.55 1