I want to create a `variant` variable that contains each row's variant type (`txf_df$type`) concatenated with its running number within that type. For example, if a row's `txf_df$type` is `J` and it is the first `J` in the data frame, its variant is `J1`; likewise, the second `J` becomes `J2`.
I only want to print the variants J3, J10, and J13.
# Label every row with its type followed by that type's running count in order
# of appearance (first "J" -> "J1", second "J" -> "J2", ...), then print only
# the variants of interest.
#
# The earlier attempt could not run: dim() of a single type string is NULL, so
# 1:dim(t)[1] fails, and `!` cannot be applied to strings such as "J3".
# Membership in a set of labels is tested with %in% instead.
type_labels <- as.character(txf_df$type)

# ave() applies seq_along() to the positions of each type separately and
# returns the counts in the original row order.
occurrence <- ave(seq_along(type_labels), type_labels, FUN = seq_along)
variant <- paste0(type_labels, occurrence)

# J3, J10, and J13 variants are present in both normal and tumor samples
for (selected in variant[variant %in% c("J3", "J10", "J13")]) {
  print(selected)
}
Data
> dput(txf_df)
structure(list(seqnames = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "16", class = "factor"),
start = c(12058964L, 12059311L, 12059311L, 12060052L, 12060198L,
12060198L, 12060877L, 12061000L, 12061427L, 23313591L, 23313758L,
23359913L, 23360231L, 23364122L, 23364395L, 23366620L, 23366810L,
23379177L, 23379280L, 23382620L, 23382783L, 23383097L, 23383204L,
23387059L, 23387176L, 23388486L, 23388561L, 23388650L, 23388707L,
23390027L, 23390088L, 23391415L, 23391490L, 23391742L, 23765948L,
23766437L, 23766982L, 23767054L, 23767168L, 23767248L, 23767384L,
23767514L, 23767709L, 23767770L, 23768522L, 23768644L, 23768851L,
25228285L, 25228378L, 25228519L, 25228766L, 25232778L, 25232904L,
25235683L, 25235897L, 25238389L, 25238523L, 25239765L, 29789561L,
29789627L, 29790581L, 29790642L, 29790826L, 29790958L, 29791425L,
56651502L, 56659585L, 56659791L, 56660378L, 56660378L, 56660443L,
56660792L, 56666534L, 56666671L, 56667252L, 56667317L, 56667663L,
56691855L, 56691998L, 56692587L, 56692652L, 56692985L, 56703726L,
56703824L, 56704418L, 56704483L, 56704810L, 66878282L, 66878430L,
66878841L, 66879137L, 66880933L, 66881130L, 66884444L, 66884562L,
66885366L, 66885461L, 66885567L, 66885629L, 66886615L, 66886770L,
66887279L, 66968347L, 66968374L, 66969614L, 66969879L, 66971940L,
66972144L, 66973120L, 66973261L, 66974125L, 66974258L, 66974340L,
66974598L, 66975027L, 66975125L, 66975409L, 66975549L, 66975671L,
66975751L, 66976008L, 66976152L, 66976551L, 66976551L, 66976640L,
66976688L, 66977202L, 66977274L, 66977742L, 67465036L, 67465416L,
67469531L, 67469743L, 67469860L, 67470045L, 67470152L, 67470289L,
67470491L, 82068858L, 82069294L, 82101775L, 82101987L, 82104547L,
82104732L, 82124507L, 82124644L, 82131680L, 56700653L, 56700882L,
56701224L, 56701224L, 56701289L, 56701292L, 56701878L, 75145758L,
75146419L, 75146500L, 75146611L, 75146732L, 75146822L, 75147433L,
75147560L, 75147640L, 75147768L, 75147864L, 75147864L, 75148063L,
75148132L, 75148424L, 75148583L, 75148725L, 75148866L, 75149096L,
75149237L, 75149446L, 75149558L, 75150547L), end = c(12059311L,
12060052L, 12061427L, 12060198L, 12060877L, 12061427L, 12061000L,
12061427L, 12061925L, 23313758L, 23359913L, 23360231L, 23364122L,
23364395L, 23366620L, 23366810L, 23379177L, 23379280L, 23382620L,
23382783L, 23383097L, 23383204L, 23387059L, 23387176L, 23388486L,
23388561L, 23388650L, 23388707L, 23390027L, 23390088L, 23391415L,
23391490L, 23391742L, 23392620L, 23766437L, 23766982L, 23767054L,
23767168L, 23767248L, 23767384L, 23767514L, 23767709L, 23767770L,
23768522L, 23768644L, 23768851L, 23770256L, 25228378L, 25228519L,
25228766L, 25232778L, 25232904L, 25235683L, 25235897L, 25238389L,
25238523L, 25239765L, 25240253L, 29789627L, 29790581L, 29790642L,
29790826L, 29790958L, 29791425L, 29792969L, 56667252L, 56659791L,
56660378L, 56660443L, 56661024L, 56660792L, 56661024L, 56666671L,
56667252L, 56667317L, 56667663L, 56667898L, 56691998L, 56692587L,
56692652L, 56692985L, 56693215L, 56703824L, 56704418L, 56704483L,
56704810L, 56705041L, 66878430L, 66880933L, 66879137L, 66880933L,
66881130L, 66884444L, 66884562L, 66885366L, 66885461L, 66885567L,
66885629L, 66886615L, 66886770L, 66887279L, 66888049L, 66969614L,
66969879L, 66971940L, 66971940L, 66972144L, 66973120L, 66973261L,
66974125L, 66974258L, 66974340L, 66974598L, 66975027L, 66975125L,
66975409L, 66975549L, 66975671L, 66975751L, 66976008L, 66976152L,
66976551L, 66976640L, 66976688L, 66977202L, 66977202L, 66977274L,
66977742L, 66978994L, 67465416L, 67469531L, 67469743L, 67469860L,
67470045L, 67470152L, 67470289L, 67470491L, 67471454L, 82069294L,
82101775L, 82101987L, 82104547L, 82104732L, 82124507L, 82124644L,
82131680L, 82132139L, 56700882L, 56701224L, 56701289L, 56701292L,
56701878L, 56701878L, 56701977L, 75146419L, 75146500L, 75146611L,
75146732L, 75146822L, 75147433L, 75147560L, 75147640L, 75147768L,
75147864L, 75148063L, 75148132L, 75148424L, 75148424L, 75148583L,
75148725L, 75148866L, 75149096L, 75149237L, 75149446L, 75149558L,
75150547L, 75150670L), width = c(348L, 742L, 2117L, 147L,
680L, 1230L, 124L, 428L, 499L, 168L, 46156L, 319L, 3892L,
274L, 2226L, 191L, 12368L, 104L, 3341L, 164L, 315L, 108L,
3856L, 118L, 1311L, 76L, 90L, 58L, 1321L, 62L, 1328L, 76L,
253L, 879L, 490L, 546L, 73L, 115L, 81L, 137L, 131L, 196L,
62L, 753L, 123L, 208L, 1406L, 94L, 142L, 248L, 4013L, 127L,
2780L, 215L, 2493L, 135L, 1243L, 489L, 67L, 955L, 62L, 185L,
133L, 468L, 1545L, 15751L, 207L, 588L, 66L, 647L, 350L, 233L,
138L, 582L, 66L, 347L, 236L, 144L, 590L, 66L, 334L, 231L,
99L, 595L, 66L, 328L, 232L, 149L, 2504L, 297L, 1797L, 198L,
3315L, 119L, 805L, 96L, 107L, 63L, 987L, 156L, 510L, 771L,
1268L, 1506L, 2327L, 2062L, 205L, 977L, 142L, 865L, 134L,
83L, 259L, 430L, 99L, 285L, 141L, 123L, 81L, 258L, 145L,
400L, 90L, 138L, 563L, 515L, 73L, 469L, 1253L, 381L, 4116L,
213L, 118L, 186L, 108L, 138L, 203L, 964L, 437L, 32482L, 213L,
2561L, 186L, 19776L, 138L, 7037L, 460L, 230L, 343L, 66L,
69L, 590L, 587L, 100L, 662L, 82L, 112L, 122L, 91L, 612L,
128L, 81L, 129L, 97L, 200L, 269L, 362L, 293L, 160L, 143L,
142L, 231L, 142L, 210L, 113L, 990L, 124L), strand = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c(" ",
"-", "*"), class = "factor"), type = structure(c(3L, 1L,
1L, 2L, 1L, 1L, 2L, 1L, 4L, 3L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 4L, 3L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 4L,
3L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 4L, 3L, 1L, 2L, 1L,
2L, 1L, 4L, 1L, 3L, 1L, 2L, 4L, 1L, 4L, 3L, 1L, 2L, 1L, 4L,
3L, 1L, 2L, 1L, 4L, 3L, 1L, 2L, 1L, 4L, 3L, 1L, 3L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 4L, 3L, 3L, 1L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 2L, 1L, 1L, 2L, 1L, 4L, 3L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
4L, 3L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 4L, 4L, 1L, 2L, 2L, 1L,
1L, 3L, 4L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 3L), .Label = c("J",
"I", "F", "L", "U"), class = "factor"), txName = structure(list(
c("uc002dbv.3", "uc010buy.3", "uc010buz.3"), c("uc002dbv.3",
"uc010buy.3"), "uc010buz.3", c("uc002dbv.3", "uc010buy.3"
), "uc010buy.3", "uc002dbv.3", "uc010buy.3", "uc010buy.3",
c("uc002dbv.3", "uc010buy.3", "uc010buz.3"), "uc002dln.3",
"uc002dln.3", "uc002dln.3", "uc002dln.3", "uc002dln.3",
"uc002dln.3", "uc002dln.3", "uc002dln.3", "uc002dln.3",
"uc002dln.3", "uc002dln.3", "uc002dln.3", "uc002dln.3",
"uc002dln.3", "uc002dln.3", "uc002dln.3", "uc002dln.3",
"uc002dln.3", "uc002dln.3", "uc002dln.3", "uc002dln.3",
"uc002dln.3", "uc002dln.3", "uc002dln.3", "uc002dln.3",
"uc002dmb.1", "uc002dmb.1", "uc002dmb.1", "uc002dmb.1",
"uc002dmb.1", "uc002dmb.1", "uc002dmb.1", "uc002dmb.1",
"uc002dmb.1", "uc002dmb.1", "uc002dmb.1", "uc002dmb.1",
"uc002dmb.1", "uc002doc.3", "uc002doc.3", "uc002doc.3",
"uc002doc.3", "uc002doc.3", "uc002doc.3", "uc002doc.3",
"uc002doc.3", "uc002doc.3", "uc002doc.3", "uc002doc.3",
"uc002dtr.4", "uc002dtr.4", "uc002dtr.4", "uc002dtr.4",
"uc002dtr.4", "uc002dtr.4", "uc002dtr.4", "uc010vhe.2",
c("uc002ejl.3", "uc002ejm.3"), c("uc002ejl.3", "uc002ejm.3"
), "uc002ejl.3", "uc002ejm.3", "uc002ejl.3", "uc002ejl.3",
"uc002ejn.3", "uc002ejn.3", c("uc010vhe.2", "uc002ejn.3"
), c("uc010vhe.2", "uc002ejn.3"), c("uc010vhe.2", "uc002ejn.3"
), "uc002ejt.3", "uc002ejt.3", "uc002ejt.3", "uc002ejt.3",
"uc002ejt.3", "uc002ejw.3", "uc002ejw.3", "uc002ejw.3",
"uc002ejw.3", "uc002ejw.3", "uc002eqi.3", "uc002eqi.3",
"uc002eqj.3", "uc002eqj.3", c("uc002eqi.3", "uc002eqj.3"
), c("uc002eqi.3", "uc002eqj.3"), c("uc002eqi.3", "uc002eqj.3"
), c("uc002eqi.3", "uc002eqj.3"), c("uc002eqi.3", "uc002eqj.3"
), c("uc002eqi.3", "uc002eqj.3"), c("uc002eqi.3", "uc002eqj.3"
), c("uc002eqi.3", "uc002eqj.3"), c("uc002eqi.3", "uc002eqj.3"
), c("uc002eqi.3", "uc002eqj.3"), c("uc002eqi.3", "uc002eqj.3"
), c("uc002eqq.3", "uc002eqr.3"), "uc002eqs.3", c("uc002eqq.3",
"uc002eqr.3"), "uc002eqs.3", c("uc002eqq.3", "uc002eqr.3",
"uc002eqs.3"), c("uc002eqq.3", "uc002eqr.3", "uc002eqs.3"
), c("uc002eqq.3", "uc002eqr.3", "uc002eqs.3"), c("uc002eqq.3",
"uc002eqr.3", "uc002eqs.3"), c("uc002eqq.3", "uc002eqr.3",
"uc002eqs.3"), c("uc002eqq.3", "uc002eqr.3", "uc002eqs.3"
), c("uc002eqq.3", "uc002eqr.3", "uc002eqs.3"), c("uc002eqq.3",
"uc002eqr.3", "uc002eqs.3"), c("uc002eqq.3", "uc002eqr.3",
"uc002eqs.3"), c("uc002eqq.3", "uc002eqr.3", "uc002eqs.3"
), c("uc002eqq.3", "uc002eqr.3", "uc002eqs.3"), c("uc002eqq.3",
"uc002eqr.3", "uc002eqs.3"), c("uc002eqq.3", "uc002eqr.3",
"uc002eqs.3"), c("uc002eqq.3", "uc002eqr.3", "uc002eqs.3"
), c("uc002eqq.3", "uc002eqr.3", "uc002eqs.3"), c("uc002eqq.3",
"uc002eqr.3", "uc002eqs.3"), "uc002eqq.3", c("uc002eqr.3",
"uc002eqs.3"), "uc002eqq.3", c("uc002eqr.3", "uc002eqs.3"
), c("uc002eqq.3", "uc002eqr.3", "uc002eqs.3"), c("uc002eqq.3",
"uc002eqr.3", "uc002eqs.3"), c("uc002eqq.3", "uc002eqr.3",
"uc002eqs.3"), "uc002etd.3", "uc002etd.3", "uc002etd.3",
"uc002etd.3", "uc002etd.3", "uc002etd.3", "uc002etd.3",
"uc002etd.3", "uc002etd.3", "uc002fgv.3", "uc002fgv.3",
"uc002fgv.3", "uc002fgv.3", "uc002fgv.3", "uc002fgv.3",
"uc002fgv.3", "uc002fgv.3", "uc002fgv.3", c("uc002eju.1",
"uc002ejv.1"), c("uc002eju.1", "uc002ejv.1"), "uc002eju.1",
"uc002ejv.1", "uc002eju.1", "uc002ejv.1", c("uc002eju.1",
"uc002ejv.1"), c("uc002fdm.3", "uc002fdn.3"), c("uc002fdm.3",
"uc002fdn.3"), c("uc002fdm.3", "uc002fdn.3"), c("uc002fdm.3",
"uc002fdn.3"), c("uc002fdm.3", "uc002fdn.3"), c("uc002fdm.3",
"uc002fdn.3"), c("uc002fdm.3", "uc002fdn.3"), c("uc002fdm.3",
"uc002fdn.3"), c("uc002fdm.3", "uc002fdn.3"), c("uc002fdm.3",
"uc002fdn.3"), "uc002fdn.3", "uc002fdm.3", "uc002fdn.3",
"uc002fdm.3", c("uc002fdm.3", "uc002fdn.3"), c("uc002fdm.3",
"uc002fdn.3"), c("uc002fdm.3", "uc002fdn.3"), c("uc002fdm.3",
"uc002fdn.3"), c("uc002fdm.3", "uc002fdn.3"), c("uc002fdm.3",
"uc002fdn.3"), c("uc002fdm.3", "uc002fdn.3"), c("uc002fdm.3",
"uc002fdn.3"), c("uc002fdm.3", "uc002fdn.3")), class = "AsIs"),
geneName = structure(list("608", "608", "608", "608", "608",
"608", "608", "608", "608", "6338", "6338", "6338", "6338",
"6338", "6338", "6338", "6338", "6338", "6338", "6338",
"6338", "6338", "6338", "6338", "6338", "6338", "6338",
"6338", "6338", "6338", "6338", "6338", "6338", "6338",
"63928", "63928", "63928", "63928", "63928", "63928",
"63928", "63928", "63928", "63928", "63928", "63928",
"63928", "343", "343", "343", "343", "343", "343", "343",
"343", "343", "343", "343", "653808", "653808", "653808",
"653808", "653808", "653808", "653808", "4499", "4493",
"4493", "4493", "4493", "4493", "4493", "4499", "4499",
"4499", "4499", "4499", "4494", "4494", "4494", "4494",
"4494", "4496", "4496", "4496", "4496", "4496", "766",
"766", "766", "766", "766", "766", "766", "766", "766",
"766", "766", "766", "766", "766", "766", "8824", "8824",
"8824", "8824", "8824", "8824", "8824", "8824", "8824",
"8824", "8824", "8824", "8824", "8824", "8824", "8824",
"8824", "8824", "8824", "8824", "8824", "8824", "8824",
"8824", "8824", "8824", "8824", "3291", "3291", "3291",
"3291", "3291", "3291", "3291", "3291", "3291", "3294",
"3294", "3294", "3294", "3294", "3294", "3294", "3294",
"3294", "4495", "4495", "4495", "4495", "4495", "4495",
"4495", "197257", "197257", "197257", "197257", "197257",
"197257", "197257", "197257", "197257", "197257", "197257",
"197257", "197257", "197257", "197257", "197257", "197257",
"197257", "197257", "197257", "197257", "197257", "197257"), class = "AsIs")), row.names = c(NA,
-177L), class = "data.frame")
CodePudding user response:
Consider `ave` for the groupwise calculation, with `seq_along` to number the rows within each group:
# Build the variant label for every row: the type followed by its running
# count within that type (first "J" -> "J1", second "J" -> "J2", ...).
# ave() applies seq_along() to the row positions of each type separately and
# returns the counts in the original row order.
# seq_along(type) is used rather than 1:nrow(txf_df) so that a zero-row data
# frame produces an empty index instead of c(1, 0).
txf_df$variant <- with(
  txf_df,
  paste0(type, ave(seq_along(type), type, FUN = seq_along))
)
While it is not clear why you need to dump all of those specific variants to the console without context, consider `subset` to `View` or `print` all columns, or a single column, of the data frame for those particular variants:
# Keep only the rows whose variant label is one of the requested ones.
# drop = FALSE guarantees the result stays a data frame.
sub_txf_df <- txf_df[
  txf_df$variant %in% c("J3", "J10", "J13"),
  ,
  drop = FALSE
]

# Inspect the selection: open it in the data viewer, show its first and last
# rows, then list just the variant labels.
View(sub_txf_df)
print(head(sub_txf_df))
print(tail(sub_txf_df))
print(sub_txf_df[["variant"]])
CodePudding user response:
As I understand it, the rows need to be numbered within each type in the order in which they appear. I did this with `group_by` and `mutate` from the dplyr library; after that, you can proceed with the loop as usual. Note the difference between the condition you wrote and the one that is actually needed. Here `df` is the data frame:
library(dplyr)
# Number each row within its type, in order of appearance.
# ungroup() removes the grouping afterwards so that later operations on df are
# not silently applied per type.
df <- df %>%
  group_by(type) %>%
  mutate(id_in_group = row_number()) %>%
  ungroup()

# Walk the rows and print only the requested variant labels.
# seq_len() is used instead of 1:nrow(df) so a zero-row data frame skips the
# loop rather than iterating over c(1, 0).
variants_of_interest <- c("J3", "J10", "J13")
for (i in seq_len(nrow(df))) {
  variant <- paste0(df$type[i], df$id_in_group[i])
  if (variant %in% variants_of_interest) {
    print(variant)
  }
}
Or, in a more elegant way, with dplyr
# Label every row once: its position within its type (id_in_group) and the
# resulting variant name (type followed by that position). The result is
# ungrouped so the filters below operate on the whole table.
df_variants <- df %>%
  group_by(type) %>%
  mutate(
    id_in_group = row_number(),
    variant = paste0(type, id_in_group)
  ) %>%
  ungroup()

# Complement of the request: every row EXCEPT variants J3, J10 and J13.
df_variants %>%
  filter(!(variant %in% c("J3", "J10", "J13")))

# What the question asks for: only the rows for variants J3, J10 and J13.
df_variants %>%
  filter(variant %in% c("J3", "J10", "J13"))