Home > OS >  How do I write a for loop and if-else statements to rename and subset particular variables?
How do I write a for loop and if-else statements to rename and subset particular variables?

Time:02-20

I want to create a variant variable that combines each row's variant type (txf_df$type) with its running count within that type. For example, if a row's txf_df$type is J and it is the first J-type row in the data frame, its variant should be J1. Likewise, the second J-type row should be J2.

I only want to print variants J3, J10, and J13.

# Attempt from the question: build labels such as "J3" by pasting each row's
# type onto a counter, and print only J3, J10 and J13.
# NOTE(review): this code does not run as written; see the notes below.
for (t in txf_df$type){
  # NOTE(review): `t` is a single value taken from the `type` column, so
  # dim(t) is NULL and 1:dim(t)[1] fails with "argument of length 0".
  for (n in 1:dim(t)[1]) {
    variant <- paste0(t, n)
    # J3, J10, and J13 variants are present in both normal and tumor samples
    # NOTE(review): `!` cannot be applied to a character string, so this
    # condition raises "invalid argument type"; a membership test such as
    # variant %in% c("J3", "J10", "J13") expresses the intent.
    if (variant[!"J3"||!"J10"||!"J13"]) {
      next
    }
    else {
      print(variant)
    }
  }
}

Data

> dput(txf_df)
structure(list(seqnames = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "16", class = "factor"), 
    start = c(12058964L, 12059311L, 12059311L, 12060052L, 12060198L, 
    12060198L, 12060877L, 12061000L, 12061427L, 23313591L, 23313758L, 
    23359913L, 23360231L, 23364122L, 23364395L, 23366620L, 23366810L, 
    23379177L, 23379280L, 23382620L, 23382783L, 23383097L, 23383204L, 
    23387059L, 23387176L, 23388486L, 23388561L, 23388650L, 23388707L, 
    23390027L, 23390088L, 23391415L, 23391490L, 23391742L, 23765948L, 
    23766437L, 23766982L, 23767054L, 23767168L, 23767248L, 23767384L, 
    23767514L, 23767709L, 23767770L, 23768522L, 23768644L, 23768851L, 
    25228285L, 25228378L, 25228519L, 25228766L, 25232778L, 25232904L, 
    25235683L, 25235897L, 25238389L, 25238523L, 25239765L, 29789561L, 
    29789627L, 29790581L, 29790642L, 29790826L, 29790958L, 29791425L, 
    56651502L, 56659585L, 56659791L, 56660378L, 56660378L, 56660443L, 
    56660792L, 56666534L, 56666671L, 56667252L, 56667317L, 56667663L, 
    56691855L, 56691998L, 56692587L, 56692652L, 56692985L, 56703726L, 
    56703824L, 56704418L, 56704483L, 56704810L, 66878282L, 66878430L, 
    66878841L, 66879137L, 66880933L, 66881130L, 66884444L, 66884562L, 
    66885366L, 66885461L, 66885567L, 66885629L, 66886615L, 66886770L, 
    66887279L, 66968347L, 66968374L, 66969614L, 66969879L, 66971940L, 
    66972144L, 66973120L, 66973261L, 66974125L, 66974258L, 66974340L, 
    66974598L, 66975027L, 66975125L, 66975409L, 66975549L, 66975671L, 
    66975751L, 66976008L, 66976152L, 66976551L, 66976551L, 66976640L, 
    66976688L, 66977202L, 66977274L, 66977742L, 67465036L, 67465416L, 
    67469531L, 67469743L, 67469860L, 67470045L, 67470152L, 67470289L, 
    67470491L, 82068858L, 82069294L, 82101775L, 82101987L, 82104547L, 
    82104732L, 82124507L, 82124644L, 82131680L, 56700653L, 56700882L, 
    56701224L, 56701224L, 56701289L, 56701292L, 56701878L, 75145758L, 
    75146419L, 75146500L, 75146611L, 75146732L, 75146822L, 75147433L, 
    75147560L, 75147640L, 75147768L, 75147864L, 75147864L, 75148063L, 
    75148132L, 75148424L, 75148583L, 75148725L, 75148866L, 75149096L, 
    75149237L, 75149446L, 75149558L, 75150547L), end = c(12059311L, 
    12060052L, 12061427L, 12060198L, 12060877L, 12061427L, 12061000L, 
    12061427L, 12061925L, 23313758L, 23359913L, 23360231L, 23364122L, 
    23364395L, 23366620L, 23366810L, 23379177L, 23379280L, 23382620L, 
    23382783L, 23383097L, 23383204L, 23387059L, 23387176L, 23388486L, 
    23388561L, 23388650L, 23388707L, 23390027L, 23390088L, 23391415L, 
    23391490L, 23391742L, 23392620L, 23766437L, 23766982L, 23767054L, 
    23767168L, 23767248L, 23767384L, 23767514L, 23767709L, 23767770L, 
    23768522L, 23768644L, 23768851L, 23770256L, 25228378L, 25228519L, 
    25228766L, 25232778L, 25232904L, 25235683L, 25235897L, 25238389L, 
    25238523L, 25239765L, 25240253L, 29789627L, 29790581L, 29790642L, 
    29790826L, 29790958L, 29791425L, 29792969L, 56667252L, 56659791L, 
    56660378L, 56660443L, 56661024L, 56660792L, 56661024L, 56666671L, 
    56667252L, 56667317L, 56667663L, 56667898L, 56691998L, 56692587L, 
    56692652L, 56692985L, 56693215L, 56703824L, 56704418L, 56704483L, 
    56704810L, 56705041L, 66878430L, 66880933L, 66879137L, 66880933L, 
    66881130L, 66884444L, 66884562L, 66885366L, 66885461L, 66885567L, 
    66885629L, 66886615L, 66886770L, 66887279L, 66888049L, 66969614L, 
    66969879L, 66971940L, 66971940L, 66972144L, 66973120L, 66973261L, 
    66974125L, 66974258L, 66974340L, 66974598L, 66975027L, 66975125L, 
    66975409L, 66975549L, 66975671L, 66975751L, 66976008L, 66976152L, 
    66976551L, 66976640L, 66976688L, 66977202L, 66977202L, 66977274L, 
    66977742L, 66978994L, 67465416L, 67469531L, 67469743L, 67469860L, 
    67470045L, 67470152L, 67470289L, 67470491L, 67471454L, 82069294L, 
    82101775L, 82101987L, 82104547L, 82104732L, 82124507L, 82124644L, 
    82131680L, 82132139L, 56700882L, 56701224L, 56701289L, 56701292L, 
    56701878L, 56701878L, 56701977L, 75146419L, 75146500L, 75146611L, 
    75146732L, 75146822L, 75147433L, 75147560L, 75147640L, 75147768L, 
    75147864L, 75148063L, 75148132L, 75148424L, 75148424L, 75148583L, 
    75148725L, 75148866L, 75149096L, 75149237L, 75149446L, 75149558L, 
    75150547L, 75150670L), width = c(348L, 742L, 2117L, 147L, 
    680L, 1230L, 124L, 428L, 499L, 168L, 46156L, 319L, 3892L, 
    274L, 2226L, 191L, 12368L, 104L, 3341L, 164L, 315L, 108L, 
    3856L, 118L, 1311L, 76L, 90L, 58L, 1321L, 62L, 1328L, 76L, 
    253L, 879L, 490L, 546L, 73L, 115L, 81L, 137L, 131L, 196L, 
    62L, 753L, 123L, 208L, 1406L, 94L, 142L, 248L, 4013L, 127L, 
    2780L, 215L, 2493L, 135L, 1243L, 489L, 67L, 955L, 62L, 185L, 
    133L, 468L, 1545L, 15751L, 207L, 588L, 66L, 647L, 350L, 233L, 
    138L, 582L, 66L, 347L, 236L, 144L, 590L, 66L, 334L, 231L, 
    99L, 595L, 66L, 328L, 232L, 149L, 2504L, 297L, 1797L, 198L, 
    3315L, 119L, 805L, 96L, 107L, 63L, 987L, 156L, 510L, 771L, 
    1268L, 1506L, 2327L, 2062L, 205L, 977L, 142L, 865L, 134L, 
    83L, 259L, 430L, 99L, 285L, 141L, 123L, 81L, 258L, 145L, 
    400L, 90L, 138L, 563L, 515L, 73L, 469L, 1253L, 381L, 4116L, 
    213L, 118L, 186L, 108L, 138L, 203L, 964L, 437L, 32482L, 213L, 
    2561L, 186L, 19776L, 138L, 7037L, 460L, 230L, 343L, 66L, 
    69L, 590L, 587L, 100L, 662L, 82L, 112L, 122L, 91L, 612L, 
    128L, 81L, 129L, 97L, 200L, 269L, 362L, 293L, 160L, 143L, 
    142L, 231L, 142L, 210L, 113L, 990L, 124L), strand = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c(" ", 
    "-", "*"), class = "factor"), type = structure(c(3L, 1L, 
    1L, 2L, 1L, 1L, 2L, 1L, 4L, 3L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
    1L, 4L, 3L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 4L, 
    3L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 4L, 3L, 1L, 2L, 1L, 
    2L, 1L, 4L, 1L, 3L, 1L, 2L, 4L, 1L, 4L, 3L, 1L, 2L, 1L, 4L, 
    3L, 1L, 2L, 1L, 4L, 3L, 1L, 2L, 1L, 4L, 3L, 1L, 3L, 1L, 2L, 
    1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 4L, 3L, 3L, 1L, 1L, 2L, 
    1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 2L, 1L, 1L, 2L, 1L, 4L, 3L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    4L, 3L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 4L, 4L, 1L, 2L, 2L, 1L, 
    1L, 3L, 4L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 
    1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 3L), .Label = c("J", 
    "I", "F", "L", "U"), class = "factor"), txName = structure(list(
        c("uc002dbv.3", "uc010buy.3", "uc010buz.3"), c("uc002dbv.3", 
        "uc010buy.3"), "uc010buz.3", c("uc002dbv.3", "uc010buy.3"
        ), "uc010buy.3", "uc002dbv.3", "uc010buy.3", "uc010buy.3", 
        c("uc002dbv.3", "uc010buy.3", "uc010buz.3"), "uc002dln.3", 
        "uc002dln.3", "uc002dln.3", "uc002dln.3", "uc002dln.3", 
        "uc002dln.3", "uc002dln.3", "uc002dln.3", "uc002dln.3", 
        "uc002dln.3", "uc002dln.3", "uc002dln.3", "uc002dln.3", 
        "uc002dln.3", "uc002dln.3", "uc002dln.3", "uc002dln.3", 
        "uc002dln.3", "uc002dln.3", "uc002dln.3", "uc002dln.3", 
        "uc002dln.3", "uc002dln.3", "uc002dln.3", "uc002dln.3", 
        "uc002dmb.1", "uc002dmb.1", "uc002dmb.1", "uc002dmb.1", 
        "uc002dmb.1", "uc002dmb.1", "uc002dmb.1", "uc002dmb.1", 
        "uc002dmb.1", "uc002dmb.1", "uc002dmb.1", "uc002dmb.1", 
        "uc002dmb.1", "uc002doc.3", "uc002doc.3", "uc002doc.3", 
        "uc002doc.3", "uc002doc.3", "uc002doc.3", "uc002doc.3", 
        "uc002doc.3", "uc002doc.3", "uc002doc.3", "uc002doc.3", 
        "uc002dtr.4", "uc002dtr.4", "uc002dtr.4", "uc002dtr.4", 
        "uc002dtr.4", "uc002dtr.4", "uc002dtr.4", "uc010vhe.2", 
        c("uc002ejl.3", "uc002ejm.3"), c("uc002ejl.3", "uc002ejm.3"
        ), "uc002ejl.3", "uc002ejm.3", "uc002ejl.3", "uc002ejl.3", 
        "uc002ejn.3", "uc002ejn.3", c("uc010vhe.2", "uc002ejn.3"
        ), c("uc010vhe.2", "uc002ejn.3"), c("uc010vhe.2", "uc002ejn.3"
        ), "uc002ejt.3", "uc002ejt.3", "uc002ejt.3", "uc002ejt.3", 
        "uc002ejt.3", "uc002ejw.3", "uc002ejw.3", "uc002ejw.3", 
        "uc002ejw.3", "uc002ejw.3", "uc002eqi.3", "uc002eqi.3", 
        "uc002eqj.3", "uc002eqj.3", c("uc002eqi.3", "uc002eqj.3"
        ), c("uc002eqi.3", "uc002eqj.3"), c("uc002eqi.3", "uc002eqj.3"
        ), c("uc002eqi.3", "uc002eqj.3"), c("uc002eqi.3", "uc002eqj.3"
        ), c("uc002eqi.3", "uc002eqj.3"), c("uc002eqi.3", "uc002eqj.3"
        ), c("uc002eqi.3", "uc002eqj.3"), c("uc002eqi.3", "uc002eqj.3"
        ), c("uc002eqi.3", "uc002eqj.3"), c("uc002eqi.3", "uc002eqj.3"
        ), c("uc002eqq.3", "uc002eqr.3"), "uc002eqs.3", c("uc002eqq.3", 
        "uc002eqr.3"), "uc002eqs.3", c("uc002eqq.3", "uc002eqr.3", 
        "uc002eqs.3"), c("uc002eqq.3", "uc002eqr.3", "uc002eqs.3"
        ), c("uc002eqq.3", "uc002eqr.3", "uc002eqs.3"), c("uc002eqq.3", 
        "uc002eqr.3", "uc002eqs.3"), c("uc002eqq.3", "uc002eqr.3", 
        "uc002eqs.3"), c("uc002eqq.3", "uc002eqr.3", "uc002eqs.3"
        ), c("uc002eqq.3", "uc002eqr.3", "uc002eqs.3"), c("uc002eqq.3", 
        "uc002eqr.3", "uc002eqs.3"), c("uc002eqq.3", "uc002eqr.3", 
        "uc002eqs.3"), c("uc002eqq.3", "uc002eqr.3", "uc002eqs.3"
        ), c("uc002eqq.3", "uc002eqr.3", "uc002eqs.3"), c("uc002eqq.3", 
        "uc002eqr.3", "uc002eqs.3"), c("uc002eqq.3", "uc002eqr.3", 
        "uc002eqs.3"), c("uc002eqq.3", "uc002eqr.3", "uc002eqs.3"
        ), c("uc002eqq.3", "uc002eqr.3", "uc002eqs.3"), c("uc002eqq.3", 
        "uc002eqr.3", "uc002eqs.3"), "uc002eqq.3", c("uc002eqr.3", 
        "uc002eqs.3"), "uc002eqq.3", c("uc002eqr.3", "uc002eqs.3"
        ), c("uc002eqq.3", "uc002eqr.3", "uc002eqs.3"), c("uc002eqq.3", 
        "uc002eqr.3", "uc002eqs.3"), c("uc002eqq.3", "uc002eqr.3", 
        "uc002eqs.3"), "uc002etd.3", "uc002etd.3", "uc002etd.3", 
        "uc002etd.3", "uc002etd.3", "uc002etd.3", "uc002etd.3", 
        "uc002etd.3", "uc002etd.3", "uc002fgv.3", "uc002fgv.3", 
        "uc002fgv.3", "uc002fgv.3", "uc002fgv.3", "uc002fgv.3", 
        "uc002fgv.3", "uc002fgv.3", "uc002fgv.3", c("uc002eju.1", 
        "uc002ejv.1"), c("uc002eju.1", "uc002ejv.1"), "uc002eju.1", 
        "uc002ejv.1", "uc002eju.1", "uc002ejv.1", c("uc002eju.1", 
        "uc002ejv.1"), c("uc002fdm.3", "uc002fdn.3"), c("uc002fdm.3", 
        "uc002fdn.3"), c("uc002fdm.3", "uc002fdn.3"), c("uc002fdm.3", 
        "uc002fdn.3"), c("uc002fdm.3", "uc002fdn.3"), c("uc002fdm.3", 
        "uc002fdn.3"), c("uc002fdm.3", "uc002fdn.3"), c("uc002fdm.3", 
        "uc002fdn.3"), c("uc002fdm.3", "uc002fdn.3"), c("uc002fdm.3", 
        "uc002fdn.3"), "uc002fdn.3", "uc002fdm.3", "uc002fdn.3", 
        "uc002fdm.3", c("uc002fdm.3", "uc002fdn.3"), c("uc002fdm.3", 
        "uc002fdn.3"), c("uc002fdm.3", "uc002fdn.3"), c("uc002fdm.3", 
        "uc002fdn.3"), c("uc002fdm.3", "uc002fdn.3"), c("uc002fdm.3", 
        "uc002fdn.3"), c("uc002fdm.3", "uc002fdn.3"), c("uc002fdm.3", 
        "uc002fdn.3"), c("uc002fdm.3", "uc002fdn.3")), class = "AsIs"), 
    geneName = structure(list("608", "608", "608", "608", "608", 
        "608", "608", "608", "608", "6338", "6338", "6338", "6338", 
        "6338", "6338", "6338", "6338", "6338", "6338", "6338", 
        "6338", "6338", "6338", "6338", "6338", "6338", "6338", 
        "6338", "6338", "6338", "6338", "6338", "6338", "6338", 
        "63928", "63928", "63928", "63928", "63928", "63928", 
        "63928", "63928", "63928", "63928", "63928", "63928", 
        "63928", "343", "343", "343", "343", "343", "343", "343", 
        "343", "343", "343", "343", "653808", "653808", "653808", 
        "653808", "653808", "653808", "653808", "4499", "4493", 
        "4493", "4493", "4493", "4493", "4493", "4499", "4499", 
        "4499", "4499", "4499", "4494", "4494", "4494", "4494", 
        "4494", "4496", "4496", "4496", "4496", "4496", "766", 
        "766", "766", "766", "766", "766", "766", "766", "766", 
        "766", "766", "766", "766", "766", "766", "8824", "8824", 
        "8824", "8824", "8824", "8824", "8824", "8824", "8824", 
        "8824", "8824", "8824", "8824", "8824", "8824", "8824", 
        "8824", "8824", "8824", "8824", "8824", "8824", "8824", 
        "8824", "8824", "8824", "8824", "3291", "3291", "3291", 
        "3291", "3291", "3291", "3291", "3291", "3291", "3294", 
        "3294", "3294", "3294", "3294", "3294", "3294", "3294", 
        "3294", "4495", "4495", "4495", "4495", "4495", "4495", 
        "4495", "197257", "197257", "197257", "197257", "197257", 
        "197257", "197257", "197257", "197257", "197257", "197257", 
        "197257", "197257", "197257", "197257", "197257", "197257", 
        "197257", "197257", "197257", "197257", "197257", "197257"), class = "AsIs")), row.names = c(NA, 
-177L), class = "data.frame")

CodePudding user response:

Consider ave for groupwise calculation and use seq_along for group numbering:

# Label every row with its type followed by its running count within that
# type (the first "J" row becomes "J1", the second "J2", and so on).
# ave() applies seq_along() separately to each group of rows sharing a `type`
# and returns the per-group counters in the original row order.
# seq_along(type) is used instead of 1:nrow(txf_df) so that a data frame with
# zero rows yields an empty column rather than the sequence c(1, 0).
txf_df$variant <- with(
  txf_df,
  paste0(type, ave(seq_along(type), type, FUN = seq_along))
)

While it is not clear why you need to dump all of those specific variants to the console without context, consider using subset and then View or print to show all columns, or a single column, of the data frame for those particular variants:

# Keep only the rows labelled J3, J10 or J13 (all columns are retained).
sub_txf_df <- subset(txf_df, subset = variant %in% c("J3", "J10", "J13"))

# Inspect the result in the spreadsheet-style data viewer.
View(sub_txf_df)

# Print the first and the last rows of the subset (up to six each).
print(head(sub_txf_df))
print(tail(sub_txf_df))

# Print only the variant labels.
print(sub_txf_df$variant)

CodePudding user response:

As I understand it, you need to number the rows within each type in the order in which they appear. I did this with the dplyr library (group_by followed by mutate); after that you can proceed with the loop as usual. Notice the difference between the condition you wrote and the one that works. Here df is the data frame:

library(dplyr)

# Number the rows within each type in order of appearance, so the i-th row of
# type "J" gets id_in_group == i. ungroup() drops the grouping afterwards so
# later operations on df are not silently performed per type.
df <- df %>%
  group_by(type) %>%
  mutate(id_in_group = row_number()) %>%
  ungroup()

# Walk the rows and print only the wanted variant labels.
# seq_len(nrow(df)) is used instead of 1:nrow(df), which would iterate over
# c(1, 0) when df has no rows.
for (i in seq_len(nrow(df))) {
  variant <- paste0(df$type[i], df$id_in_group[i])
  # Skip every label that is not one of the three variants of interest.
  if (!(variant %in% c("J3", "J10", "J13"))) {
    next
  } else {
    print(variant)
  }
}

Or, in a more elegant way, with dplyr

# Rows whose variant label is NOT one of J3, J10 or J13.
# Both columns are built in a single mutate(): id_in_group is the running
# count of the row within its type, and variant pastes it onto the type.
df %>%
  group_by(type) %>%
  mutate(
    id_in_group = row_number(),
    variant = paste0(type, id_in_group)
  ) %>%
  filter(!(variant %in% c("J3", "J10", "J13")))

# Rows whose variant label IS one of J3, J10 or J13.
df %>%
  group_by(type) %>%
  mutate(
    id_in_group = row_number(),
    variant = paste0(type, id_in_group)
  ) %>%
  filter(variant %in% c("J3", "J10", "J13"))
  •  Tags:  
  • r
  • Related