Home > Mobile >  Keep only letters in all rows of specific column - remove all other characters
Keep only letters in all rows of specific column - remove all other characters

Time:12-09

This is what the example data looks like:

# Print the example data frame; the console output is reproduced below as comments.
exp_data
#                       Name Greg   Matt
# 1 Y.L[ 12,000]STISKDLITY.M   NA L[ 12]
# 2 Y.L[ 12,000]STISKDLITY.M   NA L[ 12]

dput:

 # Reproducible example data (dput output): a two-row data.frame with a factor
 # column Name (both rows use level 71), a Greg column of NA, and a factor
 # column Matt (both rows use level 6).
 exp_data <- structure(list(Name = structure(c(71L,71L), .Label = c("F.AM[ 15,995]KTKAAL.A", "F.AMKTKAAL.A", "F.EKIKAAY.L", 
 "F.EKIKAAYL.S", "F.NPTAGC[ 58,005]ASL[ 12,000]AKEM[ 12,000]F[ 1151,607].A", 
 "F.QGRVTM[ 15,995].T", "F.SGSNSGNTATL.T", "F.TGYY.M", "F.TNC[ 58,005]DF[ 1151,607]EKIKAAY.L", 
 "L.DKSITSL[ 370,222]Y.A", "L.DY[ 12,000]WGQGTL.V", "L.DYWGQGTL.V", 
 "L.EQVSQL.Q", "L.EQVSQLQGLW.R", "L.EWMGW.I", "L.ITY[ 1151,607]M[ 15,995]SGTKSTEF.N", 
 "L.KQQGGGLEVL.F", "L.KQQGGGLEVLF.Q", "L.L[ 504,270]KQQGGGLEVL.F", 
 "L.LKQQGGGL.E", "L.LKQQGGGLEVL.F", "L.QGLW.R", "L.RSDDTAVY.Y", 
 "L.RSDDTAVYY.C", "L.SRLRSDDTAVY.Y", "L.SRLRSDDTAVYY.C", "L.STISKDL[ 12,000]ITY.M", 
 "L.STISKDLITY.M", "L.STISKDLITY[ 1012,607]M[ 15,995].S", "L.STISKDLITY[ 12,000].M", 
 "L.STISKDLITY[ 12,000]M[ 386,228].S", "L.STISKDLITY[ 2918,448].M", 
 "L.STISKDLITY[ 762,322]M[ 15,995].S", "L.STISKDLITYM.S", "L.STISKDLITYM[ 1282,648].S", 
 "L.STISKDLITYM[ 1456,695].S", "L.STISKDLITYM[ 1490,759].S", "L.STISKDLITYM[ 371,206].S", 
 "L.TEIQSL.T", "L.TISRVEAGDEADY.Y", "L.TISRVEAGDEADY[ 12,000]Y.C", 
 "L.TISRVEAGDEADYY.C", "L.TISRVEAGDEADYY[ 12,000].C", "L.VTVSSGGGSEGGGSEGGGSEGGGSGSY.V", 
 "L.VTVSSGGGSEGGGSEGGGSEGGGSGSY[ 1239,661].V", "L.VTVSSGGGSEGGGSEGGGSEGGGSGSY[ 1987,847].V", 
 "L.VVY[ 1501,680]DDSDRPSGIPERF.S", "L.VVYDDSDRPSGIPERF.S", "M.KKARKSKVTTNKC[ 58,005]L[ 2909,467]EQVSQLQGL.W", 
 "M.SGTKSTEF.N", "M.TELDYW.G", "M.TRDTSISTAY.M", "M.TRDTSISTAY[ 12,000].M", 
 "M.TRDTSISTAYM.E", "M.TRDTSISTAYMEL.S", "W.GQGTL.V", "W.GQGTLVTVSSGGGSEGGGSEGGGSEGGGSGSY.V", 
 "W.GQGTLVTVSSGGGSEGGGSEGGGSEGGGSGSY[ 1239,661].V", "W.INPNSGGTNY.A", 
 "W.INPNSGGTNY[ 12,000].A", "W.VRQAPGQGL.E", "W.VRQAPGQGLEW.M", 
 "W.VRQAPGQGLEW[ 12,000]M[ 486,244].G", "W.VRQAPGQGLEWM.G", "W.Y[ 12,000]QQKPGQAPVLVVY.D", 
 "W.YQQKPGQAPVL.V", "W.YQQKPGQAPVL[ 12,000]VVY.D", "W.YQQKPGQAPVLVVY.D", 
 "Y.AQKF.Q", "Y.DDSDRPSGIPERF.S", "Y.L[ 12,000]STISKDLITY.M", 
 "Y.LSTISKDL.I", "Y.LSTISKDL[ 12,000]ITY.M", "Y.LSTISKDLITY.M", 
 "Y.M[ 12,000]SGTKSTEF.N", "Y.M[ 15,995]EL.S", "Y.M[ 15,995]ELSRL.R", 
 "Y.M[ 15,995]SGTKSTEF.N", "Y.MELSRL.R", "Y.MSGTKSTEF.N", "Y.QQKPGQAPVL.V", 
 "Y.QQKPGQAPVL[ 12,000]VVY.D", "Y.QQKPGQAPVL[ 12,000]VVYDDSDRPSGIPERF.S", 
 "Y.QQKPGQAPVLVVY.D", "Y.QQKPGQAPVLVVYDDSDRPSGIPERF.S", "Y.TFTGY.Y", 
 "Y.TFTGYY.M", "Y.TILDKSITSL.Y", "Y.VLTQPPSVSVAPGQTARITC[ 58,005]GGNNIGSKSVHW.Y", 
 "Y.WGQGTL.V", "Y.YMHW.V"), class = "factor"), Greg = c(NA, 
 NA), Matt = structure(c(6L, 6L), .Label = c("","C[ 58]", "C[ 58], F[ 1152]", "C[ 58], F[ 1152], L[ 12], M[ 12]", 
 "C[ 58], L[ 2909]", "L[ 12]", "L[ 370]", "L[ 504]", "M[ 12]", 
 "M[ 1283]", "M[ 1457]", "M[ 1491]", "M[ 16]", "M[ 16], Y[ 1013]", 
 "M[ 16], Y[ 1152]", "M[ 16], Y[ 762]", "M[ 371]", "M[ 386], Y[ 12]", 
 "M[ 486], W[ 12]", "Y[ 12]", "Y[ 1240]", "Y[ 1502]", "Y[ 1988]", 
 "Y[ 2918]"), class = "factor")), row.names = 1:2, class = "data.frame")

I would like to focus on the column named Name and keep only the letters in every row. The data frame is extremely long, and the rows contain all types of characters (numbers, dots, question marks, etc.) at the beginning of the string, in the middle, at the end, and between specific letters. I would like to keep only the letters in all of these rows.

CodePudding user response:

Using gsub:

# Remove every run of non-letter characters from Name, keeping only A-Z/a-z.
# The quantifier must be `+` (one or more non-letters); a literal space after
# the character class would only match a non-letter that is followed by a space.
# gsub() works on the character form of its input, so Name ends up as a
# character vector even though it starts out as a factor.
exp_data$Name <- gsub("[^A-Za-z]+", "", exp_data$Name)

CodePudding user response:

# Store a letters-only copy of Name in a new column, leaving Name itself
# unchanged; every character that is not A-Z or a-z is deleted.
exp_data$clean_name <- gsub("[^a-zA-Z]", "", exp_data$Name)
exp_data
#                       Name Greg   Matt    clean_name
# 1 Y.L[ 12,000]STISKDLITY.M   NA L[ 12] YLSTISKDLITYM
# 2 Y.L[ 12,000]STISKDLITY.M   NA L[ 12] YLSTISKDLITYM
  •  Tags:  
  • r
  • Related