Home > Back-end >  r regex Remove all character before and after bracket
R regex: remove all characters before and after the brackets in column names

Time:10-25

Is it possible to remove everything outside the brackets from each column name (keeping only the number inside them), for every column except the first?

Input df

# Example input: an ID column followed by columns named "NAME (number)".
# check.names = FALSE keeps the spaces and parentheses in those names intact.
df <- data.frame(
  CCLE_ID = rep("AUTONOMIC_GANGLIA", 5),
  `A1BG (1)` = rep(NA_real_, 5),
  `A1CF (29974)` = c(
    0.0100738474498, 0.00419071223405, 0.161435671978,
    0.00437517766114, 0.00494118028018
  ),
  `A2M (2)` = rep(NA_real_, 5),
  `A2ML1 (144568)` = rep(NA_real_, 5),
  `A4GALT (53947)` = rep(NA_real_, 5),
  `A4GNT (51146)` = rep(NA_real_, 5),
  `AAAS (8086)` = c(
    0.0261000247231, 0.00339180018571, 0.0124666557843,
    0.00222981468535, 0.00236993307389
  ),
  `AACS (65985)` = rep(NA_real_, 5),
  `AADAC (13)` = rep(NA_real_, 5),
  check.names = FALSE
)



Output df

# Desired result: same data as the input, but each non-ID column is named
# only by the number that appeared inside the parentheses.
df1 <- data.frame(
  CCLE_ID = rep("AUTONOMIC_GANGLIA", 5),
  `1` = rep(NA_real_, 5),
  `29974` = c(
    0.0100738474498, 0.00419071223405, 0.161435671978,
    0.00437517766114, 0.00494118028018
  ),
  `2` = rep(NA_real_, 5),
  `144568` = rep(NA_real_, 5),
  `53947` = rep(NA_real_, 5),
  `51146` = rep(NA_real_, 5),
  `8086` = c(
    0.0261000247231, 0.00339180018571, 0.0124666557843,
    0.00222981468535, 0.00236993307389
  ),
  `65985` = rep(NA_real_, 5),
  `13` = rep(NA_real_, 5),
  check.names = FALSE
)

Then I would like to rename the columns of df1 (old name to new name) using another data frame, df2:

# Lookup table: row i pairs an existing column name with its replacement.
df2 <- data.frame(
  oldname = c(
    "CCLE_ID", "1", "29974", "2", "144568",
    "53947", "51146", "8086", "65985", "13"
  ),
  newname = c(
    "CCLE_ID", "ESN", "PSA", "TGI", "PICJ",
    "TMNS", "IUJE", "UED", "PUQD", "STGW"
  ),
  check.names = FALSE
)

Thank you in advance.

CodePudding user response:

Try this

# Requires dplyr (`%>%`, `rename_with`) and stringr (`str_match`).
# Rename every column except CCLE_ID to the number inside its trailing
# parentheses, e.g. "A1CF (29974)" -> "29974".
df %>%
  rename_with(
    # "\\((\\d+)\\)$" matches "(<one or more digits>)" at the end of the name.
    # The `+` is required: without it no name matches and every new name is NA.
    # Column 2 of the str_match() result is the first capture group (the digits).
    ~ str_match(.x, "\\((\\d+)\\)$")[, 2],
    .cols = -CCLE_ID)

Results in

> df %>%
    rename_with(
      ~ str_match(.x, "\\((\\d+)\\)$")[,2],
      .cols = -CCLE_ID)
            CCLE_ID  1       29974  2 144568 53947 51146        8086 65985 13
1 AUTONOMIC_GANGLIA NA 0.010073847 NA     NA    NA    NA 0.026100025    NA NA
2 AUTONOMIC_GANGLIA NA 0.004190712 NA     NA    NA    NA 0.003391800    NA NA
3 AUTONOMIC_GANGLIA NA 0.161435672 NA     NA    NA    NA 0.012466656    NA NA
4 AUTONOMIC_GANGLIA NA 0.004375178 NA     NA    NA    NA 0.002229815    NA NA
5 AUTONOMIC_GANGLIA NA 0.004941180 NA     NA    NA    NA 0.002369933    NA NA
  • Related