I created a function, `renameCol`, that creates unique column names from the existing ones.
If I manually create a vector of new column names using that function, I can set those as the new column names. However, if I use that function in `rename_with`, I get an error about unique names.
library(tidyverse)
# Build a short column name such as "Switch_11" or "Code_3" from one
# "HealthcareProvider<Type>_<number>" column name.
#
# colname: a single column name (character string).
# Returns: one string of the form "<Code|Switch>_<number>".
#
# NOTE: only the first element of `colname` is processed (see `[[1]]` below),
# so this function is not vectorised: passing several names still returns a
# single value.
renameCol <- function(colname) {
  # str_match_all() returns one match matrix per input string; keep the first.
  # Matrix columns: full match, outer group, inner group (type), number.
  groups <- stringr::str_match_all(
    colname,
    "HealthcareProvider((TaxonomyCode|PrimaryTaxonomySwitch))_([0-9]+)"
  )[[1]]
  coltype <- groups[[3]]
  # Strip the "Taxonomy" / "PrimaryTaxonomy" prefix, leaving "Code" or "Switch".
  coltype <- stringr::str_remove(coltype, "(Taxonomy|PrimaryTaxonomy)")
  number <- groups[[4]]
  paste0(coltype, "_", number)
}
# Called with one name at a time, the function returns the expected value.
renameCol("HealthcareProviderPrimaryTaxonomySwitch_11")
#> [1] "Switch_11"
renameCol("HealthcareProviderTaxonomyCode_11")
#> [1] "Code_11"
# Example tibble with four columns to rename.
tb = tibble(
HealthcareProviderPrimaryTaxonomySwitch_11 = 1,
HealthcareProviderTaxonomyCode_3 = 2,
HealthcareProviderPrimaryTaxonomySwitch_9 = 3,
HealthcareProviderTaxonomyCode_13 = 4
)
# rename_with() hands the whole vector of column names to the function in a
# single call, so this fails with the error shown below.
tb %>% rename_with(renameCol)
#> Error in `rename_with()`:
#> ! Names must be unique.
#> x These names are duplicated:
#> * "Switch_11" at locations 1, 2, 3, and 4.
# Apply renameCol() to each name separately. vapply() guarantees exactly one
# string per column, and USE.NAMES = FALSE drops the old names from the result.
new_colnames <- colnames(tb) %>%
  vapply(renameCol, character(1), USE.NAMES = FALSE)
new_colnames
#> [1] "Switch_11" "Code_3" "Switch_9" "Code_13"
colnames(tb) = new_colnames
tb
#> # A tibble: 1 x 4
#> Switch_11 Code_3 Switch_9 Code_13
#> <dbl> <dbl> <dbl> <dbl>
#> 1 1 2 3 4
Created on 2022-06-16 by the reprex package (v2.0.1)
CodePudding user response:
The answer is present in your question itself. Your function is not vectorised. It works for only one column name at a time.
library(tidyverse)
names(tb)
#[1] "HealthcareProviderPrimaryTaxonomySwitch_11"
#[2] "HealthcareProviderTaxonomyCode_3"
#[3] "HealthcareProviderPrimaryTaxonomySwitch_9"
#[4] "HealthcareProviderTaxonomyCode_13"
renameCol(names(tb))
#[1] "Switch_11"
Hence you have to use `sapply` to make it work for all the columns. `rename_with` does not loop over the names the way `sapply` does; it calls the function once with the whole vector of column names. So to make it work you can do:
tb %>% rename_with(~sapply(., renameCol))
# A tibble: 1 × 4
# Switch_11 Code_3 Switch_9 Code_13
# <dbl> <dbl> <dbl> <dbl>
#1 1 2 3 4
Or change the function to work with more than one column name.
# Vectorised renamer: turns "HealthcareProvider<Type>_<number>" column names
# into short names such as "Switch_11" or "Code_3".
#
# colname: character vector of column names (any length, including zero).
# Returns: a character vector of the same length as `colname`. Names that do
#   not match the expected pattern are returned unchanged, so the result can
#   always be used directly as the `.fn` of rename_with().
renameCol <- function(colname) {
  # str_match() yields exactly one row per input (NA row when nothing matches),
  # which keeps the output aligned with the input.
  # Matrix columns: full match, outer group, inner group (type), number.
  groups <- stringr::str_match(
    colname,
    "HealthcareProvider((TaxonomyCode|PrimaryTaxonomySwitch))_([0-9]+)"
  )
  # Strip the "Taxonomy" / "PrimaryTaxonomy" prefix, leaving "Code" or "Switch".
  coltype <- stringr::str_remove(groups[, 3], "(Taxonomy|PrimaryTaxonomy)")
  number <- groups[, 4]

  # Start from the original names and overwrite only those that matched.
  new_names <- colname
  matched <- !is.na(groups[, 1])
  new_names[matched] <- paste0(coltype[matched], "_", number[matched])
  new_names
}
renameCol(names(tb))
#[1] "Switch_11" "Code_3" "Switch_9" "Code_13"
tb %>% rename_with(renameCol)
# A tibble: 1 × 4
# Switch_11 Code_3 Switch_9 Code_13
# <dbl> <dbl> <dbl> <dbl>
#1 1 2 3 4