Home > Software engineering >  Rename columns in a large list with different data frame lengths
Rename columns in a large list with different data frame lengths

Time:04-01

I have used lapply to create a large list containing 14000 individual data frames of various lengths; see this question for more background.

I now have a large list with many data frames that have different numbers of columns, ranging from 84 to 315. I would like to rename the columns of every data frame using two fixed names followed by a sequence from d0 to dxx, where xx depends on the number of columns:

col1   col2   col3   col4   col5 ... colxx
  ID    doy     d0     d1     d2       dxx

This means the first data frame will have a sequence of d0 to d312, while the last data frame will have a sequence of d0 to d81.

I tried the following:

df = lapply(df_sub, function(x) {names(x)[1:314] <- c("ID", "doy","d0","d1","d2","d3","d4","d5","d6","d7","d8","d9","d10","d11","d12","d13","d14","d15","d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29","d30",                             "d31","d32","d33","d34","d35","d36","d37","d38","d39","d40","d41","d42","d43","d44","d45","d46","d47","d48","d49","d50",d51","d52","d53","d54","d55","d56","d57","d58","d59","d60",d61","d62","d63","d64","d65","d66","d67","d68","d69","d70","d71","d72","d73","d74","d75","d76","d77","d78","d79","d80","d81","d82","d83","d84","d85","d86","d87","d88","d89","d90","d91","d92","d93","d94","d95","d96","d97","d98","d99","d100",d101","d102","d103","d104","d105","d106","d107","d108","d109","d110","d111","d112","d113","d114","d115","d116","d117","d118","d119","d120","d121","d122","d123","d124","d125","d126","d127","d128","d129","d130","d131","d132","d133","d134","d135","d136","d137","d138","d139","d140","d141","d142","d143","d144","d145","d146","d147","d148","d149","d150","d151","d152","d153","d154","d155","d156","d157","d158","d159","d160","d161","d162","d163","d164","d165","d166","d167","d168","d169","d170","d171","d172","d173","d174","d175","d176","d177","d178","d179","d180","d181","d182","d183","d184","d185","d186","d187","d188","d189","d190","d191","d192","d193","d194","d195","d196","d197","d198","d199","d200","d201","d202","d203","d204","d205","d206","d207","d208","d209","d210","d211","d212","d213","d214","d215","d216","d217","d218","d219","d220","d221","d222","d223","d224","d225","d226","d227","d228","d229","d230","d231","d232","d233","d234","d235","d236","d237","d238","d239","d240","d241","d242","d243","d244","d245","d246","d247","d248","d249","d250","d251","d252","d253","d254","d255","d256","d257","d258","d259","d260","d261","d262","d263","d264","d265","d266","d267","d268","d269","d270","d271","d272","d273","d274","d275","d276","d277","d278","d279","d280","d281","d282","d283","d284","d285","d286","d287","d
288","d289","d290","d291","d292","d293","d294","d295","d296","d297","d298","d299","d300","d301","d302","d303","d304","d305","d306","d307","d308","d309","d310","d311","d312"); x})

Error in names(x) <- `*vtmp*` : 
  'names' attribute [315] must be the same length as the vector [314]

How can this be done otherwise in R?

CodePudding user response:

library(magrittr)

# Example data: a list of three one-row data frames with three, two and one
# column(s) respectively, so the data frames differ in their number of columns.
dfs <- list(
  data.frame(a = 1, b = 2, c = 3),
  data.frame(x = 1, y = 2),
  data.frame(z = 0)
)

# Rename the columns of every data frame in `dfs` to d0, d1, d2, ... based on
# how many columns that data frame has, and return the renamed data frames
# as a list.
lapply(dfs, function(frame) {
  n_cols <- length(colnames(frame))
  # seq(n_cols) counts 1..n_cols; shifting by one makes the labels start at d0.
  colnames(frame) <- paste0("d", seq(n_cols) - 1)
  frame
})
#> [[1]]
#>   d0 d1 d2
#> 1  1  2  3
#> 
#> [[2]]
#>   d0 d1
#> 1  1  2
#> 
#> [[3]]
#>   d0
#> 1  0

Created on 2022-04-01 by the reprex package (v2.0.0)

CodePudding user response:

This only works if every data frame has three or more columns, but I guess that can be assumed here.

# Sample data: the first six rows of three built-in data sets.
df <- lapply(list(mtcars, iris, CO2), head)

# Label the first two columns "ID" and "doy" and number the remaining columns
# d0, d1, ... . The 0:(n - 3) sequence assumes at least three columns.
lapply(df, function(d) {
  day_labels <- paste0("d", 0:(length(d) - 3))
  setNames(d, c("ID", "doy", day_labels))
})
#> [[1]]
#>                     ID doy  d0  d1   d2    d3    d4 d5 d6 d7 d8
#> Mazda RX4         21.0   6 160 110 3.90 2.620 16.46  0  1  4  4
#> Mazda RX4 Wag     21.0   6 160 110 3.90 2.875 17.02  0  1  4  4
#> Datsun 710        22.8   4 108  93 3.85 2.320 18.61  1  1  4  1
#> Hornet 4 Drive    21.4   6 258 110 3.08 3.215 19.44  1  0  3  1
#> Hornet Sportabout 18.7   8 360 175 3.15 3.440 17.02  0  0  3  2
#> Valiant           18.1   6 225 105 2.76 3.460 20.22  1  0  3  1
#> 
#> [[2]]
#>    ID doy  d0  d1     d2
#> 1 5.1 3.5 1.4 0.2 setosa
#> 2 4.9 3.0 1.4 0.2 setosa
#> 3 4.7 3.2 1.3 0.2 setosa
#> 4 4.6 3.1 1.5 0.2 setosa
#> 5 5.0 3.6 1.4 0.2 setosa
#> 6 5.4 3.9 1.7 0.4 setosa
#> 
#> [[3]]
#>    ID    doy         d0  d1   d2
#> 1 Qn1 Quebec nonchilled  95 16.0
#> 2 Qn1 Quebec nonchilled 175 30.4
#> 3 Qn1 Quebec nonchilled 250 34.8
#> 4 Qn1 Quebec nonchilled 350 37.2
#> 5 Qn1 Quebec nonchilled 500 35.3
#> 6 Qn1 Quebec nonchilled 675 39.2

Created on 2022-04-01 by the reprex package (v2.0.1)

CodePudding user response:

A minor modification to your existing code is to check how many columns the dataframe has, before creating the new names:

# Example data: three all-NA data frames with 5 rows and 4, 5 and 6 columns.
df <- lapply(seq_len(3), function(i) as.data.frame(matrix(NA, 5, i + 3)))

# Rename the columns of one data frame by position: the first column becomes
# "ID", the second "Day", and every further column d0, d1, ... in order.
#
# x: a data frame (or other object whose names can be assigned).
# Returns x with only its names changed.
f <- function(x) {
  n_cols <- length(names(x))
  # Number of d-columns; never negative, so narrow inputs do not error.
  n_days <- max(n_cols - 2L, 0L)
  # sprintf() returns character(0) for zero-length input, whereas
  # paste0("d", integer(0)) would return the stray name "d".
  new_names <- c("ID", "Day", sprintf("d%d", seq_len(n_days) - 1L))
  # Truncate so inputs with fewer than two columns get only the names they
  # have room for.
  names(x) <- new_names[seq_len(n_cols)]
  x
}

lapply(df, f)
# [[1]]
#   ID Day d0 d1
# 1 NA  NA NA NA
# 2 NA  NA NA NA
# 3 NA  NA NA NA
# 4 NA  NA NA NA
# 5 NA  NA NA NA
# 
# [[2]]
#   ID Day d0 d1 d2
# 1 NA  NA NA NA NA
# 2 NA  NA NA NA NA
# 3 NA  NA NA NA NA
# 4 NA  NA NA NA NA
# 5 NA  NA NA NA NA
# 
# [[3]]
#   ID Day d0 d1 d2 d3
# 1 NA  NA NA NA NA NA
# 2 NA  NA NA NA NA NA
# 3 NA  NA NA NA NA NA
# 4 NA  NA NA NA NA NA
# 5 NA  NA NA NA NA NA
  • Related