Home > front end >  Split 600 columns into 2 new columns for each one, with old and new column names in vectors, in R
Split 600 columns into 2 new columns for each one, with old and new column names in vectors, in R

Time:05-06

I want to split 600 columns (listed in a vector) at a delimiter (in this case a '/') into 2 new columns for each one (also listed as vectors). I've worked out basic logic for the split as shown below, but is there a better way than having hundereds of lines of code to get this done? Any ideas?

df1 <- data.frame(codes1 = c('H5394/6N938', '49J62/P82', '142X394/652876','1057C83/25394', '45N564/3558', '49405/A8174'),
                  codes2 = c('W34142/83X652', '5L622/482S3', '44N574/358866','1P47484/724A94', '454N64/3458', '49A05/28774'),
                  codes3 = c('19K4/139D54', '4T3962/78D43', '18V94/682876','P083/28394', '434/8558', '43405/2387N'))
originalvar <- c('codes1', 'codes2', 'codes3')
newleftvar <- c('leftcode_0hr', 'leftcode_2hr', 'leftcode_4hr')
newrightvar <- c('rightcode_0hr', 'rightcode_2hr', 'rightcode_4hr')

df1

          codes1         codes2       codes3
1    H5394/6N938  W34142/83X652  19K4/139D54
2      49J62/P82    5L622/482S3 4T3962/78D43
3 142X394/652876  44N574/358866 18V94/682876
4  1057C83/25394 1P47484/724A94   P083/28394
5    45N564/3558    454N64/3458     434/8558
6    49405/A8174    49A05/28774  43405/2387N
#my lame approach - lol
df1$leftcode_0hr <- substr(df1$codes1, 1, stringr::str_locate(df1$codes1, "/")-1)
df1$rightcode_0hr <- substr(df1$codes1, stringr::str_locate(df1$codes1, "/") 1, nchar(df1$codes1))
df1$leftcode_2hr <- substr(df1$codes2, 1, stringr::str_locate(df1$codes2, "/")-1)
df1$rightcode_2hr <- substr(df1$codes2, stringr::str_locate(df1$codes2, "/") 1, nchar(df1$codes2))
df1$leftcode_4hr <- substr(df1$codes3, 1, stringr::str_locate(df1$codes3, "/")-1)
df1$rightcode_4hr <- substr(df1$codes3, stringr::str_locate(df1$codes3, "/") 1, nchar(df1$codes3))

df1

          codes1         codes2       codes3 leftcode_0hr rightcode_0hr leftcode_2hr rightcode_2hr leftcode_4hr rightcode_4hr
1    H5394/6N938  W34142/83X652  19K4/139D54        H5394         6N938       W34142        83X652         19K4        139D54
2      49J62/P82    5L622/482S3 4T3962/78D43        49J62           P82        5L622         482S3       4T3962         78D43
3 142X394/652876  44N574/358866 18V94/682876      142X394        652876       44N574        358866        18V94        682876
4  1057C83/25394 1P47484/724A94   P083/28394      1057C83         25394      1P47484        724A94         P083         28394
5    45N564/3558    454N64/3458     434/8558       45N564          3558       454N64          3458          434          8558
6    49405/A8174    49A05/28774  43405/2387N        49405         A8174        49A05         28774        43405         2387N

CodePudding user response:

You can do this:

separate(
  df1 %>% pivot_longer(everything()),
  value, into=c("left", "right"), sep = "/"
) %>%
  pivot_wider(names_from="name", values_from = left:right, values_fn = list,names_sep = "") %>% 
  unnest(everything()) %>% 
  rename_with(~paste0(
    str_extract(.x,"^(left|right)code"),
    paste0("_",as.numeric(str_extract(.x,"\\d "))*2-2,"hr")
  )) %>% 
  relocate(unlist(lapply(1:ncol(df1),\(i) c(i,i ncol(df1)))))

Output:

  leftcode_0hr rightcode_0hr leftcode_2hr rightcode_2hr leftcode_4hr rightcode_4hr
  <chr>        <chr>         <chr>        <chr>         <chr>        <chr>        
1 H5394        6N938         W34142       83X652        19K4         139D54       
2 49J62        P82           5L622        482S3         4T3962       78D43        
3 142X394      652876        44N574       358866        18V94        682876       
4 1057C83      25394         1P47484      724A94        P083         28394        
5 45N564       3558          454N64       3458          434          8558         
6 49405        A8174         49A05        28774         43405        2387N        

Here is another approach, which simply takes your initial idea, and wraps it in a function, which can then be applied to each column name of df1:

f <- function(code) {
  suffix = paste0("_", as.numeric(str_extract(code,"\\d "))*2-2,"hr")
  setNames(
    list(
      substr(df1[[code]], 1, stringr::str_locate(df1[[code]], "/")-1),
      substr(df1[[code]], stringr::str_locate(df1[[code]], "/") 1, nchar(df1[[code]]))
    ),paste0(c("leftcode", "rightcode"),suffix)
  )
}
as.data.frame(lapply(colnames(df1), f))

Output:

  leftcode_0hr rightcode_0hr leftcode_2hr rightcode_2hr leftcode_4hr rightcode_4hr
1        H5394         6N938       W34142        83X652         19K4        139D54
2        49J62           P82        5L622         482S3       4T3962         78D43
3      142X394        652876       44N574        358866        18V94        682876
4      1057C83         25394      1P47484        724A94         P083         28394
5       45N564          3558       454N64          3458          434          8558
6        49405         A8174        49A05         28774        43405         2387N

CodePudding user response:

And in base:

how_many <- seq(1, length(unlist(strsplit(df1$codes1, '/'))), 1)
df1$left_0hr <- unlist(strsplit(df1$codes1, '/'))[which((how_many %% 2 == 0) == FALSE)]
df1
          codes1         codes2       codes3 left_0hr
1    H5394/6N938  W34142/83X652  19K4/139D54    H5394
2      49J62/P82    5L622/482S3 4T3962/78D43    49J62
3 142X394/652876  44N574/358866 18V94/682876  142X394
4  1057C83/25394 1P47484/724A94   P083/28394  1057C83
5    45N564/3558    454N64/3458     434/8558   45N564
6    49405/A8174    49A05/28774  43405/2387N    49405

and so on.

  • Related