Home > Software engineering >  How to pivot_longer with multiple columns to pivot
How to pivot_longer with multiple columns to pivot

Time:12-06

I have a df in wide format that has columns for start and end times for negative and positive controls (total 4 columns). I want to condense those four columns into 3 columns with one being "control" that is binary with a "1" indicating it is control and a "0" indicating it is an experiment, the second being "start_time", and the third being "end_time". Please see my dput for MRE and preferably I would like to see how this is done with the tidyverse

data <- structure(list(region = c("el_valle", "el_valle", "el_valle", 
"el_valle", "el_valle", "el_valle", "el_valle", "el_valle", "el_cope", 
"el_cope", "el_cope", "el_cope", "el_cope", "el_cope", "el_cope", 
"el_cope", "el_cope", "santa_fe", "santa_fe", "santa_fe", "santa_fe", 
"santa_fe", "santa_fe", "el_cope", "el_cope", "el_cope", "el_cope", 
"el_cope", "el_cope"), site = c("jordinal", "jordinal", "jordinal", 
"jordinal", "jordinal", "rio_maria", "rio_maria", "rio_maria", 
"rio_blanco", "rio_blanco", "rio_blanco", "guabal", "guabal", 
"guabal", "rio_tigrero", "rio_tigrero", "rio_tigrero", "altos_de piedra", 
"altos_de piedra", "altos_de piedra", "altos_de piedra", "altos_de piedra", 
"altos_de piedra", "sophia_stream", "sophia_stream", "sophia_stream", 
"medina", "medina", "medina"), start_date = c("2022-07-21T00:00:00 00:00", 
"2022-07-21T00:00:00 00:00", "2022-07-21T00:00:00 00:00", "2022-07-21T00:00:00 00:00", 
"2022-07-21T00:00:00 00:00", "2022-07-21T00:00:00 00:00", "2022-07-21T00:00:00 00:00", 
"2022-07-21T00:00:00 00:00", "2022-07-23T00:00:00 00:00", "2022-07-23T00:00:00 00:00", 
"2022-07-23T00:00:00 00:00", "2022-07-24T00:00:00 00:00", "2022-07-24T00:00:00 00:00", 
"2022-07-24T00:00:00 00:00", "2022-07-26T00:00:00 00:00", "2022-07-26T00:00:00 00:00", 
"2022-07-26T00:00:00 00:00", "2022-07-28T00:00:00 00:00", "2022-07-28T00:00:00 00:00", 
"2022-07-28T00:00:00 00:00", "2022-07-28T00:00:00 00:00", "2022-07-28T00:00:00 00:00", 
"2022-07-28T00:00:00 00:00", "2022-07-29T00:00:00 00:00", "2022-07-29T00:00:00 00:00", 
"2022-07-29T00:00:00 00:00", "2022-07-29T00:00:00 00:00", "2022-07-29T00:00:00 00:00", 
"2022-07-29T00:00:00 00:00"), geometry = c("POINT (-80.0864405 8.6514853)", 
"POINT (-80.0865047 8.6514879)", "POINT (-80.0864767 8.6514699)", 
"POINT (-80.0864365 8.6514033)", "POINT (-80.59016347153077 8.657592487131978)", 
"POINT (-80.0723613 8.6433237)", "POINT (-80.0724193 8.6433145)", 
"POINT (-80.59012719701079 8.657653855819484)", "POINT (-80.59084727642541 8.65736553296408)", 
"POINT (-80.59008739454525 8.65762533073169)", "POINT (-80.5900603404817 8.657645618088532)", 
"POINT (-80.59006448025009 8.65775089241877)", "POINT (-80.5900978883977 8.657643924266186)", 
"POINT (-80.59013662686417 8.657665177897595)", "POINT (-119.81285845363512 39.537657157446034)", 
"POINT (-119.81285845363512 39.537657157446034)", "POINT (-119.81285845363512 39.537657157446034)", 
"POINT (-119.81306464514674 39.53745922338899)", "POINT (-119.81306464514674 39.53745922338899)", 
"POINT (-119.81306464514674 39.53745922338899)", "POINT (-119.81306464514674 39.53745922338899)", 
"POINT (-119.81306464514674 39.53745922338899)", "POINT (-119.81306464514674 39.53745922338899)", 
"POINT (-119.81300112116044 39.53757812923485)", "POINT (-119.81300112116044 39.53757812923485)", 
"POINT (-119.8130185476632 39.53759315771993)", "POINT (-119.81295059817982 39.53742649195985)", 
"POINT (-119.81295059817982 39.53742649195985)", "POINT (-119.8129264843882 39.53773406660491)"
), id_edna_sample = c("220721_edna_001", "220721_edna_002", "220721_edna_003", 
"220721_edna_004", "220721_edna_007", "220721_005", "220721_006", 
"220721_007", "220723_001", "220723_002", "220723_003", "220724_001", 
"220724_002", "220724_003", "220726_eDNA_001", "220726_eDNA_002", 
"220726_eDNA_003", "220728_eDNA_001", "220728_eDNA_002", "220728_eDNA_003", 
"220728_eDNA_004", "220728_eDNA_005", "220728_eDNA_006", "220729_eDNA_001", 
"220729_eDNA_002", "220729_eDNA_005", "220729_eDNA_003", "220729_eDNA_004", 
"220729_eDNA_005"), transect_meters_sampled_m = c(0, 0, 0, 0, 
0, 10, 10, 0, 200, 200, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, NA), start_time_negative_control_edna_filtering = structure(c(NA, 
NA, NA, NA, 67800, NA, NA, 67800, NA, NA, 67680, NA, NA, 65760, 
NA, NA, 62700, NA, NA, NA, NA, 68340, 68340, NA, NA, 45600, NA, 
NA, 45600), class = c("hms", "difftime"), units = "secs"), finish_time_negative_control_edna_filtering = structure(c(NA, 
NA, NA, NA, 68520, NA, NA, 68520, NA, NA, 67920, NA, NA, 66360, 
NA, NA, 63000, NA, NA, NA, NA, 68640, 68640, NA, NA, 46020, NA, 
NA, 46020), class = c("hms", "difftime"), units = "secs"), start_time_sample_edna_filtering = structure(c(39180, 
40020, 40800, 41940, NA, 54480, 55860, NA, 65280, 66480, NA, 
63300, 64380, NA, 61500, 60300, NA, 66900, 66900, 67680, 67680, 
NA, NA, 42720, 43560, NA, 44280, 44940, NA), class = c("hms", 
"difftime"), units = "secs"), finish_time_sample_edna_filtering = structure(c(39540, 
40380, 41400, 42480, NA, 54720, 56280, NA, 65520, 66840, NA, 
63660, 64980, NA, 62160, 60660, NA, 67500, 67500, 68160, 68160, 
NA, NA, 43140, 43920, NA, 44700, 45360, NA), class = c("hms", 
"difftime"), units = "secs"), volume_of_filtered_sample_ml = c(1000, 
1000, 1000, NA, NA, 1000, NA, NA, 1000, 1000, NA, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, NA), edn_comments = c("Jordinal A right. 5 and 0.45 filters. Slightly less than 1L. 10:11 am took sample", 
"Jordinal A left.", "Jordinal B right. 10:58 am took sample", 
"Jordinal B left. 10:07 took sample", NA, "Taken at 12.47 transect A right", 
"Transect A left taken at 12:47", NA, "Taken  at 18:30", "Taken at 18.30", 
NA, NA, NA, NA, "Left 5 and 0.45 um", "Right 5 and 0.45 um", 
"Negative 5 and 0.45 um", "Left 5um", "Left 0.45um", "Right 5um", 
"Right 0.45um", "Negative 5um", "Negative 0.45um", "Left 0.45 and 5um", 
"Right 0.45 and 5um", "Negative 5 and 0.45 um", "Left 0.45 and 5 um", 
"Right 0.45 and 5 um", "Negative 5 and 0.45um")), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -29L))

CodePudding user response:

We can use the names_pattern and its ".value" special name. I'll end in %>% str() so that we can see the real data since it's a bit wide as-is. (For clarity: do not use %>% str() in production, it is only for display purposes here.)

library(tidyr)
pivot_longer(
  data, 
  c(start_time_negative_control_edna_filtering, finish_time_negative_control_edna_filtering,
    start_time_sample_edna_filtering, finish_time_sample_edna_filtering), 
  names_pattern = "(start|finish)_time(.*)_edna_filtering",
  names_to = c(".value", "control")) %>%
  str()
# tibble [58 x 11] (S3: tbl_df/tbl/data.frame)
#  $ region                      : chr [1:58] "el_valle" "el_valle" "el_valle" "el_valle" ...
#  $ site                        : chr [1:58] "jordinal" "jordinal" "jordinal" "jordinal" ...
#  $ start_date                  : chr [1:58] "2022-07-21T00:00:00 00:00" "2022-07-21T00:00:00 00:00" "2022-07-21T00:00:00 00:00" "2022-07-21T00:00:00 00:00" ...
#  $ geometry                    : chr [1:58] "POINT (-80.0864405 8.6514853)" "POINT (-80.0864405 8.6514853)" "POINT (-80.0865047 8.6514879)" "POINT (-80.0865047 8.6514879)" ...
#  $ id_edna_sample              : chr [1:58] "220721_edna_001" "220721_edna_001" "220721_edna_002" "220721_edna_002" ...
#  $ transect_meters_sampled_m   : num [1:58] 0 0 0 0 0 0 0 0 0 0 ...
#  $ volume_of_filtered_sample_ml: num [1:58] 1000 1000 1000 1000 1000 1000 NA NA NA NA ...
#  $ edn_comments                : chr [1:58] "Jordinal A right. 5 and 0.45 filters. Slightly less than 1L. 10:11 am took sample" "Jordinal A right. 5 and 0.45 filters. Slightly less than 1L. 10:11 am took sample" "Jordinal A left." "Jordinal A left." ...
#  $ control                     : chr [1:58] "_negative_control" "_sample" "_negative_control" "_sample" ...
#  $ start                       : 'hms' num [1:58] NA 10:53:00 NA 11:07:00 ...
#   ..- attr(*, "units")= chr "secs"
#  $ finish                      : 'hms' num [1:58] NA 10:59:00 NA 11:13:00 ...
#   ..- attr(*, "units")= chr "secs"

I didn't finish the control column into a binary yet, I thought I'd keep it as the extract string so you can see better what's going on with the data. The added step to get that will be

... %>%
  mutate(control = grepl("control", control))

(assuming dplyr as well).

  • Related