Home > Back-end >  Transform data frame column into rows
Transform data frame column into rows

Time:10-06

Normally I would do this with a for-loop, but that is quite cumbersome, so I wonder how to do this more efficiently. The columns 'Ind_start' and the year in 'Start' are linked, the same goes for 'Ind_end' and the year in 'End'. How do I transpose in such a way that all years are in one column, all values ('Ind') are in one column and I add an additional column that indicates whether the year is 'Start' or 'End'?

DF <- structure(list(ID = c("F002", "F003", "F004", "F005", "L001", 
"L002", "L003", "A001", "L004", "L005", "A002", "A003", "A004", 
"A005", "A010", "A011", "A012", "L006", "L008", "L009", "L010", 
"L011", "L012", "L013", "L014", "L015", "L016", "L017", "L018", 
"L019", "L021", "A015", "A016", "A017", "L023", "L024", "L025", 
"A018", "A019", "A020", "A021", "A022", "L026", "L027", "L028", 
"L029", "L030", "A023", "A024", "A025", "A026", "A027", "A029", 
"L031", "L032", "L033", "L036", "A030", "L037", "L038", "L039", 
"L040"), Ind_end = c(4.5, 2.5, 5, 5, 15, 20, 2, 1, 2, 2, 2, 2, 
5, 20, 1, 2, 2, 10, 3, 2, 5, 6, 10, 7, 3, 3, 5, 4, 300, 2, 7, 
1, 1, 9, 2, 15, 2, 1, 5, 5, 10, 10, 3, 10, 2, 5, 1, 2, 6, 1, 
2, 1, 4, 2, 2, 2, 3, 5, 6, 2, 2, 10), Ind_start = c(2, 17.5, 2, 
100, 50, 20, 5, 4, 4, 2, 10, 2, 30, 600, 3, 5, 2, 20, 10, 20, 
20, 480, 2, 5, 3, 3, 10, 8, 500, 3, 11, 15, 1, 9, 5, 25, 4, 1, 
5, 25, 10, 100, 50, 6, 10, 15, 2, 5, 15, 5, 6, 3, 4, 3, 10, 10, 
60, 3, 10, 4, 5, 50), Start = c(2005L, 1972L, 1981L, 1973L, 1995L, 
2009L, 2014L, 1992L, 2010L, 1985L, 2010L, 2000L, 1992L, 1987L, 
1985L, 1993L, 1983L, 2002L, 1999L, 1995L, 1992L, 1998L, 1970L, 
2008L, 1990L, 1982L, 1987L, 1980L, 1989L, 1995L, 1996L, 1995L, 
2013L, 2004L, 2002L, 1998L, 1982L, 1990L, 1995L, 2010L, 1999L, 
1990L, 1977L, 1985L, 1960L, 1978L, 2000L, 1985L, 1996L, 1977L, 
1990L, 1992L, 2010L, 1979L, 1985L, 1983L, 1964L, 2008L, 1984L, 
1981L, 1977L, 1969L), End = c(2020L, 2020L, 2020L, 2020L, 2020L, 
2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
2020L, 2020L, 2020L)), class = "data.frame", row.names = c(2L, 
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 19L, 
20L, 21L, 22L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 
34L, 35L, 37L, 40L, 41L, 42L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 
51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 63L, 64L, 
65L, 66L, 69L, 70L, 71L, 72L, 73L, 74L))

CodePudding user response:

The process you want to use is called pivoting:

library(tidyverse)
DF <- structure(list(ID = c(
  "F002", "F003", "F004", "F005", "L001",
  "L002", "L003", "A001", "L004", "L005", "A002", "A003", "A004",
  "A005", "A010", "A011", "A012", "L006", "L008", "L009", "L010",
  "L011", "L012", "L013", "L014", "L015", "L016", "L017", "L018",
  "L019", "L021", "A015", "A016", "A017", "L023", "L024", "L025",
  "A018", "A019", "A020", "A021", "A022", "L026", "L027", "L028",
  "L029", "L030", "A023", "A024", "A025", "A026", "A027", "A029",
  "L031", "L032", "L033", "L036", "A030", "L037", "L038", "L039",
  "L040"
), Ind_end = c(
  4.5, 2.5, 5, 5, 15, 20, 2, 1, 2, 2, 2, 2,
  5, 20, 1, 2, 2, 10, 3, 2, 5, 6, 10, 7, 3, 3, 5, 4, 300, 2, 7,
  1, 1, 9, 2, 15, 2, 1, 5, 5, 10, 10, 3, 10, 2, 5, 1, 2, 6, 1,
  2, 1, 4, 2, 2, 2, 3, 5, 6, 2, 2, 10
), Ind_start = c(
  2, 17.5, 2,
  100, 50, 20, 5, 4, 4, 2, 10, 2, 30, 600, 3, 5, 2, 20, 10, 20,
  20, 480, 2, 5, 3, 3, 10, 8, 500, 3, 11, 15, 1, 9, 5, 25, 4, 1,
  5, 25, 10, 100, 50, 6, 10, 15, 2, 5, 15, 5, 6, 3, 4, 3, 10, 10,
  60, 3, 10, 4, 5, 50
), Start = c(
  2005L, 1972L, 1981L, 1973L, 1995L,
  2009L, 2014L, 1992L, 2010L, 1985L, 2010L, 2000L, 1992L, 1987L,
  1985L, 1993L, 1983L, 2002L, 1999L, 1995L, 1992L, 1998L, 1970L,
  2008L, 1990L, 1982L, 1987L, 1980L, 1989L, 1995L, 1996L, 1995L,
  2013L, 2004L, 2002L, 1998L, 1982L, 1990L, 1995L, 2010L, 1999L,
  1990L, 1977L, 1985L, 1960L, 1978L, 2000L, 1985L, 1996L, 1977L,
  1990L, 1992L, 2010L, 1979L, 1985L, 1983L, 1964L, 2008L, 1984L,
  1981L, 1977L, 1969L
), End = c(
  2020L, 2020L, 2020L, 2020L, 2020L,
  2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L,
  2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L,
  2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L,
  2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L,
  2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L,
  2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L,
  2020L, 2020L, 2020L
)), class = "data.frame", row.names = c(
  2L,
  3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 19L,
  20L, 21L, 22L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L,
  34L, 35L, 37L, 40L, 41L, 42L, 44L, 45L, 46L, 47L, 48L, 49L, 50L,
  51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 63L, 64L,
  65L, 66L, 69L, 70L, 71L, 72L, 73L, 74L
))

DF %>%
  pivot_longer(c(Start, End), names_to = "year_start", values_to = "year") %>%
  pivot_longer(c(Ind_start, Ind_end), names_to = "ind_start", values_to = "ind")
#> # A tibble: 248 x 5
#>    ID    year_start  year ind_start   ind
#>    <chr> <chr>      <int> <chr>     <dbl>
#>  1 F002  Start       2005 Ind_start   2  
#>  2 F002  Start       2005 Ind_end     4.5
#>  3 F002  End         2020 Ind_start   2  
#>  4 F002  End         2020 Ind_end     4.5
#>  5 F003  Start       1972 Ind_start  17.5
#>  6 F003  Start       1972 Ind_end     2.5
#>  7 F003  End         2020 Ind_start  17.5
#>  8 F003  End         2020 Ind_end     2.5
#>  9 F004  Start       1981 Ind_start   2  
#> 10 F004  Start       1981 Ind_end     5  
#> # … with 238 more rows

Created on 2021-10-06 by the reprex package (v2.0.1)

  • Related