Normally I would do this with a for-loop, but that is quite cumbersome, so I wonder how to do this more efficiently. The columns 'Ind_start' and the year in 'Start' are linked, the same goes for 'Ind_end' and the year in 'End'. How do I transpose in such a way that all years are in one column, all values ('Ind') are in one column and I add an additional column that indicates whether the year is 'Start' or 'End'?
DF <- structure(list(ID = c("F002", "F003", "F004", "F005", "L001",
"L002", "L003", "A001", "L004", "L005", "A002", "A003", "A004",
"A005", "A010", "A011", "A012", "L006", "L008", "L009", "L010",
"L011", "L012", "L013", "L014", "L015", "L016", "L017", "L018",
"L019", "L021", "A015", "A016", "A017", "L023", "L024", "L025",
"A018", "A019", "A020", "A021", "A022", "L026", "L027", "L028",
"L029", "L030", "A023", "A024", "A025", "A026", "A027", "A029",
"L031", "L032", "L033", "L036", "A030", "L037", "L038", "L039",
"L040"), Ind_end = c(4.5, 2.5, 5, 5, 15, 20, 2, 1, 2, 2, 2, 2,
5, 20, 1, 2, 2, 10, 3, 2, 5, 6, 10, 7, 3, 3, 5, 4, 300, 2, 7,
1, 1, 9, 2, 15, 2, 1, 5, 5, 10, 10, 3, 10, 2, 5, 1, 2, 6, 1,
2, 1, 4, 2, 2, 2, 3, 5, 6, 2, 2, 10), Ind_start = c(2, 17.5, 2,
100, 50, 20, 5, 4, 4, 2, 10, 2, 30, 600, 3, 5, 2, 20, 10, 20,
20, 480, 2, 5, 3, 3, 10, 8, 500, 3, 11, 15, 1, 9, 5, 25, 4, 1,
5, 25, 10, 100, 50, 6, 10, 15, 2, 5, 15, 5, 6, 3, 4, 3, 10, 10,
60, 3, 10, 4, 5, 50), Start = c(2005L, 1972L, 1981L, 1973L, 1995L,
2009L, 2014L, 1992L, 2010L, 1985L, 2010L, 2000L, 1992L, 1987L,
1985L, 1993L, 1983L, 2002L, 1999L, 1995L, 1992L, 1998L, 1970L,
2008L, 1990L, 1982L, 1987L, 1980L, 1989L, 1995L, 1996L, 1995L,
2013L, 2004L, 2002L, 1998L, 1982L, 1990L, 1995L, 2010L, 1999L,
1990L, 1977L, 1985L, 1960L, 1978L, 2000L, 1985L, 1996L, 1977L,
1990L, 1992L, 2010L, 1979L, 1985L, 1983L, 1964L, 2008L, 1984L,
1981L, 1977L, 1969L), End = c(2020L, 2020L, 2020L, 2020L, 2020L,
2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L,
2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L,
2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L,
2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L,
2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L,
2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L,
2020L, 2020L, 2020L)), class = "data.frame", row.names = c(2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 19L,
20L, 21L, 22L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L,
34L, 35L, 37L, 40L, 41L, 42L, 44L, 45L, 46L, 47L, 48L, 49L, 50L,
51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 63L, 64L,
65L, 66L, 69L, 70L, 71L, 72L, 73L, 74L))
CodePudding user response:
The process you want to use is called pivoting:
library(tidyverse)
DF <- structure(list(ID = c(
"F002", "F003", "F004", "F005", "L001",
"L002", "L003", "A001", "L004", "L005", "A002", "A003", "A004",
"A005", "A010", "A011", "A012", "L006", "L008", "L009", "L010",
"L011", "L012", "L013", "L014", "L015", "L016", "L017", "L018",
"L019", "L021", "A015", "A016", "A017", "L023", "L024", "L025",
"A018", "A019", "A020", "A021", "A022", "L026", "L027", "L028",
"L029", "L030", "A023", "A024", "A025", "A026", "A027", "A029",
"L031", "L032", "L033", "L036", "A030", "L037", "L038", "L039",
"L040"
), Ind_end = c(
4.5, 2.5, 5, 5, 15, 20, 2, 1, 2, 2, 2, 2,
5, 20, 1, 2, 2, 10, 3, 2, 5, 6, 10, 7, 3, 3, 5, 4, 300, 2, 7,
1, 1, 9, 2, 15, 2, 1, 5, 5, 10, 10, 3, 10, 2, 5, 1, 2, 6, 1,
2, 1, 4, 2, 2, 2, 3, 5, 6, 2, 2, 10
), Ind_start = c(
2, 17.5, 2,
100, 50, 20, 5, 4, 4, 2, 10, 2, 30, 600, 3, 5, 2, 20, 10, 20,
20, 480, 2, 5, 3, 3, 10, 8, 500, 3, 11, 15, 1, 9, 5, 25, 4, 1,
5, 25, 10, 100, 50, 6, 10, 15, 2, 5, 15, 5, 6, 3, 4, 3, 10, 10,
60, 3, 10, 4, 5, 50
), Start = c(
2005L, 1972L, 1981L, 1973L, 1995L,
2009L, 2014L, 1992L, 2010L, 1985L, 2010L, 2000L, 1992L, 1987L,
1985L, 1993L, 1983L, 2002L, 1999L, 1995L, 1992L, 1998L, 1970L,
2008L, 1990L, 1982L, 1987L, 1980L, 1989L, 1995L, 1996L, 1995L,
2013L, 2004L, 2002L, 1998L, 1982L, 1990L, 1995L, 2010L, 1999L,
1990L, 1977L, 1985L, 1960L, 1978L, 2000L, 1985L, 1996L, 1977L,
1990L, 1992L, 2010L, 1979L, 1985L, 1983L, 1964L, 2008L, 1984L,
1981L, 1977L, 1969L
), End = c(
2020L, 2020L, 2020L, 2020L, 2020L,
2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L,
2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L,
2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L,
2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L,
2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L,
2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L,
2020L, 2020L, 2020L
)), class = "data.frame", row.names = c(
2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 19L,
20L, 21L, 22L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L,
34L, 35L, 37L, 40L, 41L, 42L, 44L, 45L, 46L, 47L, 48L, 49L, 50L,
51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 63L, 64L,
65L, 66L, 69L, 70L, 71L, 72L, 73L, 74L
))
DF %>%
pivot_longer(c(Start, End), names_to = "year_start", values_to = "year") %>%
pivot_longer(c(Ind_start, Ind_end), names_to = "ind_start", values_to = "ind")
#> # A tibble: 248 x 5
#> ID year_start year ind_start ind
#> <chr> <chr> <int> <chr> <dbl>
#> 1 F002 Start 2005 Ind_start 2
#> 2 F002 Start 2005 Ind_end 4.5
#> 3 F002 End 2020 Ind_start 2
#> 4 F002 End 2020 Ind_end 4.5
#> 5 F003 Start 1972 Ind_start 17.5
#> 6 F003 Start 1972 Ind_end 2.5
#> 7 F003 End 2020 Ind_start 17.5
#> 8 F003 End 2020 Ind_end 2.5
#> 9 F004 Start 1981 Ind_start 2
#> 10 F004 Start 1981 Ind_end 5
#> # … with 238 more rows
Created on 2021-10-06 by the reprex package (v2.0.1)