I have this data:
# Cohort years as character strings, "2010" through "2020".
years <- as.character(2010:2020)
# Number of cohorts, i.e. how many distinct years there are.
n_cohorts <- length(years)
# Example data frame with 22 rows: one row per label2plot level ("aaa", "bbb")
# for each year from "2020" down to "2010".
# Columns: label2plot (ordered factor), var_abs (integer), var_rel (double),
# year (character).
df <- structure(list(label2plot = structure(c(1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L
), levels = c("aaa", "bbb"), class = c("ordered",
"factor")), var_abs = c(717L, 569L, 860L, 752L, 713L, 575L, 918L,
724L, 946L, 764L, 951L, 764L, 784L, 691L, 672L, 610L, 833L, 671L,
773L, 620L, 532L, 293L), var_rel = c(0.557542768273717, 0.442457231726283,
0.533498759305211, 0.466501240694789, 0.553571428571429, 0.446428571428571,
0.559074299634592, 0.440925700365408, 0.553216374269006, 0.446783625730994,
0.554518950437318, 0.445481049562682, 0.531525423728814, 0.468474576271186,
0.52418096723869, 0.47581903276131, 0.553856382978723, 0.446143617021277,
0.554917444364681, 0.445082555635319, 0.644848484848485, 0.355151515151515
), year = c("2020", "2020", "2019", "2019", "2018", "2018",
"2017", "2017", "2016", "2016", "2015", "2015", "2014", "2014",
"2013", "2013", "2012", "2012", "2011", "2011", "2010", "2010"
)), row.names = c(NA, -22L), class = "data.frame")
# Same value as n_cohorts; not referenced by any of the code shown here.
cohorts <- length(years)
I would like to convert `year` into a factor. I proceed as follows:
# Attempt to build the factor from positions 1..n_cohorts labelled with the
# years. Note: `levels` is matched against the values in df$year ("2010",
# "2011", ...), and none of those values occur in 1:n_cohorts, so every
# element of the result is NA.
df$year2 <- factor(df$year,
levels = c(1:n_cohorts),
labels = years)
I get the following result and cannot explain why `NA`s appear in `year2`. Does anyone have an idea what I am doing wrong?
# Resulting data frame (dput-style output): year2 carries the levels "2010"
# to "2020", but all 22 of its integer codes are NA.
structure(list(label2plot = structure(c(1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L
), levels = c("aaa", "bbb"), class = c("ordered", "factor")),
var_abs = c(717L, 569L, 860L, 752L, 713L, 575L, 918L, 724L,
946L, 764L, 951L, 764L, 784L, 691L, 672L, 610L, 833L, 671L,
773L, 620L, 532L, 293L), var_rel = c(0.557542768273717, 0.442457231726283,
0.533498759305211, 0.466501240694789, 0.553571428571429,
0.446428571428571, 0.559074299634592, 0.440925700365408,
0.553216374269006, 0.446783625730994, 0.554518950437318,
0.445481049562682, 0.531525423728814, 0.468474576271186,
0.52418096723869, 0.47581903276131, 0.553856382978723, 0.446143617021277,
0.554917444364681, 0.445082555635319, 0.644848484848485,
0.355151515151515), year = c("2020", "2020", "2019", "2019",
"2018", "2018", "2017", "2017", "2016", "2016", "2015", "2015",
"2014", "2014", "2013", "2013", "2012", "2012", "2011", "2011",
"2010", "2010"), year2 = structure(c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), levels = c("2010", "2011", "2012", "2013", "2014", "2015",
"2016", "2017", "2018", "2019", "2020"), class = "factor")), row.names = c(NA,
-22L), class = "data.frame")
CodePudding user response:
The `levels=` argument is documented in `?factor` as:
levels: an optional vector of the unique values (as character
strings) that 'x' might have taken. The default is the
unique set of values taken by 'as.character(x)', sorted into
increasing order _of 'x'_. Note that this set can be
specified as smaller than 'sort(unique(x))'.
which means that it should include `"2020"`, etc. You are passing it the integers 1 through 11, which do not appear in your data. Because your real `df$year` values are not found in `1:11`, they are all converted to `NA`.
# Reproduces the problem: no value of df$year matches any of the levels
# 1:n_cohorts, so every element becomes NA while the labels still become
# the levels of the result.
factor(df$year, levels = 1:n_cohorts, labels = years)
# [1] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
# Levels: 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020
Either drop the `levels=` argument and keep `labels = years`, or drop `labels=` and pass the years as `levels = years`:
# Variant 1: no explicit levels. The levels default to the sorted unique
# values of df$year, and `labels` renames them in that order.
factor(df$year, labels = years)
# [1] 2020 2020 2019 2019 2018 2018 2017 2017 2016 2016 2015 2015 2014 2014 2013 2013 2012 2012 2011 2011 2010 2010
# Levels: 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020
# Variant 2: pass the actual values that occur in df$year as the levels.
factor(df$year, levels = years)
# [1] 2020 2020 2019 2019 2018 2018 2017 2017 2016 2016 2015 2015 2014 2014 2013 2013 2012 2012 2011 2011 2010 2010
# Levels: 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020
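(They are identical here, because `years` is already in the same sorted order as the default levels of `df$year`. If `years` were in a different order, `labels = years` would relabel the values incorrectly, so `levels = years` is the safer choice.)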