I have a dataframe called data_subset. In the aggregation_period column, I want to replace every value that is a month number stored as a string (e.g. 3, 4, 5, 6, 7) with its month.abb abbreviation (e.g. Mar, Apr, May, Jun, Jul), without affecting the other rows, which contain AMJ and MAMJJ. I have tried the code chunks below, but could not find a way to get the required result.
Any thoughts and ideas, please?
> data_subset <- nor_ind_final_s1 %>% filter(source == 'modis')
> head(data_subset, 10)
year aggregation_period value source statistic variable
1 2001 3 0.44 modis mean NDVI
2 2001 4 0.57 modis mean NDVI
3 2001 5 0.62 modis mean NDVI
4 2001 6 0.75 modis mean NDVI
5 2001 7 0.62 modis mean NDVI
6 2002 3 0.28 modis mean NDVI
7 2002 4 0.43 modis mean NDVI
8 2002 5 0.70 modis mean NDVI
9 2002 6 0.80 modis mean NDVI
10 2002 7 0.75 modis mean NDVI
> tail(data_subset)
year aggregation_period value source statistic variable
114 2012 AMJ 0.7000 modis mean NDVI
115 2013 AMJ 0.6450 modis mean NDVI
116 2014 AMJ 0.5825 modis mean NDVI
117 2015 AMJ 0.6500 modis mean NDVI
118 2016 AMJ 0.4375 modis mean NDVI
119 2017 AMJ 0.6575 modis mean NDVI
> # check the levels
> levels(as.factor(data_subset$aggregation_period))
[1] "3" "4" "5" "6" "7" "AMJ" "MAMJJ"
> # create a vector of integers
> m <- c("3", "4", "5", "6", "7")
> # convert months to month.abb
> m.abb <- month.abb[as.numeric(m)]
> # check which rows contain months
> mons <- which(data_subset$aggregation_period == m)
Warning message:
In data_subset$aggregation_period == m :
longer object length is not a multiple of shorter object length
> mons
[1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
[26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
[51] 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
[76] 76 77 78 79 80 81 82 83 84 85
> # replace months names to month abbreviations
Here is the dput() of my data.
> dput(data_subset)
structure(list(year = c(2001, 2001, 2001, 2001, 2001, 2002, 2002,
2002, 2002, 2002, 2003, 2003, 2003, 2003, 2003, 2004, 2004, 2004,
2004, 2004, 2005, 2005, 2005, 2005, 2005, 2006, 2006, 2006, 2006,
2006, 2007, 2007, 2007, 2007, 2007, 2008, 2008, 2008, 2008, 2008,
2009, 2009, 2009, 2009, 2009, 2010, 2010, 2010, 2010, 2010, 2011,
2011, 2011, 2011, 2011, 2012, 2012, 2012, 2012, 2012, 2013, 2013,
2013, 2013, 2013, 2014, 2014, 2014, 2014, 2014, 2015, 2015, 2015,
2015, 2015, 2016, 2016, 2016, 2016, 2016, 2017, 2017, 2017, 2017,
2017, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
2011, 2012, 2013, 2014, 2015, 2016, 2017, 2001, 2002, 2003, 2004,
2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015,
2016, 2017), aggregation_period = c("3", "4", "5", "6", "7",
"3", "4", "5", "6", "7", "3", "4", "5", "6", "7", "3", "4", "5",
"6", "7", "3", "4", "5", "6", "7", "3", "4", "5", "6", "7", "3",
"4", "5", "6", "7", "3", "4", "5", "6", "7", "3", "4", "5", "6",
"7", "3", "4", "5", "6", "7", "3", "4", "5", "6", "7", "3", "4",
"5", "6", "7", "3", "4", "5", "6", "7", "3", "4", "5", "6", "7",
"3", "4", "5", "6", "7", "3", "4", "5", "6", "7", "3", "4", "5",
"6", "7", "MAMJJ", "MAMJJ", "MAMJJ", "MAMJJ", "MAMJJ", "MAMJJ",
"MAMJJ", "MAMJJ", "MAMJJ", "MAMJJ", "MAMJJ", "MAMJJ", "MAMJJ",
"MAMJJ", "MAMJJ", "MAMJJ", "MAMJJ", "AMJ", "AMJ", "AMJ", "AMJ",
"AMJ", "AMJ", "AMJ", "AMJ", "AMJ", "AMJ", "AMJ", "AMJ", "AMJ",
"AMJ", "AMJ", "AMJ", "AMJ"), value = c(0.44, 0.57, 0.62, 0.75,
0.62, 0.28, 0.43, 0.7, 0.8, 0.75, 0.4, 0.46, 0.76, 0.76, 0.58,
0.31, 0.46, 0.59, 0.74, 0.67, 0.4, 0.28, 0.71, 0.75, 0.76, 0.41,
0.33, 0.56, 0.76, 0.62, 0.35, 0.4, 0.66, 0.76, 0.71, 0.39, 0.56,
0.68, 0.78, 0.66, 0.64, 0.44, 0.71, 0.82, 0.65, 0.35, 0.41, 0.71,
0.77, 0.67, 0.59, 0.37, 0.61, 0.72, 0.78, 0.56, 0.6, 0.69, 0.74,
0.77, 0.59, 0.51, 0.56, 0.7, 0.81, 0.31, 0.32, 0.6, 0.68, 0.73,
0.45, 0.48, 0.59, 0.73, 0.8, 0.41, 0.36, 0.22, 0.68, 0.49, 0.42,
0.63, 0.45, 0.79, 0.76, 0.6, 0.592, 0.592, 0.554, 0.58, 0.536,
0.576, 0.614, 0.652, 0.582, 0.614, 0.672, 0.634, 0.528, 0.61,
0.432, 0.61, 0.64, 0.67, 0.64, 0.615, 0.625, 0.5675, 0.6325,
0.67, 0.655, 0.64, 0.62, 0.7, 0.645, 0.5825, 0.65, 0.4375, 0.6575
), source = c("modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis"), statistic = c("mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean"), variable = c("NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI")), row.names = c(NA, -119L), class = "data.frame")
CodePudding user response:
Use replace() with a precomputed logical index vector; this produces no warnings.
# Flag the rows whose aggregation_period is one of the month numbers 1 to 12.
rp <- is.element(data_subset$aggregation_period, 1:12)
# Overwrite only the flagged entries with the matching month.abb abbreviation;
# all other entries are left untouched.
transform(
  data_subset,
  aggregation_period = replace(
    aggregation_period,
    rp,
    month.abb[as.numeric(aggregation_period[rp])]
  )
)
# year aggregation_period value source statistic variable
# 1 2001 Mar 0.4400 modis mean NDVI
# 2 2001 Apr 0.5700 modis mean NDVI
# 3 2001 May 0.6200 modis mean NDVI
# 4 2001 Jun 0.7500 modis mean NDVI
# 5 2001 Jul 0.6200 modis mean NDVI
# 6 2002 Mar 0.2800 modis mean NDVI
# ...
# 114 2012 AMJ 0.7000 modis mean NDVI
# 115 2013 AMJ 0.6450 modis mean NDVI
# 116 2014 AMJ 0.5825 modis mean NDVI
# 117 2015 AMJ 0.6500 modis mean NDVI
# 118 2016 AMJ 0.4375 modis mean NDVI
# 119 2017 AMJ 0.6575 modis mean NDVI
CodePudding user response:
You can use ifelse() to check whether each value in the aggregation_period column is a month number between 1 and 12 and, if so, look up its abbreviation in month.abb; all other values are kept unchanged. The head() and tail() of the result are pasted below.
library(dplyr)
# Convert month numbers in aggregation_period to month.abb abbreviations and
# keep every other value unchanged.
data_subset %>%
  mutate(
    aggregation_period = ifelse(
      aggregation_period %in% 1:12,
      # ifelse() evaluates this branch for the whole column, so look the
      # position up with match(): it returns NA for non-month values instead
      # of raising the "NAs introduced by coercion" warning that as.integer()
      # gives.
      month.abb[match(aggregation_period, 1:12)],
      aggregation_period
    )
  )
year aggregation_period value source statistic variable
1 2001 Mar 0.4400 modis mean NDVI
2 2001 Apr 0.5700 modis mean NDVI
3 2001 May 0.6200 modis mean NDVI
4 2001 Jun 0.7500 modis mean NDVI
5 2001 Jul 0.6200 modis mean NDVI
6 2002 Mar 0.2800 modis mean NDVI
7 2002 Apr 0.4300 modis mean NDVI
8 2002 May 0.7000 modis mean NDVI
9 2002 Jun 0.8000 modis mean NDVI
10 2002 Jul 0.7500 modis mean NDVI
11 2012 AMJ 0.7000 modis mean NDVI
12 2013 AMJ 0.6450 modis mean NDVI
13 2014 AMJ 0.5825 modis mean NDVI
14 2015 AMJ 0.6500 modis mean NDVI
15 2016 AMJ 0.4375 modis mean NDVI
16 2017 AMJ 0.6575 modis mean NDVI