Home > Software engineering >  Convert months names to month.abb in a column rows in a dataframe?
Convert months names to month.abb in a column rows in a dataframe?

Time:03-22

I have a dataframe called data_subset. In the aggregation_period column, I want to replace every value that is a month number stored as a string (e.g. 3, 4, 5, 6, 7) with the corresponding month.abb abbreviation (e.g. Mar, Apr, May, Jun, Jul), without affecting the other rows, which contain AMJ and MAMJJ. I have tried the code below, but could not find a way to get the required result.

Any thoughts and ideas, please?

> data_subset <- nor_ind_final_s1 %>%  filter(source == 'modis')
> head(data_subset, 10) 
   year aggregation_period value source statistic variable
1  2001                  3  0.44  modis      mean     NDVI
2  2001                  4  0.57  modis      mean     NDVI
3  2001                  5  0.62  modis      mean     NDVI
4  2001                  6  0.75  modis      mean     NDVI
5  2001                  7  0.62  modis      mean     NDVI
6  2002                  3  0.28  modis      mean     NDVI
7  2002                  4  0.43  modis      mean     NDVI
8  2002                  5  0.70  modis      mean     NDVI
9  2002                  6  0.80  modis      mean     NDVI
10 2002                  7  0.75  modis      mean     NDVI
> tail(data_subset)
    year aggregation_period  value source statistic variable
114 2012                AMJ 0.7000  modis      mean     NDVI
115 2013                AMJ 0.6450  modis      mean     NDVI
116 2014                AMJ 0.5825  modis      mean     NDVI
117 2015                AMJ 0.6500  modis      mean     NDVI
118 2016                AMJ 0.4375  modis      mean     NDVI
119 2017                AMJ 0.6575  modis      mean     NDVI
> # check the levels 
> levels(as.factor(data_subset$aggregation_period))
[1] "3"     "4"     "5"     "6"     "7"     "AMJ"   "MAMJJ"
> # create a vector of integers 
> m <- c("3", "4", "5", "6", "7")
> # convert months to month.abb
> m.abb <- month.abb[as.numeric(m)]
> # check which rows contain months
> mons <- which(data_subset$aggregation_period == m)
Warning message:
In data_subset$aggregation_period == m :
  longer object length is not a multiple of shorter object length
> mons
 [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
[26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
[51] 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
[76] 76 77 78 79 80 81 82 83 84 85
> # replace months names to month abbreviations

Here is the dput() of my data.

> dput(data_subset)
structure(list(year = c(2001, 2001, 2001, 2001, 2001, 2002, 2002, 
2002, 2002, 2002, 2003, 2003, 2003, 2003, 2003, 2004, 2004, 2004, 
2004, 2004, 2005, 2005, 2005, 2005, 2005, 2006, 2006, 2006, 2006, 
2006, 2007, 2007, 2007, 2007, 2007, 2008, 2008, 2008, 2008, 2008, 
2009, 2009, 2009, 2009, 2009, 2010, 2010, 2010, 2010, 2010, 2011, 
2011, 2011, 2011, 2011, 2012, 2012, 2012, 2012, 2012, 2013, 2013, 
2013, 2013, 2013, 2014, 2014, 2014, 2014, 2014, 2015, 2015, 2015, 
2015, 2015, 2016, 2016, 2016, 2016, 2016, 2017, 2017, 2017, 2017, 
2017, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 
2011, 2012, 2013, 2014, 2015, 2016, 2017, 2001, 2002, 2003, 2004, 
2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 
2016, 2017), aggregation_period = c("3", "4", "5", "6", "7", 
"3", "4", "5", "6", "7", "3", "4", "5", "6", "7", "3", "4", "5", 
"6", "7", "3", "4", "5", "6", "7", "3", "4", "5", "6", "7", "3", 
"4", "5", "6", "7", "3", "4", "5", "6", "7", "3", "4", "5", "6", 
"7", "3", "4", "5", "6", "7", "3", "4", "5", "6", "7", "3", "4", 
"5", "6", "7", "3", "4", "5", "6", "7", "3", "4", "5", "6", "7", 
"3", "4", "5", "6", "7", "3", "4", "5", "6", "7", "3", "4", "5", 
"6", "7", "MAMJJ", "MAMJJ", "MAMJJ", "MAMJJ", "MAMJJ", "MAMJJ", 
"MAMJJ", "MAMJJ", "MAMJJ", "MAMJJ", "MAMJJ", "MAMJJ", "MAMJJ", 
"MAMJJ", "MAMJJ", "MAMJJ", "MAMJJ", "AMJ", "AMJ", "AMJ", "AMJ", 
"AMJ", "AMJ", "AMJ", "AMJ", "AMJ", "AMJ", "AMJ", "AMJ", "AMJ", 
"AMJ", "AMJ", "AMJ", "AMJ"), value = c(0.44, 0.57, 0.62, 0.75, 
0.62, 0.28, 0.43, 0.7, 0.8, 0.75, 0.4, 0.46, 0.76, 0.76, 0.58, 
0.31, 0.46, 0.59, 0.74, 0.67, 0.4, 0.28, 0.71, 0.75, 0.76, 0.41, 
0.33, 0.56, 0.76, 0.62, 0.35, 0.4, 0.66, 0.76, 0.71, 0.39, 0.56, 
0.68, 0.78, 0.66, 0.64, 0.44, 0.71, 0.82, 0.65, 0.35, 0.41, 0.71, 
0.77, 0.67, 0.59, 0.37, 0.61, 0.72, 0.78, 0.56, 0.6, 0.69, 0.74, 
0.77, 0.59, 0.51, 0.56, 0.7, 0.81, 0.31, 0.32, 0.6, 0.68, 0.73, 
0.45, 0.48, 0.59, 0.73, 0.8, 0.41, 0.36, 0.22, 0.68, 0.49, 0.42, 
0.63, 0.45, 0.79, 0.76, 0.6, 0.592, 0.592, 0.554, 0.58, 0.536, 
0.576, 0.614, 0.652, 0.582, 0.614, 0.672, 0.634, 0.528, 0.61, 
0.432, 0.61, 0.64, 0.67, 0.64, 0.615, 0.625, 0.5675, 0.6325, 
0.67, 0.655, 0.64, 0.62, 0.7, 0.645, 0.5825, 0.65, 0.4375, 0.6575
), source = c("modis", "modis", "modis", "modis", "modis", "modis", 
"modis", "modis", "modis", "modis", "modis", "modis", "modis", 
"modis", "modis", "modis", "modis", "modis", "modis", "modis", 
"modis", "modis", "modis", "modis", "modis", "modis", "modis", 
"modis", "modis", "modis", "modis", "modis", "modis", "modis", 
"modis", "modis", "modis", "modis", "modis", "modis", "modis", 
"modis", "modis", "modis", "modis", "modis", "modis", "modis", 
"modis", "modis", "modis", "modis", "modis", "modis", "modis", 
"modis", "modis", "modis", "modis", "modis", "modis", "modis", 
"modis", "modis", "modis", "modis", "modis", "modis", "modis", 
"modis", "modis", "modis", "modis", "modis", "modis", "modis", 
"modis", "modis", "modis", "modis", "modis", "modis", "modis", 
"modis", "modis", "modis", "modis", "modis", "modis", "modis", 
"modis", "modis", "modis", "modis", "modis", "modis", "modis", 
"modis", "modis", "modis", "modis", "modis", "modis", "modis", 
"modis", "modis", "modis", "modis", "modis", "modis", "modis", 
"modis", "modis", "modis", "modis", "modis", "modis", "modis", 
"modis"), statistic = c("mean", "mean", "mean", "mean", "mean", 
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean", 
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean", 
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean", 
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean", 
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean", 
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean", 
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean", 
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean", 
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean", 
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean", 
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean", 
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean", 
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean", 
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean", 
"mean", "mean"), variable = c("NDVI", "NDVI", "NDVI", "NDVI", 
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", 
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", 
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", 
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", 
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", 
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", 
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", 
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", 
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", 
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", 
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", 
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", 
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", 
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", 
"NDVI", "NDVI", "NDVI")), row.names = c(NA, -119L), class = "data.frame")

CodePudding user response:

Using replace() with a precomputed logical index vector; this gives no warnings.

# Flag the rows whose aggregation_period is a month number (1-12). %in%
# compares on the character representation, so the string "3" matches 3.
rp <- data_subset$aggregation_period %in% 1:12
# Swap only the flagged entries for their month.abb labels. Values such as
# "AMJ" and "MAMJJ" are never passed to as.double(), so no coercion warning
# is raised and those rows are left untouched.
transform(
  data_subset,
  aggregation_period = replace(
    aggregation_period,
    rp,
    month.abb[as.double(aggregation_period[rp])]
  )
)
#     year aggregation_period  value source statistic variable
# 1   2001                Mar 0.4400  modis      mean     NDVI
# 2   2001                Apr 0.5700  modis      mean     NDVI
# 3   2001                May 0.6200  modis      mean     NDVI
# 4   2001                Jun 0.7500  modis      mean     NDVI
# 5   2001                Jul 0.6200  modis      mean     NDVI
# 6   2002                Mar 0.2800  modis      mean     NDVI
# ...
# 114 2012                AMJ 0.7000  modis      mean     NDVI
# 115 2013                AMJ 0.6450  modis      mean     NDVI
# 116 2014                AMJ 0.5825  modis      mean     NDVI
# 117 2015                AMJ 0.6500  modis      mean     NDVI
# 118 2016                AMJ 0.4375  modis      mean     NDVI
# 119 2017                AMJ 0.6575  modis      mean     NDVI

CodePudding user response:

You can use ifelse(): where the value in the aggregation_period column is a month number between 1 and 12, look up its abbreviation in month.abb; otherwise keep the value as it is.

The head() and tail() of the result are shown below the code.

library(dplyr)

# `data_subset` is the data frame from the question. (A bare `df` would
# resolve to the base function stats::df() unless a data frame of that name
# had been created first.)
data_subset %>%
  mutate(
    aggregation_period = ifelse(
      aggregation_period %in% 1:12,
      # ifelse() evaluates this branch for every row, not just the month rows,
      # so use match() rather than as.integer(): match() returns NA for "AMJ" /
      # "MAMJJ" silently, whereas as.integer() would warn
      # "NAs introduced by coercion".
      month.abb[match(aggregation_period, 1:12)],
      aggregation_period
    )
  )

   year aggregation_period  value source statistic variable
1  2001                Mar 0.4400  modis      mean     NDVI
2  2001                Apr 0.5700  modis      mean     NDVI
3  2001                May 0.6200  modis      mean     NDVI
4  2001                Jun 0.7500  modis      mean     NDVI
5  2001                Jul 0.6200  modis      mean     NDVI
6  2002                Mar 0.2800  modis      mean     NDVI
7  2002                Apr 0.4300  modis      mean     NDVI
8  2002                May 0.7000  modis      mean     NDVI
9  2002                Jun 0.8000  modis      mean     NDVI
10 2002                Jul 0.7500  modis      mean     NDVI
11 2012                AMJ 0.7000  modis      mean     NDVI
12 2013                AMJ 0.6450  modis      mean     NDVI
13 2014                AMJ 0.5825  modis      mean     NDVI
14 2015                AMJ 0.6500  modis      mean     NDVI
15 2016                AMJ 0.4375  modis      mean     NDVI
16 2017                AMJ 0.6575  modis      mean     NDVI
  •  Tags:  
  • r
  • Related