I have a data frame with the following structure (a random sample of 50 rows):
> dput(df[sample(nrow(df), 50),])
structure(list(lat = c("58", "59", "57", "60", "66", "60", "57",
"59", "60", "58", "57", "59", "60", "65", "61", "55", "68", "67",
"58", "57", "60", "60", "63", "57", "58", "58", "58", "58", "62",
"57", "62", "59", "61", "64", "61", "68", "63", "56", "59", "58",
"65", "55", "63", "57", "56", "59", "63", "57", "68", "63"),
long = c(18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18), date = c("2018-03-15", "2014-04-15",
"2011-04-09", "2013-04-01", "2015-04-10", "2013-04-07", "2016-02-09",
"2004-03-24", "2020-03-09", "1974-02-04", "2007-03-03", "2020-03-11",
"1986-03-22", "2019-04-14", "2014-03-09", "2017-02-13", "1981-04-14",
"2020-05-17", "2017-03-08", "2017-02-28", "1990-03-03", "2016-03-24",
"1977-03-18", "2005-03-28", "1997-03-15", "2015-03-12", "1992-02-26",
"2014-03-15", "2001-04-01", "2011-03-31", "2015-04-06", "2020-03-07",
"2008-03-22", "2006-04-15", "1995-03-25", "2021-04-17", "2018-04-02",
"2010-02-15", "2009-03-16", "2015-03-12", "2011-04-01", "2011-04-03",
"2015-04-04", "2010-03-18", "2006-02-15", "1999-01-30", "2018-04-06",
"2004-03-01", "2017-04-15", "2008-04-14"), julian_day = c(74,
105, 99, 91, 100, 97, 40, 84, 69, 35, 62, 71, 81, 104, 68,
44, 104, 138, 67, 59, 62, 84, 77, 87, 74, 71, 57, 74, 91,
90, 96, 67, 82, 105, 84, 107, 92, 46, 75, 71, 91, 93, 94,
77, 46, 30, 96, 61, 105, 105), year = c(2018L, 2014L, 2011L,
2013L, 2015L, 2013L, 2016L, 2004L, 2020L, 1974L, 2007L, 2020L,
1986L, 2019L, 2014L, 2017L, 1981L, 2020L, 2017L, 2017L, 1990L,
2016L, 1977L, 2005L, 1997L, 2015L, 1992L, 2014L, 2001L, 2011L,
2015L, 2020L, 2008L, 2006L, 1995L, 2021L, 2018L, 2010L, 2009L,
2015L, 2011L, 2011L, 2015L, 2010L, 2006L, 1999L, 2018L, 2004L,
2017L, 2008L), decade = c("2010-2019", "2010-2019", "2010-2019",
"2010-2019", "2010-2019", "2010-2019", "2010-2019", "2000-2009",
"2020-2029", "1970-1979", "2000-2009", "2020-2029", "1980-1989",
"2010-2019", "2010-2019", "2010-2019", "1980-1989", "2020-2029",
"2010-2019", "2010-2019", "1990-1999", "2010-2019", "1970-1979",
"2000-2009", "1990-1999", "2010-2019", "1990-1999", "2010-2019",
"2000-2009", "2010-2019", "2010-2019", "2020-2029", "2000-2009",
"2000-2009", "1990-1999", "2020-2029", "2010-2019", "2010-2019",
"2000-2009", "2010-2019", "2010-2019", "2010-2019", "2010-2019",
"2010-2019", "2000-2009", "1990-1999", "2010-2019", "2000-2009",
"2010-2019", "2000-2009"), time = c(16L, 16L, 16L, 16L, 16L,
16L, 16L, 15L, 17L, 12L, 15L, 17L, 13L, 16L, 16L, 16L, 13L,
17L, 16L, 16L, 14L, 16L, 12L, 15L, 14L, 16L, 14L, 16L, 15L,
16L, 16L, 17L, 15L, 15L, 14L, 17L, 16L, 16L, 15L, 16L, 16L,
16L, 16L, 16L, 15L, 14L, 16L, 15L, 16L, 15L), lat_grouped = c("1",
"1", "1", "2", "3", "2", "1", "1", "2", "1", "1", "1", "2",
"3", "2", "1", "3", "3", "1", "1", "2", "2", "2", "1", "1",
"1", "1", "1", "2", "1", "2", "1", "2", "2", "2", "3", "2",
"1", "1", "1", "3", "1", "2", "1", "1", "1", "2", "1", "3",
"2"), year_centered = structure(c(31, 27, 24, 26, 28, 26,
29, 17, 33, -13, 20, 33, -1, 32, 27, 30, -6, 33, 30, 30,
3, 29, -10, 18, 10, 28, 5, 27, 14, 24, 28, 33, 21, 19, 8,
34, 31, 23, 22, 28, 24, 24, 28, 23, 19, 12, 31, 17, 30, 21
), class = "AsIs")), row.names = c(40906L, 33548L, 26710L,
30569L, 35789L, 30918L, 36341L, 10846L, 46766L, 1168L, 16820L,
46856L, 3802L, 45613L, 32327L, 37919L, 2369L, 49205L, 38390L,
38096L, 4785L, 37170L, 1708L, 12610L, 6617L, 34654L, 5151L, 32552L,
8202L, 26187L, 35657L, 46664L, 19124L, 16100L, 5963L, 52584L,
41832L, 23335L, 21987L, 34663L, 26205L, 26465L, 35498L, 23715L,
13550L, 7151L, 42097L, 10460L, 39831L, 20723L), class = "data.frame")
I would like to create an object that contains the number of rows for each individual julian_day within each decade. Is there a way to compute this for every decade/julian_day combination without writing the loop by hand?
Output would be something like:
decade | julian_day | nb_obs_perday_perdecade |
---|---|---|
1950-1959 | 3 | x = number of rows with julian_day = 3 in the decade 1950-1959 |
1960-1969 | 15 | y = number of rows with julian_day = 15 in the decade 1960-1969 |
and so on.
Thanks a lot for the help.
CodePudding user response:
To count the number of rows for each individual julian_day value within a given decade, you could use dplyr::group_by() on both decade and julian_day, then count the number of rows in each group with dplyr::count().
I assigned your dput() output to df:
# Requires dplyr to be attached, e.g. library(dplyr).
# Group the rows by decade and julian_day, then count the rows in each group.
# count() is called with no extra columns, so it tallies the existing groups
# into a column named `n`; the result is still grouped by both columns.
count(group_by(df, decade, julian_day))
output:
# Groups: decade, julian_day [44]
# decade julian_day n
# <chr> <dbl> <int>
# 1 1970-1979 35 1
# 2 1970-1979 77 1
# 3 1980-1989 81 1
# 4 1980-1989 104 1
# ...