I have the dataframe below and I would like to sort it by the datetime column `Run Date` (from earliest to latest), but separately within each level of the column `SN`, which in this case has 2 levels: `134080504` and `T133230503A`.
# Example data: 50 runs for two serial numbers (27 rows for "134080504"
# followed by 23 rows for "T133230503A"). Every status is "S", and the run
# timestamps are given as seconds since the Unix epoch, in UTC.
dat <- structure(
  list(
    SN = rep(c("134080504", "T133230503A"), times = c(27L, 23L)),
    `Status [F/S]` = rep("S", 50L),
    `Run Date` = .POSIXct(
      c(
        # SN 134080504 (stored latest first)
        1256466600, 1256264100, 1256228100, 1256219100, 1256202000,
        1228897800, 1218629760, 1177851660, 1177462800, 1172398500,
        1171795800, 1164396600, 1163971800, 1157567400, 1157224500,
        1157079000, 1156874100, 1156752000, 1156410000, 1148266800,
        1147984200, 1147780800, 1147177800, 1142753400, 1141551000,
        1141372800, 1130468400,
        # SN T133230503A (stored latest first)
        1625724600, 1625724600, 1625724600, 1623628800, 1623628800,
        1623628800, 1623358800, 1623358800, 1623358800, 1566812460,
        1566812460, 1566812460, 1566665700, 1566665700, 1566665700,
        1554503520, 1554503520, 1554503520, 1554161880, 1554161880,
        1554161880, 1553929200, 1553929200
      ),
      tz = "UTC"
    )
  ),
  row.names = c(NA, 50L),
  class = c("tbl_df", "tbl", "data.frame")
)
CodePudding user response:
Try it!
library(dplyr)

# Sort by SN first so that all rows of one serial number stay together, then
# by Run Date (earliest to latest) within each SN. Listing `Run Date` first
# would sort the whole table by date and use SN only to break ties.
dat %>%
  arrange(SN, `Run Date`)
CodePudding user response:
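The focus of this answer is that the rows are ordered by `Run Date` within each `SN` group, whereas the `SN` groups themselves keep their original positions (see the end of the answer for ordering by `SN` as well).
We could use a `for` loop over the groups here.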
# Sort the rows of each SN group by Run Date (earliest first), writing the
# sorted rows back into the positions that group already occupies, so the
# placement of the SN groups themselves is left unchanged.
for (g in unique(dat$SN)) {
  # Row positions belonging to this group; which() also drops NA matches.
  rows <- which(dat$SN == g)
  # Order only this group's dates and map the result back to row positions.
  # Masking the global order() by position is only correct when the groups
  # happen to occupy the same slots in the global date ordering.
  dat[rows, ] <- dat[rows[order(dat$`Run Date`[rows])], ]
}
dat
# SN Status [F/S] Run Date
# 1 134080504 S 2005-10-28 03:00:00
# 2 134080504 S 2006-03-03 08:00:00
# 3 134080504 S 2006-03-05 09:30:00
# 4 134080504 S 2006-03-19 07:30:00
# 5 134080504 S 2006-05-09 12:30:00
# 6 134080504 S 2006-05-16 12:00:00
# 7 134080504 S 2006-05-18 20:30:00
# 8 134080504 S 2006-05-22 03:00:00
# 9 134080504 S 2006-08-24 09:00:00
# 10 134080504 S 2006-08-28 08:00:00
# 11 134080504 S 2006-08-29 17:55:00
# 12 134080504 S 2006-09-01 02:50:00
# 13 134080504 S 2006-09-02 19:15:00
# 14 134080504 S 2006-09-06 18:30:00
# 15 134080504 S 2006-11-19 21:30:00
# 16 134080504 S 2006-11-24 19:30:00
# 17 134080504 S 2007-02-18 10:50:00
# 18 134080504 S 2007-02-25 10:15:00
# 19 134080504 S 2007-04-25 01:00:00
# 20 134080504 S 2007-04-29 13:01:00
# 21 134080504 S 2008-08-13 12:16:00
# 22 134080504 S 2008-12-10 08:30:00
# 23 134080504 S 2009-10-22 09:00:00
# 24 134080504 S 2009-10-22 13:45:00
# 25 134080504 S 2009-10-22 16:15:00
# 26 134080504 S 2009-10-23 02:15:00
# 27 134080504 S 2009-10-25 10:30:00
# 28 T133230503A S 2019-03-30 07:00:00
# 29 T133230503A S 2019-03-30 07:00:00
# 30 T133230503A S 2019-04-01 23:38:00
# 31 T133230503A S 2019-04-01 23:38:00
# 32 T133230503A S 2019-04-01 23:38:00
# 33 T133230503A S 2019-04-05 22:32:00
# 34 T133230503A S 2019-04-05 22:32:00
# 35 T133230503A S 2019-04-05 22:32:00
# 36 T133230503A S 2019-08-24 16:55:00
# 37 T133230503A S 2019-08-24 16:55:00
# 38 T133230503A S 2019-08-24 16:55:00
# 39 T133230503A S 2019-08-26 09:41:00
# 40 T133230503A S 2019-08-26 09:41:00
# 41 T133230503A S 2019-08-26 09:41:00
# 42 T133230503A S 2021-06-10 21:00:00
# 43 T133230503A S 2021-06-10 21:00:00
# 44 T133230503A S 2021-06-10 21:00:00
# 45 T133230503A S 2021-06-14 00:00:00
# 46 T133230503A S 2021-06-14 00:00:00
# 47 T133230503A S 2021-06-14 00:00:00
# 48 T133230503A S 2021-07-08 06:10:00
# 49 T133230503A S 2021-07-08 06:10:00
# 50 T133230503A S 2021-07-08 06:10:00
Or use `data.table`:
library(data.table)
dt <- as.data.table(dat)
# Reorder whole rows within each SN group by Run Date (earliest first).
# Sorting only the `Run Date` column would detach the dates from the other
# columns of their rows, so every non-grouping column is reordered together.
cols <- setdiff(names(dt), "SN")
dt[, (cols) := .SD[order(`Run Date`)], by = SN, .SDcols = cols]
dt
# SN Status [F/S] Run Date
# 1: 134080504 S 2005-10-28 03:00:00
# 2: 134080504 S 2006-03-03 08:00:00
# 3: 134080504 S 2006-03-05 09:30:00
# 4: 134080504 S 2006-03-19 07:30:00
# 5: 134080504 S 2006-05-09 12:30:00
# 6: 134080504 S 2006-05-16 12:00:00
# 7: 134080504 S 2006-05-18 20:30:00
# 8: 134080504 S 2006-05-22 03:00:00
# 9: 134080504 S 2006-08-24 09:00:00
# 10: 134080504 S 2006-08-28 08:00:00
# 11: 134080504 S 2006-08-29 17:55:00
# 12: 134080504 S 2006-09-01 02:50:00
# 13: 134080504 S 2006-09-02 19:15:00
# 14: 134080504 S 2006-09-06 18:30:00
# 15: 134080504 S 2006-11-19 21:30:00
# 16: 134080504 S 2006-11-24 19:30:00
# 17: 134080504 S 2007-02-18 10:50:00
# 18: 134080504 S 2007-02-25 10:15:00
# 19: 134080504 S 2007-04-25 01:00:00
# 20: 134080504 S 2007-04-29 13:01:00
# 21: 134080504 S 2008-08-13 12:16:00
# 22: 134080504 S 2008-12-10 08:30:00
# 23: 134080504 S 2009-10-22 09:00:00
# 24: 134080504 S 2009-10-22 13:45:00
# 25: 134080504 S 2009-10-22 16:15:00
# 26: 134080504 S 2009-10-23 02:15:00
# 27: 134080504 S 2009-10-25 10:30:00
# 28: T133230503A S 2019-03-30 07:00:00
# 29: T133230503A S 2019-03-30 07:00:00
# 30: T133230503A S 2019-04-01 23:38:00
# 31: T133230503A S 2019-04-01 23:38:00
# 32: T133230503A S 2019-04-01 23:38:00
# 33: T133230503A S 2019-04-05 22:32:00
# 34: T133230503A S 2019-04-05 22:32:00
# 35: T133230503A S 2019-04-05 22:32:00
# 36: T133230503A S 2019-08-24 16:55:00
# 37: T133230503A S 2019-08-24 16:55:00
# 38: T133230503A S 2019-08-24 16:55:00
# 39: T133230503A S 2019-08-26 09:41:00
# 40: T133230503A S 2019-08-26 09:41:00
# 41: T133230503A S 2019-08-26 09:41:00
# 42: T133230503A S 2021-06-10 21:00:00
# 43: T133230503A S 2021-06-10 21:00:00
# 44: T133230503A S 2021-06-10 21:00:00
# 45: T133230503A S 2021-06-14 00:00:00
# 46: T133230503A S 2021-06-14 00:00:00
# 47: T133230503A S 2021-06-14 00:00:00
# 48: T133230503A S 2021-07-08 06:10:00
# 49: T133230503A S 2021-07-08 06:10:00
# 50: T133230503A S 2021-07-08 06:10:00
# SN Status [F/S] Run Date
Otherwise we could of course trivially do
dat[order(dat$SN, dat$`Run Date`), ]
or
dt[order(SN, `Run Date`), ]
to order by `SN` as well.