Home > Back-end >  Sort a dataframe by datetime for every factor level in other column
Sort a dataframe by datetime for every factor level in other column

Time:11-16

I have the data frame below and I would like to sort it by the datetime column Run Date (from earliest to latest), but separately within each level of the column SN, which in this case has 2 levels: 134080504 and T133230503A.

# Example data in dput() form: a 50-row tibble-classed data frame with the
# columns SN (character, two distinct values), `Status [F/S]` (character) and
# `Run Date` (POSIXct seconds since the epoch, tzone "UTC").
dat<-structure(list(SN = c("134080504", "134080504", "134080504", 
"134080504", "134080504", "134080504", "134080504", "134080504", 
"134080504", "134080504", "134080504", "134080504", "134080504", 
"134080504", "134080504", "134080504", "134080504", "134080504", 
"134080504", "134080504", "134080504", "134080504", "134080504", 
"134080504", "134080504", "134080504", "134080504", "T133230503A", 
"T133230503A", "T133230503A", "T133230503A", "T133230503A", "T133230503A", 
"T133230503A", "T133230503A", "T133230503A", "T133230503A", "T133230503A", 
"T133230503A", "T133230503A", "T133230503A", "T133230503A", "T133230503A", 
"T133230503A", "T133230503A", "T133230503A", "T133230503A", "T133230503A", 
"T133230503A", "T133230503A"), `Status [F/S]` = c("S", "S", "S", 
"S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", 
"S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", 
"S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", 
"S", "S", "S", "S", "S", "S", "S", "S"), `Run Date` = structure(c(1256466600, 
1256264100, 1256228100, 1256219100, 1256202000, 1228897800, 1218629760, 
1177851660, 1177462800, 1172398500, 1171795800, 1164396600, 1163971800, 
1157567400, 1157224500, 1157079000, 1156874100, 1156752000, 1156410000, 
1148266800, 1147984200, 1147780800, 1147177800, 1142753400, 1141551000, 
1141372800, 1130468400, 1625724600, 1625724600, 1625724600, 1623628800, 
1623628800, 1623628800, 1623358800, 1623358800, 1623358800, 1566812460, 
1566812460, 1566812460, 1566665700, 1566665700, 1566665700, 1554503520, 
1554503520, 1554503520, 1554161880, 1554161880, 1554161880, 1553929200,
1553929200), class = c("POSIXct", "POSIXt"), tzone = "UTC")), row.names = c(NA, 
50L), class = c("tbl_df", "tbl", "data.frame"))

CodePudding user response:

Try it!

library(dplyr)
# List SN first so rows are grouped by SN, then sorted from earliest to latest
# `Run Date` within each SN. With `Run Date` as the leading key the whole
# table would be sorted by date and SN would only break ties.
dat %>% arrange(SN, `Run Date`)

CodePudding user response:

This answer sorts Run Date within each SN group while leaving the SN groups themselves in their original order (see the end of the answer for sorting by SN as well).

We could use a smart for loop here.

# Sort the rows of each SN group by `Run Date`, in place, while every group
# keeps the row positions it already occupies in `dat`.
for (g in unique(dat$SN)) {
  # which() yields integer positions and drops NA comparisons.
  in_group <- which(dat$SN == g)
  group_rows <- dat[in_group, ]
  # Order only this group's own dates. Indexing the global order() permutation
  # with the group mask would copy in rows from other groups whenever the
  # groups are not contiguous or their date ranges overlap.
  dat[in_group, ] <- group_rows[order(group_rows$`Run Date`), ]
}

dat
#             SN Status [F/S]            Run Date
# 1    134080504            S 2005-10-28 03:00:00
# 2    134080504            S 2006-03-03 08:00:00
# 3    134080504            S 2006-03-05 09:30:00
# 4    134080504            S 2006-03-19 07:30:00
# 5    134080504            S 2006-05-09 12:30:00
# 6    134080504            S 2006-05-16 12:00:00
# 7    134080504            S 2006-05-18 20:30:00
# 8    134080504            S 2006-05-22 03:00:00
# 9    134080504            S 2006-08-24 09:00:00
# 10   134080504            S 2006-08-28 08:00:00
# 11   134080504            S 2006-08-29 17:55:00
# 12   134080504            S 2006-09-01 02:50:00
# 13   134080504            S 2006-09-02 19:15:00
# 14   134080504            S 2006-09-06 18:30:00
# 15   134080504            S 2006-11-19 21:30:00
# 16   134080504            S 2006-11-24 19:30:00
# 17   134080504            S 2007-02-18 10:50:00
# 18   134080504            S 2007-02-25 10:15:00
# 19   134080504            S 2007-04-25 01:00:00
# 20   134080504            S 2007-04-29 13:01:00
# 21   134080504            S 2008-08-13 12:16:00
# 22   134080504            S 2008-12-10 08:30:00
# 23   134080504            S 2009-10-22 09:00:00
# 24   134080504            S 2009-10-22 13:45:00
# 25   134080504            S 2009-10-22 16:15:00
# 26   134080504            S 2009-10-23 02:15:00
# 27   134080504            S 2009-10-25 10:30:00
# 28 T133230503A            S 2019-03-30 07:00:00
# 29 T133230503A            S 2019-03-30 07:00:00
# 30 T133230503A            S 2019-04-01 23:38:00
# 31 T133230503A            S 2019-04-01 23:38:00
# 32 T133230503A            S 2019-04-01 23:38:00
# 33 T133230503A            S 2019-04-05 22:32:00
# 34 T133230503A            S 2019-04-05 22:32:00
# 35 T133230503A            S 2019-04-05 22:32:00
# 36 T133230503A            S 2019-08-24 16:55:00
# 37 T133230503A            S 2019-08-24 16:55:00
# 38 T133230503A            S 2019-08-24 16:55:00
# 39 T133230503A            S 2019-08-26 09:41:00
# 40 T133230503A            S 2019-08-26 09:41:00
# 41 T133230503A            S 2019-08-26 09:41:00
# 42 T133230503A            S 2021-06-10 21:00:00
# 43 T133230503A            S 2021-06-10 21:00:00
# 44 T133230503A            S 2021-06-10 21:00:00
# 45 T133230503A            S 2021-06-14 00:00:00
# 46 T133230503A            S 2021-06-14 00:00:00
# 47 T133230503A            S 2021-06-14 00:00:00
# 48 T133230503A            S 2021-07-08 06:10:00
# 49 T133230503A            S 2021-07-08 06:10:00
# 50 T133230503A            S 2021-07-08 06:10:00

Or try data.table.

library(data.table)
dt <- as.data.table(dat)
# Reorder whole rows within each SN group; groups stay in order of first
# appearance and the `by` column (SN) leads the result. Assigning
# sort(`Run Date`) back with := would sort that single column only and detach
# each date from the other values of its row.
dt <- dt[, .SD[order(`Run Date`)], by = SN]

dt
#             SN Status [F/S]            Run Date
# 1:   134080504            S 2005-10-28 03:00:00
# 2:   134080504            S 2006-03-03 08:00:00
# 3:   134080504            S 2006-03-05 09:30:00
# 4:   134080504            S 2006-03-19 07:30:00
# 5:   134080504            S 2006-05-09 12:30:00
# 6:   134080504            S 2006-05-16 12:00:00
# 7:   134080504            S 2006-05-18 20:30:00
# 8:   134080504            S 2006-05-22 03:00:00
# 9:   134080504            S 2006-08-24 09:00:00
# 10:   134080504            S 2006-08-28 08:00:00
# 11:   134080504            S 2006-08-29 17:55:00
# 12:   134080504            S 2006-09-01 02:50:00
# 13:   134080504            S 2006-09-02 19:15:00
# 14:   134080504            S 2006-09-06 18:30:00
# 15:   134080504            S 2006-11-19 21:30:00
# 16:   134080504            S 2006-11-24 19:30:00
# 17:   134080504            S 2007-02-18 10:50:00
# 18:   134080504            S 2007-02-25 10:15:00
# 19:   134080504            S 2007-04-25 01:00:00
# 20:   134080504            S 2007-04-29 13:01:00
# 21:   134080504            S 2008-08-13 12:16:00
# 22:   134080504            S 2008-12-10 08:30:00
# 23:   134080504            S 2009-10-22 09:00:00
# 24:   134080504            S 2009-10-22 13:45:00
# 25:   134080504            S 2009-10-22 16:15:00
# 26:   134080504            S 2009-10-23 02:15:00
# 27:   134080504            S 2009-10-25 10:30:00
# 28: T133230503A            S 2019-03-30 07:00:00
# 29: T133230503A            S 2019-03-30 07:00:00
# 30: T133230503A            S 2019-04-01 23:38:00
# 31: T133230503A            S 2019-04-01 23:38:00
# 32: T133230503A            S 2019-04-01 23:38:00
# 33: T133230503A            S 2019-04-05 22:32:00
# 34: T133230503A            S 2019-04-05 22:32:00
# 35: T133230503A            S 2019-04-05 22:32:00
# 36: T133230503A            S 2019-08-24 16:55:00
# 37: T133230503A            S 2019-08-24 16:55:00
# 38: T133230503A            S 2019-08-24 16:55:00
# 39: T133230503A            S 2019-08-26 09:41:00
# 40: T133230503A            S 2019-08-26 09:41:00
# 41: T133230503A            S 2019-08-26 09:41:00
# 42: T133230503A            S 2021-06-10 21:00:00
# 43: T133230503A            S 2021-06-10 21:00:00
# 44: T133230503A            S 2021-06-10 21:00:00
# 45: T133230503A            S 2021-06-14 00:00:00
# 46: T133230503A            S 2021-06-14 00:00:00
# 47: T133230503A            S 2021-06-14 00:00:00
# 48: T133230503A            S 2021-07-08 06:10:00
# 49: T133230503A            S 2021-07-08 06:10:00
# 50: T133230503A            S 2021-07-08 06:10:00
# SN Status [F/S]            Run Date

Otherwise we could of course trivially do

# Base R: sort by SN first, then by `Run Date` within each SN.
dat[order(dat$SN, dat$`Run Date`), ]

or

# data.table: sort by SN first, then by `Run Date` within each SN.
dt[order(SN, `Run Date`), ]

to order SN as well.

  •  Tags:  
  • r
  • Related