Is there a way to extract, in one call, all the matched subgroups of a string according to a regular expression?
I have a date like this:
Thu, 07 Apr 2022 15:03:32 GMT
And I created the following regexp to extract all the parts of this date:
(* Compiled pattern for a date such as: Thu, 07 Apr 2022 15:03:32 GMT
   Capture groups 1 to 7 are, in order: weekday name, day, month name,
   year, hour, minute, second. Every field is one or more characters,
   hence the + quantifier after each character class. The trailing
   part of the pattern swallows whatever follows the seconds. *)
let re =
  Str.regexp
    {|\([a-zA-Z]+\), \([0-9]+\) \([a-zA-Z]+\) \([0-9]+\) \([0-9]+\):\([0-9]+\):\([0-9]+\).*|}
To extract each part, I use it like this:
(* Prints the seven fields of [date] (weekday, day, month, year, hour,
   minute, second) on stderr.

   Each field is obtained by a separate call to [Str.replace_first]:
   because the pattern ends with a catch-all, a match starting at the
   beginning of [date] covers the whole string, so replacing it with a
   single group reference leaves only that group. If [date] does not
   match, [Str.replace_first] returns it unchanged. *)
let parse_date date =
  let re =
    Str.regexp
      {|\([a-zA-Z]+\), \([0-9]+\) \([a-zA-Z]+\) \([0-9]+\) \([0-9]+\):\([0-9]+\):\([0-9]+\).*|}
  in
  let wday = Str.replace_first re {|\1|} date in
  let day = Str.replace_first re {|\2|} date in
  let mon = Str.replace_first re {|\3|} date in
  let year = Str.replace_first re {|\4|} date in
  let hour = Str.replace_first re {|\5|} date in
  let min = Str.replace_first re {|\6|} date in
  let sec = Str.replace_first re {|\7|} date in
  Format.eprintf "RE DATE: %s %s %s %s %s %s %s@." wday day mon year hour min
    sec
If the parts were stored in an array I could easily use it like this:
(* Wished-for version: fetch every capture group in a single call and
   index into the resulting array (index 0 would be the whole match,
   indices 1 to 7 the individual fields). This is illustrative only and
   does not compile, since the Str library has no such function. *)
let parse_date date =
  let re =
    Str.regexp
      {|\([a-zA-Z]+\), \([0-9]+\) \([a-zA-Z]+\) \([0-9]+\) \([0-9]+\):\([0-9]+\):\([0-9]+\).*|}
  in
  let parts = Str.match_groups re date in (* this function doesn't exist *)
  let wday = parts.(1) in
  let day = parts.(2) in
  let mon = parts.(3) in
  let year = parts.(4) in
  let hour = parts.(5) in
  let min = parts.(6) in
  let sec = parts.(7) in
  Format.eprintf "RE DATE: %s %s %s %s %s %s %s@." wday day mon year hour min
    sec
but this doesn't appear to exist. Is there another way to do it or is my solution the only one available?
Since this isn't an XY problem: my actual goal is to extract each part of a date, so if there's a solution other than using Str, I'll be happy to use it.
CodePudding user response:
You can use Str.matched_group to return a particular capture group's match:
(** [parse_date date] matches [date] against the expected layout
    (weekday, day, month, year, hour:minute:second, anything else)
    starting at position 0 and returns a one-line summary of the seven
    captured fields, or ["RE DATE: Not matched"] when [date] does not
    have that layout. The result carries no trailing newline, so the
    caller decides how to terminate the line. *)
let parse_date date =
  let re =
    Str.regexp
      {|\([a-zA-Z]+\), \([0-9]+\) \([a-zA-Z]+\) \([0-9]+\) \([0-9]+\):\([0-9]+\):\([0-9]+\).*|}
  in
  if Str.string_match re date 0 then
    (* Str.matched_group reads the state left by the most recent match,
       so all groups are fetched right after string_match succeeds. *)
    let wday = Str.matched_group 1 date in
    let day = Str.matched_group 2 date in
    let mon = Str.matched_group 3 date in
    let year = Str.matched_group 4 date in
    let hour = Str.matched_group 5 date in
    let min = Str.matched_group 6 date in
    let sec = Str.matched_group 7 date in
    Printf.sprintf "RE DATE: %s %s %s %s %s %s %s" wday day mon year hour min
      sec
  else "RE DATE: Not matched"

(* Demo: print the summary for a sample date. *)
let () = parse_date "Thu, 07 Apr 2022 15:03:32 GMT" |> print_endline
The Str package is pretty primitive, though. I'd suggest using a different library for regular expressions, like PCRE-OCaml. It does have a way to get an array of matched groups:
(** [parse_date2 date] is the PCRE counterpart of the Str-based parser:
    it searches [date] for the expected layout and returns a one-line
    summary of the seven captured fields, or ["RE DATE: Not matched"]
    when no match is found. The result carries no trailing newline. *)
let parse_date2 date =
  let rex =
    Pcre.regexp
      {|([a-zA-Z]+), ([0-9]+) ([a-zA-Z]+) ([0-9]+) ([0-9]+):([0-9]+):([0-9]+).*|}
  in
  (* Pcre.exec raises Not_found when the pattern does not match; handling
     it as a match case keeps the exception scope limited to the lookup. *)
  match Pcre.exec ~rex date |> Pcre.get_substrings with
  | parts ->
    (* Index 0 holds the whole match; the fields start at index 1. *)
    let wday = parts.(1) in
    let day = parts.(2) in
    let mon = parts.(3) in
    let year = parts.(4) in
    let hour = parts.(5) in
    let min = parts.(6) in
    let sec = parts.(7) in
    Printf.sprintf "RE DATE: %s %s %s %s %s %s %s" wday day mon year hour min
      sec
  | exception Not_found -> "RE DATE: Not matched"

(* Demo: print the summary for a sample date. *)
let () = parse_date2 "Thu, 07 Apr 2022 15:03:32 GMT" |> print_endline
CodePudding user response:
For a simple format with a fixed number of fields and separators, Scanf might be enough:
(** [date s] parses a date laid out like [Thu, 07 Apr 2022 15:03:32 GMT]
    and returns the tuple
    [(day_name, day, month, year, hour, minute, second, timezone)],
    with the numeric fields converted to [int].

    The weekday name is read up to the comma, which is then skipped; the
    remaining fields are separated by whitespace or colons.

    @raise Scanf.Scan_failure if [s] does not follow this layout.
    @raise Failure if a numeric field cannot be converted to an [int].
    @raise End_of_file if [s] ends before every field has been read. *)
let date s =
  Scanf.sscanf s "%s@, %d %s %d %d:%d:%d %s"
    (fun day_name day month year hour min sec timezone ->
      (day_name, day, month, year, hour, min, sec, timezone))

(* Demo: parse a sample date. *)
let x = date "Thu, 07 Apr 2022 15:03:32 GMT"