import re
# Sample sentence holding two "[(start)to(end)]" periods, each followed by a
# second bracketed segment, plus loose clock times outside any period.
input_text_to_check = (
    "En ese sitio [(2022-09-04 15:10 pm)to(2022-09-04 04:30 am)][] "
    "vi que hay algunos campamentos y "
    "[(2022-09-06 04:00 am)to(2022-09-05 04:15 am)]"
    "[fuimos a caminar hasta las 22:00 pm montanas, de alli] "
    "cerca de hasta las 23:00 pm hs"
)
I need to swap the two datetimes of a period whenever they appear in reverse chronological order, for example:
Input: [(2022-09-04 15:10 pm)to(2022-09-04 04:30 am)]
Correct output: [(2022-09-04 04:30 am)to(2022-09-04 15:10 pm)]
Input: [(2022-09-06 04:00 am)to(2022-09-05 04:15 am)]
Correct output: [(2022-09-05 04:15 am)to(2022-09-06 04:00 am)]
And this is the function that contains the replacement logic; it must receive the substring holding the year, month, day, hour, minute, and am/pm values:
def reorder_inconsistent_time_periods(input_text_to_check):
    """Rewrite ``[(start)to(end)]`` datetime periods so start comes first.

    Args:
        input_text_to_check: Either a ``str`` containing zero or more
            periods, or the match object that ``re.sub`` passes to a
            callback (only its full matched text, ``group(0)``, is used, so
            the caller's own capture groups do not matter).

    Returns:
        The text with each period handled as follows:

        * end earlier than start -> the two datetimes are swapped;
        * start equal to end     -> collapsed to a single ``[(datetime)]``;
        * already chronological  -> left exactly as written.

        Text that contains no period is returned unchanged.
    """
    # Literal brackets/parentheses must be escaped: left bare, "[(" opens a
    # character class and ")" closes a group that was never opened, which is
    # what raises "unbalanced parenthesis".
    date_in_numbers = r"(\d{4})[\s|]*-[\s|]*(\d{2})[\s|]*-[\s|]*(\d{2})"
    time_in_numbers = r"(\d{2})[\s|]*(?::|)[\s|]*(\d{2})[\s|]*(am|pm)"
    one_datetime = date_in_numbers + " " + time_in_numbers
    date_and_time_re_pattern = (
        r"\[\(" + one_datetime + r"\)to\(" + one_datetime + r"\)\]"
    )

    def _format_datetime(parts):
        # parts = (year, month, day, hour, minute, am_or_pm), all strings.
        year, month, day, hour, minute, am_or_pm = parts
        return f"{year}-{month}-{day} {hour}:{minute} {am_or_pm}"

    def _reorder_period(period):
        # Six captured fields per datetime, twelve per period.
        fields = period.groups()
        first, second = fields[:6], fields[6:]
        # Compare as whole (year, month, day, hour, minute) tuples so a later
        # year is never overruled by an earlier month/day/hour/minute.
        # The am/pm marker is not part of the comparison (the sample data
        # pairs it with 24-hour values such as "15:10 pm").
        moment_1 = tuple(int(value) for value in first[:5])
        moment_2 = tuple(int(value) for value in second[:5])
        if moment_2 < moment_1:
            return (
                "[(" + _format_datetime(second)
                + ")to(" + _format_datetime(first) + ")]"
            )
        if moment_1 == moment_2:
            # A zero-length period is expressed as a single date.
            return "[(" + _format_datetime(first) + ")]"
        # Already in order: keep the original text verbatim.
        return period.group(0)

    if isinstance(input_text_to_check, str):
        text = input_text_to_check
    else:
        # Called as a re.sub callback: work on the matched substring.
        text = input_text_to_check.group(0)
    return re.sub(
        date_and_time_re_pattern, _reorder_period, text, flags=re.IGNORECASE
    )
Here I build the regex that finds each time period; every match is passed to the function in charge of reordering it:
# Locate every "[(date time)to(date time)]" period in the text and let the
# callback decide how each one is rewritten.
date_in_numbers = r"\d{4}[\s|]*-[\s|]*\d{2}[\s|]*-[\s|]*\d{2}"
time_in_numbers = r"(\d{2})[\s|]*(?::|)[\s|]*(\d{2})[\s|]*(?:am|pm)"
# The literal "[", "(", ")" and "]" delimiters are escaped; unescaped they are
# regex syntax and the pattern fails to compile ("unbalanced parenthesis").
date_and_time_re_pattern = (
    r"\[\(" + date_in_numbers + " " + time_in_numbers
    + r"\)to\(" + date_in_numbers + " " + time_in_numbers + r"\)\]"
)
# re.sub calls the function once per match, passing the match object.
input_text_to_check = re.sub(
    date_and_time_re_pattern,
    reorder_inconsistent_time_periods,
    input_text_to_check,
    flags=re.IGNORECASE,
)
print(repr(input_text_to_check))
This should print the original string, but with the time periods reordered:
"En ese sitio [(2022-09-04 04:30 am)to(2022-09-04 15:10 pm)][] vi que hay algunos campamentos y [(2022-09-05 04:15 am)to(2022-09-06 04:00 am)][fuimos a caminar hasta las 22:00 pm montanas, de alli] cerca de hasta las 23:00 pm hs"
The problem is that the following error is raised on the line that uses the regex pattern, and I don't understand why it appears.
Traceback (most recent call last):
m0 = re.search(date_and_time_re_pattern, input_text_to_check, re.IGNORECASE)
raise source.error("unbalanced parenthesis")
re.error: unbalanced parenthesis at position 91
I hope you can help me, maybe even find a more convenient way to do this.
CodePudding user response:
Here is a solution using re.sub with a callback function. We match each datetime and its am/pm marker in 4 separate capture groups, parse each datetime string into a bona fide datetime, and then compare the two datetimes to determine the correct order. Note that your times are already in 24-hour format, so the am/pm values are superfluous and we don't need to parse them.
def repl(m):
    """Return the matched ``[(start)to(end)]`` period in chronological order.

    Meant to be used as a ``re.sub`` callback.

    Args:
        m: Match object exposing four groups: 1 = first datetime written as
            ``YYYY-MM-DD HH:MM``, 2 = its am/pm marker, 3 = second datetime,
            4 = its am/pm marker.

    Returns:
        The period with the earlier datetime first. Each am/pm marker stays
        attached to its own datetime. If either datetime cannot be parsed
        (e.g. month 13), the matched text is returned unchanged.
    """
    # Imported locally: ``datetime`` is not imported in the visible part of
    # this file, only ``re`` is.
    from datetime import datetime

    str1, p1, str2, p2 = m.group(1, 2, 3, 4)
    try:
        # %H is the 24-hour clock, so the am/pm markers are not parsed.
        dt1 = datetime.strptime(str1, '%Y-%m-%d %H:%M')
        dt2 = datetime.strptime(str2, '%Y-%m-%d %H:%M')
    except ValueError:
        # Not a real calendar value: leave the text alone rather than abort
        # the whole substitution.
        return m.group(0)
    if dt1 <= dt2:
        # Already ordered (or identical): keep the order as written.
        return f"[({str1} {p1})to({str2} {p2})]"
    return f"[({str2} {p2})to({str1} {p1})]"
# Demo run: every "[(start)to(end)]" period found in the sample sentence is
# handed to ``repl``, and the rewritten sentence is printed.
input_text_to_check = "En ese sitio [(2022-09-04 15:10 pm)to(2022-09-04 04:30 am)][] vi que hay algunos campamentos y [(2022-09-06 04:00 am)to(2022-09-05 04:15 am)][fuimos a caminar hasta las 22:00 pm montanas, de alli] cerca de hasta las 23:00 pm hs"
period_regex = re.compile(r'\[\((\d{4}-\d{2}-\d{2} \d{2}:\d{2}) (am|pm)\)to\((\d{4}-\d{2}-\d{2} \d{2}:\d{2}) (am|pm)\)\]')
output = period_regex.sub(repl, input_text_to_check)
print(output)
This prints:
En ese sitio [(2022-09-04 04:30 am)to(2022-09-04 15:10 pm)][] vi que hay algunos campamentos y [(2022-09-05 04:15 am)to(2022-09-06 04:00 am)][fuimos a caminar hasta las 22:00 pm montanas, de alli] cerca de hasta las 23:00 pm hs