import re
# Sample input: only sentences 2 and 3 carry a date right after the keyword;
# sentence 1 has the keyword but its only date belongs to "Dismissal Date".
s = """Sentence 1: Admit on TOM, SINI [STOM] Dismissal Date: 07/20/2021
Sentence 2: Admit 08/07/2021 TOM, SINI [STOM]
Sentence 3: Adm: 09/07/2018 TOM, SINI [STOM]"""
admission_keys = ['Admit', 'Adm:']
# Raw strings keep `\d` intact for the regex engine instead of relying on
# Python passing an unrecognised escape sequence through unchanged.
date_patterns = [r'\d{2}/\d{2}/\d{4}', r'\d{1,2}[-]\d{1,2}[-]\d{2,4}']
# The fragments have to be concatenated explicitly with `+`; a string literal
# placed directly after a call expression is a syntax error.
# NOTE: the lazy `.*?` between keyword and date still extends to the first
# date found on the same line, so sentence 1 is matched all the way to its
# dismissal date.
pattern = re.findall(
    r"(?:(?:" + '|'.join(admission_keys) + r").*?(?:" + '|'.join(date_patterns) + r'))',
    s,
)
print(pattern)
Required output: Admit 08/07/2021, Adm: 09/07/2018
The current pattern also matches "Admit on TOM, SINI [STOM] Dismissal Date: 07/20/2021". Is there a way to match a date only when it immediately follows the admission keyword?
CodePudding user response:
Here you go:
import re
# Keywords that introduce an admission date.
admission_keys = ['Admit', 'Adm:']
# Accepted date shapes; raw strings so `\d` is handed to the regex engine
# verbatim rather than being treated as a (invalid) string escape.
date_patterns = [r'\d{2}/\d{2}/\d{4}', r'\d{1,2}[-]\d{1,2}[-]\d{2,4}']
# Keywords are literal text, so escape them before building the alternation;
# the date entries are already regex fragments and are joined as-is.
adm_pattern = "|".join(re.escape(key) for key in admission_keys)
date_pattern = "|".join(date_patterns)
# A keyword, exactly one space, then a date: the date must sit directly next
# to the keyword, so a date appearing later on the line is not picked up.
pattern = re.compile(rf"((?:{adm_pattern}) (?:{date_pattern}))")
s = """
Sentence 1: Admit on TOM, SINI [STOM] Dismissal Date: 07/20/2021
Sentence 2: Admit 08/07/2021 TOM, SINI [STOM]
Sentence 3: Adm: 09/07/2018 TOM, SINI [STOM]
Sentence 4: Adm: 4-12-2014
Sentence 5: Admit 05-25-16
"""
# Keep the result and print it so the output is visible when run as a script,
# not only when evaluated in an interactive session.
matches = pattern.findall(s)
print(matches)
# ['Admit 08/07/2021', 'Adm: 09/07/2018', 'Adm: 4-12-2014', 'Admit 05-25-16']