I need to extract every word that starts with a capital letter if, and only if, that word is preceded by the beginning of the sentence or by one of these options: (?:,and|and|her friends|,or |or |,)
import re
# Question script: extract capitalised names that follow the sentence start or
# one of the listed connectors, then try to print one list per name with that
# name blanked out. The aliasing in the loop below is the behaviour being asked
# about and is intentionally left as posted.
match_names = ""
input_sense = "Susan gave some cosmetic gifts to her friends Lisa, Veronica and Katy, but only Katy thanked her"
# Prepend a sentinel string that is unlikely to occur in real text, so the
# pattern can also match a name at the very beginning of the sentence.
input_sense = "rlt99ll" + input_sense
if match := re.findall(r"(?:rlt99ll|,and|and|her friends|,or |or |,)\s*([A-Z].*?\b)", input_sense):
    match_names = match
print("match names: ")
print(match_names)
input_sense = input_sense.replace("rlt99ll", "")  # the sentinel is only needed for the pattern
n = 0
print("match_auxs : ")
for name in match_names:
    # NOTE: this binds a second name to the SAME list object; it does not copy
    # it. Every entry blanked through match_aux is therefore also blanked in
    # match_names, which is why the blanks accumulate across iterations.
    match_aux = match_names
    for m in match_aux:
        if (m == name):
            match_aux[n] = ""
        n += 1
    n = 0  # reset the position counter for the next name
    print(match_aux)
I need the output to be these lists:
match names:
['Susan', 'Lisa', 'Veronica', 'Katy']
match_auxs :
['','Lisa', 'Veronica', 'Katy']
['Susan', '', 'Veronica', 'Katy']
['Susan', 'Lisa', '', 'Katy']
['Susan', 'Lisa', 'Veronica', '']
But I get this instead (and it's wrong):
match names:
['Susan', 'Lisa', 'Veronica', 'Katy']
match_auxs :
['', 'Lisa', 'Veronica', 'Katy']
['', '', 'Veronica', 'Katy']
['', '', '', 'Katy']
['', '', '', '']
CodePudding user response:
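As mentioned in the comments, assigning a list to another variable doesn't create a copy of it; both names refer to the same list object, so a change made through one is visible through the other. In addition, your code can be simplified by using functions like enumerate: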
import re
# Extract capitalised names that follow the sentence start or one of the listed
# connectors, then print one list per name with only that name blanked out.
input_sense = "Susan gave some cosmetic gifts to her friends Lisa, Veronica and Katy, but only Katy thanked her"
# Prepend a sentinel string that is unlikely to occur in real text, so the
# pattern can also match a name at the very beginning of the sentence.
input_sense = "rlt99ll" + input_sense
# re.findall always returns a list (empty when nothing matches), so
# match_names is a list after this line whether or not the branch is taken.
if match_names := re.findall(r"(?:rlt99ll|,and|and|her friends|,or |or |,)\s*([A-Z].*?\b)", input_sense):
    print(f"match names: {match_names}")
input_sense = input_sense.replace("rlt99ll", "")  # the sentinel is only needed for the pattern
print("match_auxs: ")
for index, _ in enumerate(match_names):
    # Work on a shallow copy so blanking one entry never touches match_names.
    match_aux = match_names.copy()
    match_aux[index] = ""
    print(match_aux)
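If you don't want to copy the list on every iteration (for speed), this code will also work; it blanks one entry at a time in place and restores the previously blanked entry before moving on: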
import re
# Copy-free variant: blank one entry of match_names at a time in place, print
# the list, and restore the entry before blanking the next one.
input_sense = "Susan gave some cosmetic gifts to her friends Lisa, Veronica and Katy, but only Katy thanked her"
# Prepend a sentinel string that is unlikely to occur in real text, so the
# pattern can also match a name at the very beginning of the sentence.
input_sense = "rlt99ll" + input_sense
# re.findall always returns a list (empty when nothing matches), so
# match_names is a list after this line whether or not the branch is taken.
if match_names := re.findall(r"(?:rlt99ll|,and|and|her friends|,or |or |,)\s*([A-Z].*?\b)", input_sense):
    print(f"match names: {match_names}")
input_sense = input_sense.replace("rlt99ll", "")  # the sentinel is only needed for the pattern
print("match_auxs: ")
prev = ""
for index, name in enumerate(match_names):
    if index > 0:
        # Put back the entry that was blanked on the previous pass.
        match_names[index - 1] = prev
    prev = name  # remember the current entry before blanking it
    match_names[index] = ""
    print(match_names)
if match_names:
    # Restore the last blanked entry so the list ends up unchanged. The guard
    # avoids an IndexError when no names were matched.
    match_names[-1] = prev