I would like to add a regex to this code that detects when a reference ends with -FT(NUMBER), for example CHB-16236-FT-FT045, and then increments that reference to give CHB-16236-FT-FT046.
import re
# Pole references: POT/PHT prefix, a 5-digit number, a 2-letter code and a
# trailing number, separated by dashes.
pattern_poteaux = r"(POT|PHT)+[-]+[0-9]{5}[-]+[a-zA-Z]{2}[-]+\d+$"
# Chamber references: CHB prefix, a number, a 2-letter code and a trailing
# number that may carry an "FT" prefix (e.g. CHB-16236-FT-FT045).
pattern_chambre = r"CHB+[-]+[0-9]+[-]+[a-zA-Z]{2}[-]+(?:FT)?\d+$"
# Bookkeeping lists: increment_ref appends accepted references to
# old_references and rejected ones to invalid_references.
old_references = []
new_references = []
invalid_references = []
def attribute_check(pattern, sample_str):
    """Report whether a value matches a regular expression.

    @param pattern: regex pattern (string or compiled) to search for.
    @param sample_str: value to test; it is converted with ``str()`` first.
    @return: True if ``re.search`` finds the pattern anywhere in the string,
        False if not.
    """
    return re.search(pattern, str(sample_str)) is not None
def increment_ref(pattern, sample_str):
    """Increment the trailing number of a reference by one.

    @param pattern: regex the reference must match to be accepted.
    @param sample_str: reference to increment, e.g. 'CHB-16236-FT-FT045'.
    @return: the reference with its trailing digits incremented and
        zero-padded to at least their original width, or None when the
        reference does not match ``pattern``.

    Side effects: accepted references are appended to ``old_references``,
    rejected ones to ``invalid_references``.
    """
    if not attribute_check(pattern, sample_str):
        invalid_references.append(sample_str)
        return None
    old_references.append(sample_str)

    def bump(match):
        # Only the digits are captured, so any "FT" prefix is left untouched
        # and int() never sees letters.
        digits = match.group()
        return str(int(digits) + 1).zfill(len(digits))

    return re.sub(r"[0-9]+$", bump, str(sample_str))
if __name__ == "__main__":
    reference_chamber = 'CHB-16236-FT-FT045'
    # TODO increment reference with FT001 at the end
    # Print the result; the incremented reference was previously discarded.
    print(increment_ref(pattern_chambre, reference_chamber))
CodePudding user response:
You need to make use of a replacement callback:
# coding=utf8
import re
# Regex to find "-FT{digits}{end of line}"; the lookbehind keeps "-FT" out of
# the match so only the digits (group 1) are replaced.
regex = r"(?<=-FT)(\d+)$"
# Increment the digit by one and maintain left pad zeros
def subst(m):
    """Return group 1 of match ``m`` incremented by one.

    The result is zero-padded to the width of the original digits; it grows
    by one character when the increment carries (e.g. '999' -> '1000').
    """
    digits = m.group(1)
    # Format spec "0<width>" pads with zeros to the original digit count.
    return format(int(digits) + 1, '0' + str(len(digits)))
# Test your data
for reference in (
    "CHB-16236-FT-FT005",
    "CHB-16236-FT-FT009",
    "CHB-16236-FT-FT045",
    "CHB-16236-FT-FT145",
    "CHB-16236-FT-FT999",
    "CHB-16236-FT-FT0999",
    "CHB-16236-FT-FT2009",
    "CHB-16236-FT-FT09998",
):
    # flags is passed by keyword: positional count/flags arguments to re.sub
    # are deprecated since Python 3.13.
    print(re.sub(regex, subst, reference, flags=re.MULTILINE))
Results:
CHB-16236-FT-FT006
CHB-16236-FT-FT010
CHB-16236-FT-FT046
CHB-16236-FT-FT146
CHB-16236-FT-FT1000
CHB-16236-FT-FT1000
CHB-16236-FT-FT2010
CHB-16236-FT-FT09999
CodePudding user response:
Your pattern:
- allows for runs of `-`s, which may not be correct
- does not try to match the characters that appear between the last `-` and the final number
CodePudding user response:
Try this. Check the `#Modified` comments to see the modifications from your original code:
import re
pattern_poteaux = r"(POT|PHT)+[-]+[0-9]{5}[-]+[a-zA-Z]{2}[-]+\d+$"
# Group 1: everything up to and including the counter's leading zeros.
# Group 2: the leading zeros alone.
# Group 3: the significant digits of the counter ("0" when it is all zeros).
# "[1-9]\d*" rather than "[1-9]*" so counters with interior zeros (e.g. 105)
# are captured whole.
pattern_chambre = r"(CHB-\d{5}-[a-zA-Z]{2}-FT(0*))([1-9]\d*|0)$"  # Modified
old_references = []
new_references = []
invalid_references = []
def attribute_check(pattern, sample_str):
    """Report whether a value matches a regular expression.

    @param pattern: regex pattern (string or compiled) to search for.
    @param sample_str: value to test; it is converted with ``str()`` first.
    @return: True if ``re.search`` finds the pattern anywhere in the string,
        False if not.
    """
    return re.search(pattern, str(sample_str)) is not None
def increment_ref(pattern, sample_str):
    """Increment the trailing number of a reference by one.

    @param pattern: regex the reference must match to be accepted.
    @param sample_str: reference to increment, e.g. 'CHB-16236-FT-FT045'.
    @return: the reference with its trailing digits incremented and
        zero-padded to at least their original width, or None when the
        reference does not match ``pattern``.

    Side effects: accepted references are appended to ``old_references``,
    rejected ones to ``invalid_references``.
    """
    if not attribute_check(pattern, sample_str):
        invalid_references.append(sample_str)
        return None
    old_references.append(sample_str)

    # Modified
    def bump(match):
        # Work on the whole digit run so the padding width survives a carry
        # (009 -> 010) and an all-zero counter (000 -> 001) does not crash.
        digits = match.group()
        return str(int(digits) + 1).zfill(len(digits))

    # The substitution targets the trailing digits directly, so it does not
    # depend on the capture groups of whichever pattern the caller passed.
    return re.sub(r"[0-9]+$", bump, str(sample_str))
if __name__ == "__main__":
    reference_chamber = 'CHB-16236-FT-FT045'
    # TODO increment reference with FT001 at the end
    samples = (
        reference_chamber,
        'CHB-16236-FT-FT123',
        'CHB-16236-FT-FT01234',
        'CHB-16236-FT-FT00012',
    )
    for sample in samples:
        print(increment_ref(pattern_chambre, sample))
Output:
CHB-16236-FT-FT046
CHB-16236-FT-FT124
CHB-16236-FT-FT01235
CHB-16236-FT-FT00013