I have a list variable `listA` as below:
[
'abcd1-2 4d4e',
'xyz0-1 551',
'foo 3ea',
'bar1 2bd',
'mc-mqisd0-2 77a'
]
I need to build a dict out of this list, expanding the digit range in the first field. The end result should look like this:
{
abcd1: 4d4e,
abcd2: 4d4e,
xyz0: 551,
xyz1: 551,
foo: 3ea,
bar1: 2bd,
mc-mqisd0: 77a,
mc-mqisd1: 77a,
mc-mqisd2: 77a,
}
I have created the function below. It works with Python 3 but throws an "unmatched group" error in some older Python versions.
Is anything wrong with the regex here?
def listFln(listA):
    r"""Expand ``'<name><start>-<end> <value>'`` items into a ``{name: value}`` dict.

    Each item of ``listA`` is split on whitespace into a name field and a
    value. When the name ends in a ``<start>-<end>`` digit range, one key is
    produced per number in that inclusive range (``'xyz0-1 551'`` gives
    ``xyz0`` and ``xyz1``); otherwise the item is kept unchanged.

    NOTE: the ``r'\2'`` replacement refers to the optional second group. For a
    name without a range (e.g. ``'foo'``) that group takes no part in the
    match; Python 3.5+ substitutes an empty string, while older versions
    (including 2.7) raise ``error: unmatched group``.

    :param listA: iterable of ``'<name> <value>'`` strings.
    :return: dict mapping every expanded name to its value.
    """
    import re
    fL = []
    for i in listA:
        aL = i.split()[0]
        bL = i.split()[1]
        # Group 1 is the name prefix, group 2 the optional "<start>-<end>" range.
        comp = re.sub(r'^(.+?)(\d+-\d+)?$', r'\1', aL)
        cmpCountR = re.sub(r'^(.+?)(\d+-\d+)?$', r'\2', aL)
        if cmpCountR.strip():
            nStart = int(cmpCountR.split('-')[0])
            nEnd = int(cmpCountR.split('-')[1])
            # The range is inclusive on both ends, hence nEnd + 1.
            for j in range(nStart, nEnd + 1):
                fL.append(comp + str(j) + ' ' + bL)
        else:
            fL.append(i)
    return dict([k.split() for k in fL])
Error:
cmpCountR = re.sub('^(.+?)(\d+-\d+)?$',r'\2',aL)
File "/usr/lib64/python2.7/re.py", line 151, in sub
return _compile(pattern, flags).sub(repl, string, count)
File "/usr/lib64/python2.7/re.py", line 275, in filter
return sre_parse.expand_template(template, match)
File "/usr/lib64/python2.7/sre_parse.py", line 800, in expand_template
raise error, "unmatched group"
CodePudding user response:
Here's a simpler version using `findall` instead of `sub`, successfully tested on Python 2.7. It also directly creates the dict instead of first building a list:
# Sample input: every entry is "<name, optionally ending in start-end> <value>".
mylist = [
    'abcd1-2 4d4e',
    'xyz0-1 551',
    'foo 3ea',
    'bar1 2bd',
    'mc-mqisd0-2 77a',
]
def listFln(listA):
    """Expand ``'<name><start>-<end> <value>'`` items into a ``{name: value}`` dict.

    Each item of ``listA`` is split on whitespace into a name field and a
    value. When the name ends in a ``<start>-<end>`` digit range, one key is
    produced per number in that inclusive range; otherwise the name is used
    as the key unchanged.

    :param listA: iterable of ``'<name> <value>'`` strings.
    :return: dict mapping every expanded name to its value.
    """
    import re
    fL = {}
    for i in listA:
        fields = i.split()
        aL = fields[0]
        bL = fields[1]
        # findall yields one (prefix, range) tuple per match; unlike a
        # r'\2' replacement template, an unmatched optional group simply
        # comes back as an empty string.
        comp = re.findall(r'^(.+?)(\d+-\d+)?$', aL)[0]
        if comp[1]:
            bounds = comp[1].split('-')
            nStart = int(bounds[0])
            nEnd = int(bounds[1])
            # The range is inclusive on both ends, hence nEnd + 1.
            for j in range(nStart, nEnd + 1):
                fL[comp[0] + str(j)] = bL
        else:
            fL[comp[0]] = bL
    return fL
print(listFln(mylist))
# {'abcd1': '4d4e',
# 'abcd2': '4d4e',
# 'xyz0': '551',
# 'xyz1': '551',
# 'foo': '3ea',
# 'bar1': '2bd',
# 'mc-mqisd0': '77a',
# 'mc-mqisd1': '77a',
# 'mc-mqisd2': '77a'}
CodePudding user response:
Used Python 2.7 to reproduce:
Both patterns compile
import re
# Both patterns are identical. Raw strings keep the \d escapes intact.
regex1 = r'^(.+?)(\d+-\d+)?$'
regex2 = r'^(.+?)(\d+-\d+)?$'
# The compiled patterns are identical too: both calls report the same object
# address (presumably the same object served from re's pattern cache).
re.compile(regex1)  # <_sre.SRE_Pattern object at 0x7f575ef8fd40>
re.compile(regex2)  # <_sre.SRE_Pattern object at 0x7f575ef8fd40>
Note: Compiling the pattern once with `re.compile()` saves time when it is reused many times, as in this loop.
Fix: test for groups found
The error message indicates that a referenced group did not take part in the match.
Put another way: the replacement template passed to `re.sub` (see the Python 2.7 docs) references a group, here the second capturing group (`\2`), that was not found or captured in the given input string:
sre_constants.error: unmatched group
To fix this, we should test which groups were actually found in the match.
To do so, use `re.match(regex, str)`, or the compiled variant `pattern.match(str)`, to create a `Match` object, then call `Match.groups()` to get all groups as a tuple (groups that did not match show up as `None`).
import re

# Compiled once at module level so every loop iteration reuses it.
# Group 1 is the name prefix, group 2 the optional "<start>-<end>" range.
regex = r'^(.+?)(\d+-\d+)?$'
pattern = re.compile(regex)  # <_sre.SRE_Pattern object at 0x7f575ef8fd40>


def listFln(listA):
    """Expand ``'<name><start>-<end> <value>'`` items into a ``{name: value}`` dict.

    Each item of ``listA`` is split on whitespace into a name field and a
    value. When the name ends in a ``<start>-<end>`` digit range, one key is
    produced per number in that inclusive range; otherwise the name is used
    as the key unchanged.

    The groups are read from the ``Match`` object instead of through a
    ``r'\\2'`` replacement template, so a name without a range never triggers
    the "unmatched group" error.

    :param listA: iterable of ``'<name> <value>'`` strings.
    :return: dict mapping every expanded name to its value.
    """
    result = {}
    for i in listA:
        fields = i.split()
        aL = fields[0]
        bL = fields[1]
        # Test for a match and for the groups that were actually found.
        match = pattern.match(aL)
        if match is None:
            # Nothing to expand: keep the name as it is.
            result[aL] = bL
            continue
        # Tuple of all subgroups; the range group is None when it is absent
        # (see the 3rd and 4th sample entries).
        print("DEBUG groups:", match.groups())
        comp, cmpCountR = match.groups()
        if cmpCountR is None:
            # No range: keep the entry instead of skipping it.
            result[comp] = bL
            continue
        parts = cmpCountR.split('-')
        nStart = int(parts[0])
        nEnd = int(parts[1])
        # The range is inclusive on both ends, hence nEnd + 1.
        for j in range(nStart, nEnd + 1):
            result[comp + str(j)] = bL
    return result


listA = [
    'abcd1-2 4d4e',
    'xyz0-1 551',
    'foo 3ea',
    'bar1 2bd',
    'mc-mqisd0-2 77a',
]
as_dict = listFln(listA)
print("resulting dict:", as_dict)
# Prints (Python 2.7, where print(a, b) shows a tuple; dict key order may vary):
# ('DEBUG groups:', ('abcd', '1-2'))
# ('DEBUG groups:', ('xyz', '0-1'))
# ('DEBUG groups:', ('foo', None))
# ('DEBUG groups:', ('bar1', None))
# ('DEBUG groups:', ('mc-mqisd', '0-2'))
# ('resulting dict:', {'abcd1': '4d4e', 'abcd2': '4d4e', 'xyz0': '551', 'xyz1': '551', 'foo': '3ea', 'bar1': '2bd', 'mc-mqisd0': '77a', 'mc-mqisd1': '77a', 'mc-mqisd2': '77a'})