I have the following dict:
# Input: sequence name -> parenthesised, comma-separated "prot_val" items.
# Named `dico` because the loops below iterate over `dico.items()`.
dico = {
    'seq1': '(prot1_val1, prot1_val10, prot2_val2, prot3_val3)',
    'seq2': '(prot1_val5)',
    'seq3': '(prot2_val2,prot4_val7)',
}
and I would like to obtain this:
# Desired result: protein -> sequence name -> list of values.
d = {
    'prot1': {
        'seq1': ['val1', 'val10'],
        'seq2': ['val5'],
    },
    'prot2': {
        'seq1': ['val2'],
        'seq3': ['val2'],
    },
    'prot3': {
        'seq1': ['val3'],
    },
    'prot4': {
        'seq3': ['val7'],
    },
}
I tried doing it like this:
# Attempt to regroup dico into {prot: {seq: [val, ...]}}.
# NOTE(review): uses the `re` module; the import is not shown in this snippet.
d = {}
for key, value in dico.items():
# Remove "(" and ")", split on ",", then split every item on "_" into a tuple.
# Items are not stripped, so an item that followed ", " keeps its leading space.
t = tuple([tuple(i.split("_")) for i in re.split(',',re.sub('[()]','',value))])
for prot,val in t:
# d.setdefault(prot, {}) returns the inner dict for prot, creating it if needed.
# The [key] lookup on that inner dict raises KeyError: no list has been stored
# under key yet.
# NOTE(review): "apppend" is misspelled; the list method is "append".
d.setdefault(prot, {})[key].apppend(val)
print(d)
But I get a KeyError. How can I resolve this?
CodePudding user response:
Try:
# Regroup {seq: "(prot_val, prot_val, ...)"} into {prot: {seq: [val, ...]}}.
d = {}
for seq, string in dico.items():
    # Drop the surrounding parentheses, then walk the comma-separated items.
    for item in string.strip("()").split(","):
        item = item.strip()
        if not item:
            # "()" or a trailing comma produces an empty item; nothing to record.
            continue
        # partition() splits on the first "_" only, so the item is parsed once
        # and a value that itself contains "_" is kept whole.
        pro, sep, val = item.partition("_")
        if not sep:
            raise ValueError(f"expected 'prot_val', got {item!r} in {seq!r}")
        # setdefault() creates the inner dict / list on first sight of a
        # protein / sequence and returns the existing one afterwards.
        d.setdefault(pro.strip(), {}).setdefault(seq, []).append(val.strip())
>>> d
{'prot1': {'seq1': ['val1', 'val10'], 'seq2': ['val5']},
'prot2': {'seq1': ['val2'], 'seq3': ['val2']},
'prot3': {'seq1': ['val3']},
'prot4': {'seq3': ['val7']}}