Hello, I am struggling to combine itertools.groupby (with a lambda key) and a nested dict comprehension to produce the structure shown in the example below:
Target structure
# This already works! ########################################################
# "GM0014": {
# "i1401": {
# "score": 1.178,
# "rawScore": -1.178,
# "year": "2019",
# "id": "i1401"
# },
# "i1021": {
# "score": 1.838,
# "rawScore": -1.838,
# "year": "2020",
# "id": "i1021"
# },
# "i1022": {
# "score": 0.496,
# "rawScore": -0.496,
# "year": "2020",
# "id": "i1022"
# },
# "i1013": {
# "score": 0.415,
# "rawScore": 0.415,
# "year": "2020",
# "id": "i1013"
# },
# This does not work! ########################################################
# "overAll": {
# "score": 0.982,
# "rawScore": -0.774
For this I use the dataset below. These values should be transformed into the target structure shown above. I use scoreMax and rawScoreMax for the "overAll" section.
# Flat input rows: one dict per (region, variable) pair.
# Rows are listed region by region, four variables per region.
data = [
    # GM0014
    {'region': 'GM0014', 'variable': 'i1013', 'year': '2020', 'score': 0.415, 'rawScore': 0.415, 'scoreMax': 0.982, 'rawScoreMax': -0.774},
    {'region': 'GM0014', 'variable': 'i1021', 'year': '2020', 'score': -1.838, 'rawScore': 1.838, 'scoreMax': 0.982, 'rawScoreMax': -0.774},
    {'region': 'GM0014', 'variable': 'i1022', 'year': '2020', 'score': -0.496, 'rawScore': 0.496, 'scoreMax': 0.982, 'rawScoreMax': -0.774},
    {'region': 'GM0014', 'variable': 'i1401', 'year': '2019', 'score': -1.178, 'rawScore': 1.178, 'scoreMax': 0.982, 'rawScoreMax': -0.774},
    # GM0034
    {'region': 'GM0034', 'variable': 'i1013', 'year': '2020', 'score': -0.913, 'rawScore': -0.913, 'scoreMax': -0.071, 'rawScoreMax': -0.385},
    {'region': 'GM0034', 'variable': 'i1021', 'year': '2020', 'score': -0.244, 'rawScore': 0.244, 'scoreMax': -0.071, 'rawScoreMax': -0.385},
    {'region': 'GM0034', 'variable': 'i1022', 'year': '2020', 'score': -0.332, 'rawScore': 0.332, 'scoreMax': -0.071, 'rawScoreMax': -0.385},
    {'region': 'GM0034', 'variable': 'i1401', 'year': '2019', 'score': -0.053, 'rawScore': 0.053, 'scoreMax': -0.071, 'rawScoreMax': -0.385},
    # GM0037
    {'region': 'GM0037', 'variable': 'i1013', 'year': '2020', 'score': 0.487, 'rawScore': 0.487, 'scoreMax': 0.769, 'rawScoreMax': -0.526},
    {'region': 'GM0037', 'variable': 'i1021', 'year': '2020', 'score': -2.172, 'rawScore': 2.172, 'scoreMax': 0.769, 'rawScoreMax': -0.526},
    {'region': 'GM0037', 'variable': 'i1022', 'year': '2020', 'score': -1.654, 'rawScore': 1.654, 'scoreMax': 0.769, 'rawScoreMax': -0.526},
    {'region': 'GM0037', 'variable': 'i1401', 'year': '2019', 'score': 1.236, 'rawScore': -1.236, 'scoreMax': 0.769, 'rawScoreMax': -0.526},
    # GM0047
    {'region': 'GM0047', 'variable': 'i1013', 'year': '2020', 'score': 0.885, 'rawScore': 0.885, 'scoreMax': 0.562, 'rawScoreMax': -0.12},
    {'region': 'GM0047', 'variable': 'i1021', 'year': '2020', 'score': -2.19, 'rawScore': 2.19, 'scoreMax': 0.562, 'rawScoreMax': -0.12},
    {'region': 'GM0047', 'variable': 'i1022', 'year': '2020', 'score': -1.542, 'rawScore': 1.542, 'scoreMax': 0.562, 'rawScoreMax': -0.12},
    {'region': 'GM0047', 'variable': 'i1401', 'year': '2019', 'score': 2.368, 'rawScore': -2.368, 'scoreMax': 0.562, 'rawScoreMax': -0.12},
]
This code works, except for the "overAll" section:
Group by function with Lambda
# Nest the flat rows as {region: {variable: {score, rawscore, year, id}}}.
# itertools.groupby only merges *adjacent* rows with the same key, so `data`
# must already be ordered by region (it is in the dataset above).
test = {
    key: {
        row['variable']: {
            'score': row['score'],
            # The original literal listed 'rawscore' twice; a repeated key in a
            # dict literal silently overwrites the first, so one entry suffices.
            'rawscore': row['rawScore'],
            'year': row['year'],
            'id': row['variable'],
        }
        for row in lines
    }
    for key, lines in itertools.groupby(data, lambda p: p['region'])
}
To get the "overAll" section to work, I tried to modify the above code as follows:
# NOTE: failing attempt, kept verbatim because it is the subject of the question.
# It is not valid Python: the inner comprehension is closed by the `}` after
# `for l in lines`, and a second `{...}` literal follows it directly with no
# operator or separator in between, which the parser rejects.
test = {key : {l['variable'] : { 'score' : l['score']
,'rawscore': l['rawScore']
,'rawscore': l['rawScore']
,'year' : l['year']
,'id' : l['variable']
}
for l in lines }
# <- the parser stops at the `{` on the next line (SyntaxError: invalid syntax)
{'overAll': { 'score' : l['scoreMax']
,'rawscore' : l['rawScoreMax']
}
for key, lines in itertools.groupby(data, lambda p: p['region']) }}
But I get the error:
{'overAll': { 'score' : l['scoreMax'] ^ SyntaxError: invalid syntax
Can you please help me? Many thanks.
CodePudding user response:
I hope I've understood your question right:
from itertools import groupby

# Build {region: {"overAll": {...}, variable: {...}, ...}} from the flat rows.
out = {}
# groupby only merges *adjacent* rows with the same key, so `data` must already
# be ordered by region.
for k, g in groupby(data, lambda p: p["region"]):
    # Materialise the group: it is indexed (g[0]) and then iterated again.
    g = list(g)
    # The region-level values are read from the first row of the group.
    out[k] = {
        "overAll": {"score": g[0]["scoreMax"], "rawscore": g[0]["rawScoreMax"]}
    }
    for d in g:
        # Store a filtered copy rather than `d` itself. Assigning `d` and then
        # deleting keys from it would strip scoreMax/rawScoreMax from the rows
        # in `data`, so running this code a second time would raise KeyError.
        out[k][d["variable"]] = {
            name: value
            for name, value in d.items()
            if name not in ("scoreMax", "rawScoreMax")
        }

print(out)
Prints:
{
"GM0014": {
"overAll": {"score": 0.982, "rawscore": -0.774},
"i1013": {
"region": "GM0014",
"variable": "i1013",
"year": "2020",
"score": 0.415,
"rawScore": 0.415,
},
"i1021": {
"region": "GM0014",
"variable": "i1021",
"year": "2020",
"score": -1.838,
"rawScore": 1.838,
},
"i1022": {
"region": "GM0014",
"variable": "i1022",
"year": "2020",
"score": -0.496,
"rawScore": 0.496,
},
"i1401": {
"region": "GM0014",
"variable": "i1401",
"year": "2019",
"score": -1.178,
"rawScore": 1.178,
},
},
"GM0034": {
"overAll": {"score": -0.071, "rawscore": -0.385},
"i1013": {
"region": "GM0034",
"variable": "i1013",
"year": "2020",
"score": -0.913,
"rawScore": -0.913,
},
"i1021": {
"region": "GM0034",
"variable": "i1021",
"year": "2020",
"score": -0.244,
"rawScore": 0.244,
},
...