Home > Software engineering >  Python lambda groupby with aggregation results in syntax error
Python lambda groupby with aggregation results in syntax error

Time:10-09

Hello, I am struggling to combine itertools.groupby (with a lambda key function) and a nested dict comprehension to produce the structure shown in the example below:

Target structure

#  This already works! ########################################################
# "GM0014": {
# "i1401": {
# "score": 1.178,
# "rawScore": -1.178,
# "year": "2019",
# "id": "i1401"
# },
# "i1021": {
# "score": 1.838,
# "rawScore": -1.838,
# "year": "2020",
# "id": "i1021"
# },
# "i1022": {
# "score": 0.496,
# "rawScore": -0.496,
# "year": "2020",
# "id": "i1022"
# },
# "i1013": {
# "score": 0.415,
# "rawScore": 0.415,
# "year": "2020",
# "id": "i1013"
# },
#  This does not work! ########################################################
# "overAll": {
# "score": 0.982,
# "rawScore": -0.774
# }
# }

For this I use the dataset below. These values should be transformed into the target structure shown above. The scoreMax and rawScoreMax values are used for the "overAll" section.

# Sample records: one dict per (region, variable) pair. Rows are listed in
# region order, and scoreMax / rawScoreMax repeat on every row of a region.
data = [
    {"region": "GM0014", "variable": "i1013", "year": "2020", "score": 0.415, "rawScore": 0.415, "scoreMax": 0.982, "rawScoreMax": -0.774},
    {"region": "GM0014", "variable": "i1021", "year": "2020", "score": -1.838, "rawScore": 1.838, "scoreMax": 0.982, "rawScoreMax": -0.774},
    {"region": "GM0014", "variable": "i1022", "year": "2020", "score": -0.496, "rawScore": 0.496, "scoreMax": 0.982, "rawScoreMax": -0.774},
    {"region": "GM0014", "variable": "i1401", "year": "2019", "score": -1.178, "rawScore": 1.178, "scoreMax": 0.982, "rawScoreMax": -0.774},
    {"region": "GM0034", "variable": "i1013", "year": "2020", "score": -0.913, "rawScore": -0.913, "scoreMax": -0.071, "rawScoreMax": -0.385},
    {"region": "GM0034", "variable": "i1021", "year": "2020", "score": -0.244, "rawScore": 0.244, "scoreMax": -0.071, "rawScoreMax": -0.385},
    {"region": "GM0034", "variable": "i1022", "year": "2020", "score": -0.332, "rawScore": 0.332, "scoreMax": -0.071, "rawScoreMax": -0.385},
    {"region": "GM0034", "variable": "i1401", "year": "2019", "score": -0.053, "rawScore": 0.053, "scoreMax": -0.071, "rawScoreMax": -0.385},
    {"region": "GM0037", "variable": "i1013", "year": "2020", "score": 0.487, "rawScore": 0.487, "scoreMax": 0.769, "rawScoreMax": -0.526},
    {"region": "GM0037", "variable": "i1021", "year": "2020", "score": -2.172, "rawScore": 2.172, "scoreMax": 0.769, "rawScoreMax": -0.526},
    {"region": "GM0037", "variable": "i1022", "year": "2020", "score": -1.654, "rawScore": 1.654, "scoreMax": 0.769, "rawScoreMax": -0.526},
    {"region": "GM0037", "variable": "i1401", "year": "2019", "score": 1.236, "rawScore": -1.236, "scoreMax": 0.769, "rawScoreMax": -0.526},
    {"region": "GM0047", "variable": "i1013", "year": "2020", "score": 0.885, "rawScore": 0.885, "scoreMax": 0.562, "rawScoreMax": -0.12},
    {"region": "GM0047", "variable": "i1021", "year": "2020", "score": -2.19, "rawScore": 2.19, "scoreMax": 0.562, "rawScoreMax": -0.12},
    {"region": "GM0047", "variable": "i1022", "year": "2020", "score": -1.542, "rawScore": 1.542, "scoreMax": 0.562, "rawScoreMax": -0.12},
    {"region": "GM0047", "variable": "i1401", "year": "2019", "score": 2.368, "rawScore": -2.368, "scoreMax": 0.562, "rawScoreMax": -0.12},
]

This code works, except for the "overAll" section:

 # Group the records by region with itertools.groupby and a lambda key,
 # producing {region: {variable: {score, rawScore, year, id}}}.
 import itertools

 # groupby() only merges *adjacent* records that share a key, so the input is
 # sorted by region first. sorted() is stable, so the order of the variables
 # inside each region is unchanged.
 test = {
     key: {
         rec['variable']: {
             'score': rec['score'],
             # Spelled 'rawScore' as in the target structure; the earlier
             # version listed a lower-case 'rawscore' key twice.
             'rawScore': rec['rawScore'],
             'year': rec['year'],
             'id': rec['variable'],
         }
         for rec in lines
     }
     for key, lines in itertools.groupby(
         sorted(data, key=lambda p: p['region']),
         key=lambda p: p['region'],
     )
 }

To get the "overAll" section to work, I tried to modify the code above into the code below:

# NOTE: this attempt fails to parse (SyntaxError), it is not a logic error.
# The inner dict comprehension `{... for l in lines }` is already a complete
# expression; the `{'overAll': ...}` display that follows it is not joined to
# it by any operator, so the parser stops at that opening brace.
# `l` is also referenced in the 'overAll' part, outside the comprehension
# that binds it.
test = {key : {l['variable'] : { 'score'   : l['score']
                                ,'rawscore': l['rawScore']
                                ,'rawscore': l['rawScore']
                                ,'year'    : l['year']
                                ,'id'      : l['variable']
                                } 
                    for l in lines } 
            {'overAll': { 'score'    : l['scoreMax']
                         ,'rawscore' : l['rawScoreMax']
                        }
        for key, lines in  itertools.groupby(data, lambda p: p['region']) }}

But get the error:

    {'overAll': { 'score'    : l['scoreMax']
    ^
SyntaxError: invalid syntax

Can you please help me? Many thanks.

CodePudding user response:

I hope I've understood your question right:

from itertools import groupby

# Per-region summary fields: they feed the "overAll" entry and are left out
# of the per-variable records.
summary_keys = ("scoreMax", "rawScoreMax")

out = {}
# groupby() only merges *adjacent* records with the same key, so sort by
# region first. sorted() is stable, so records keep their relative order
# within each region.
for k, g in groupby(
    sorted(data, key=lambda p: p["region"]), key=lambda p: p["region"]
):
    g = list(g)

    # scoreMax / rawScoreMax are taken from the first record of the group.
    out[k] = {
        "overAll": {"score": g[0]["scoreMax"], "rawscore": g[0]["rawScoreMax"]}
    }
    for d in g:
        # Store a filtered copy rather than `del`-ing keys from d itself:
        # d is the same object that lives in `data`, so deleting would strip
        # the caller's records and make a second run fail with KeyError.
        out[k][d["variable"]] = {
            field: value for field, value in d.items() if field not in summary_keys
        }

print(out)

Prints:

{
    "GM0014": {
        "overAll": {"score": 0.982, "rawscore": -0.774},
        "i1013": {
            "region": "GM0014",
            "variable": "i1013",
            "year": "2020",
            "score": 0.415,
            "rawScore": 0.415,
        },
        "i1021": {
            "region": "GM0014",
            "variable": "i1021",
            "year": "2020",
            "score": -1.838,
            "rawScore": 1.838,
        },
        "i1022": {
            "region": "GM0014",
            "variable": "i1022",
            "year": "2020",
            "score": -0.496,
            "rawScore": 0.496,
        },
        "i1401": {
            "region": "GM0014",
            "variable": "i1401",
            "year": "2019",
            "score": -1.178,
            "rawScore": 1.178,
        },
    },
    "GM0034": {
        "overAll": {"score": -0.071, "rawscore": -0.385},
        "i1013": {
            "region": "GM0034",
            "variable": "i1013",
            "year": "2020",
            "score": -0.913,
            "rawScore": -0.913,
        },
        "i1021": {
            "region": "GM0034",
            "variable": "i1021",
            "year": "2020",
            "score": -0.244,
            "rawScore": 0.244,
        },

...
  • Related