Home > Blockchain >  merge grouped results into one document in Mongo
merge grouped results into one document in Mongo

Time:03-22

I have a document that looks like this

  {
    "_id": {
      "$oid": "6187e5fa4ebcc2db6c86081f"
    },
    "publicId": "S23DCL",
    "flights": [
      {
        "name": "f1",
        "_id": {
          "$oid": "620026293fc350024da614dd"
        },
        "wines": [
          {
            "id": "1",
            "name": "wine1"
          },
          {
            "id": "2",
            "name": "wine2"
          }
        ]
      },
      {
        "name": "f2",
        "_id": {
          "$oid": "6200263c3fc350024da614de"
        },
        "wines": [
          {
            "id": "3",
            "name": "leVin"
          },
          {
            "id": "4",
            "name": "theWine"
          }
        ]
      }
    ],
    "title": "Pinot Noir 2022",
    "scores": [
      {
        "userId": "f8Dv",
        "userName": "gugus",
        "scores": [
          {
            "score": 95,
            "wineId": "1"
          },
          {
            "score": 88,
            "wineId": "2"
          }
        ]
      },
      {
        "userId": "yLjh",
        "userName": "test",
        "scores": [
          {
            "score": 92,
            "wineId": "1"
          },
          {
            "score": 87,
            "wineId": "2"
          }
        ]
      },
      {
        "userId": "B6em",
        "userName": "jklsdf",
        "scores": [
          {
            "score": 88,
            "wineId": "1"
          },
          {
            "score": 90,
            "wineId": "2"
          },
          {
            "score": 92,
            "wineId": "3"
          },
          {
            "score": 86,
            "wineId": "4"
          }
        ],
        "isFinished": false
      }
    ],
  }

I if it's too long for the question I can shorten it.
The whole document and query can also be found in mongoplayground

What I want to do is to create a scoreboard for all wines, meaning caluclating avg etc. for every wine, while still having just one document.

Something like this

{
  "title": "Pinot Noir 2022"
  "results": [
    {
       "wine": {
          "flightName": "f1",
           "wineIndex": 1,
           "name": "wine1",
           "wineId": "1"
        },
        "avg": 90,
        "scores": [
           {
        "scores": {
          "score": 87,
          "wineId": "1"
        },
        "userId": "yLjh",
        "userName": "test"
      },
        ] 
    }
   ]
}

I'm almost there but struggle with the part were I want a single result document after I grouped it.

the wineId within scores coresponds to the id in flights.wines

Here is what I have so far

db.collection.aggregate([
  {
    "$match": {
      "publicId": "S23DCL"
    }
  },
  {
    "$project": {
      "scores": "$scores",
      "publicId": "$publicId"
    }
  },
  {
    "$unwind": "$scores"
  },
  {
    "$unwind": "$scores.scores"
  },
  {
    $lookup: {
      from: "collection",
      let: {
        wId: "$scores.scores.wineId",
        "tastingId": "$_id"
      },
      pipeline: [
        {
          "$unwind": "$flights"
        },
        {
          "$unwind": {
            "path": "$flights.wines",
            "includeArrayIndex": "index"
          }
        },
        {
          $match: {
            $expr: {
              "$and": [
                {
                  "$eq": [
                    "$flights.wines.id",
                    "$$wId"
                  ]
                },
                {
                  "$eq": [
                    "$_id",
                    "$$tastingId"
                  ]
                }
              ]
            },
            
          }
        },
        {
          $project: {
            _id: 0,
            "name": "$flights.wines.name",
            "flight": "$flights.name",
            "wineId": "$flights.wines.id",
            "index": "$index"
          }
        }
      ],
      as: "wine"
    }
  },
  {
    "$replaceRoot": {
      "newRoot": {
        "$mergeObjects": [
          {
            "_id": "$_id",
            "scores": "$scores"
          },
          {
            "wine": {
              "$arrayElemAt": [
                "$wine",
                0
              ]
            }
          }
        ]
      }
    }
  },
  {
    "$group": {
      "_id": "$wine",
      "avg": {
        "$avg": "$scores.scores.score"
      },
      "min": {
        "$min": "$scores.scores.score"
      },
      "max": {
        "$max": "$scores.scores.score"
      },
      "stddev": {
        "$stdDevPop": "$scores.scores.score"
      },
      "tmp": {
        "$max": 1
      },
      "scores": {
        "$addToSet": "$scores"
      }
    }
  }
])

This gives me data as seen in results above. But data like "title" is missing.
Note the tmp field. I tried to add this dummy field and group on it in later stage.
Since there will always only be one group (because tmp is static).
Like

// query from abive
,{
 "$group": {
  "$_id": "tmp"
  // add my grouped results from before into an array
}
}

So how can I add my grouped results into an array, so I don't get (n) groups back but one documents where the grouped results are in an array?

Hope it's clear what I mean :)

CodePudding user response:

Maybe something like this:

 {
  $group: {
  _id: "tmp",
   array: {
    $push: "$$ROOT"
   }
  }
}

playground

CodePudding user response:

when you are replacing the root using replaceRoot your title gets missing . first you have to add it to the new root level. this returns your title .

[
  {
    '$match': {
      'publicId': 'S23DCL'
    }
  }, {
    '$project': {
      'scores': '$scores', 
      'publicId': '$publicId', 
      'title': '$title', 
      'flights': '$flights'
    }
  }, {
    '$unwind': '$scores'
  }, {
    '$unwind': '$scores.scores'
  }, {
    '$lookup': {
      'from': 'collection', 
      'let': {
        'wId': '$scores.scores.wineId', 
        'tastingId': '$_id'
      }, 
      'pipeline': [
        {
          '$unwind': '$flights'
        }, {
          '$unwind': {
            'path': '$flights.wines', 
            'includeArrayIndex': 'index'
          }
        }, {
          '$match': {
            '$expr': {
              '$and': [
                {
                  '$eq': [
                    '$flights.wines.id', '$$wId'
                  ]
                }, {
                  '$eq': [
                    '$_id', '$$tastingId'
                  ]
                }
              ]
            }
          }
        }, {
          '$project': {
            '_id': 0, 
            'name': '$flights.wines.name', 
            'flight': '$flights.name', 
            'wineId': '$flights.wines.id', 
            'index': '$index'
          }
        }
      ], 
      'as': 'wine'
    }
  }, {
    '$addFields': {
      'scores.title': '$title'
    }
  }, {
    '$replaceRoot': {
      'newRoot': {
        '$mergeObjects': [
          {
            '_id': '$_id', 
            'scores': '$scores'
          }, {
            'wine': {
              '$arrayElemAt': [
                '$wine', 0
              ]
            }
          }
        ]
      }
    }
  }, {
    '$group': {
      '_id': '$wine', 
      'title': {
        '$last': '$scores.title'
      }, 
      'avg': {
        '$avg': '$scores.scores.score'
      }, 
      'min': {
        '$min': '$scores.scores.score'
      }, 
      'max': {
        '$max': '$scores.scores.score'
      }, 
      'stddev': {
        '$stdDevPop': '$scores.scores.score'
      }, 
      'tmp': {
        '$max': 1
      }, 
      'scores': {
        '$addToSet': '$scores'
      }
    }
  }
]

but your current aggregation pipeline has too many stages. if you an explain what exactly you need from your data source may be it can be little optimised

  • Related