Home > Mobile >  Mongo query to join elements of arrays contained in documents
Mongo query to join elements of arrays contained in documents

Time:09-22

I have a collection that contains information about events and attendees. The documents in the collection look something like this

{event: event_1, attendees: [A, B, C]} // A, B and C attended at event_1 
{event: event_2, attendees: [A, B]}    // A and B attended at event_2
{event: event_3, attendees: [A]}       // A only attended at event_3

What I want to build is a query that allows me to know for each attendee how many events it attended as well as how many times he met the other attendees. In other words I would like to obtain something like this

{attendee: A, howManyEvents: 3, togetherWith: B, howManyTimes: 2}
{attendee: A, howManyEvents: 3, togetherWith: C, howManyTimes: 1}
{attendee: B, howManyEvents: 2, togetherWith: A, howManyTimes: 2}
{attendee: B, howManyEvents: 2, togetherWith: C, howManyTimes: 1}
{attendee: C, howManyEvents: 1, togetherWith: A, howManyTimes: 1}
{attendee: C, howManyEvents: 1, togetherWith: B, howManyTimes: 1}

Such list would allow me to know, for instance querying for {attendee: B, togetherWith: A}, that any time B goes to an event it also meets A (howManyEvents and howManyTimes are both 2). Similarly, 1/3 of the times A goes to an event there is also C.

Is there a way to build a query that returns a list like this?

CodePudding user response:

I don't think any straightway to do this, but you can try the aggregation pipeline as below,

  • $addFields to add copy of attendees field
  • $unwind deconstruct attendees array
  • $filter to iterate loop of togetherWith and remove current attendee from copy field
  • $unwind deconstruct togetherWith array
  • $group by attendee and construct the array of togetherWith and count sum of howManyEvents
  • $unwind deconstruct togetherWith array
  • $group by attendee and togetherWith and get count of howManyTimes
  • $project to format the result
db.collection.aggregate([
  { $addFields: { togetherWith: "$attendees" } },
  { $unwind: "$attendees" },
  {
    $addFields: {
      togetherWith: {
        $filter: {
          input: "$togetherWith",
          cond: { $ne: ["$$this", "$attendees"] }
        }
      }
    }
  },
  { $unwind: "$togetherWith" },
  {
    $group: {
      _id: "$attendees",
      togetherWith: { $push: "$togetherWith" },
      howManyEvents: { $sum: 1 }
    }
  },
  { $unwind: "$togetherWith" },
  {
    $group: {
      _id: {
        attendee: "$_id",
        togetherWith: "$togetherWith"
      },
      howManyEvents: { $first: "$howManyEvents" },
      howManyTimes: { $sum: 1 }
    }
  },
  {
    $project: {
      _id: 0,
      attendee: "$_id.attendee",
      togetherWith: "$_id.togetherWith",
      howManyEvents: 1,
      howManyTimes: 1
    }
  }
])

Playground

CodePudding user response:

Query produce the results that you want.
To understand it, it can help to go stage by stage and see output.

Query

  • map to add the together with field, with missing 1 member each time (the current member)
  • unwind
  • group by attendee to sum the distinct events => howManyTimes
  • unwind
  • group by attendee and sum togetherWith : 1 => togetherWith (total)

*if you see together with NULL its for events of 1 member, its important info if one attended was only in 1 event and alone

Test code here

db.collection.aggregate([
  {
    "$set": {
      "a": {
        "$map": {
          "input": "$attendees",
          "in": {
            "attendee": "$$a",
            "event": "$event",
            "togetherWith": {
              "$setDifference": [
                "$attendees",
                [
                  "$$a"
                ]
              ]
            }
          },
          "as": "a"
        }
      }
    }
  },
  {
    "$project": {
      "_id": 0,
      "a": 1
    }
  },
  {
    "$unwind": {
      "path": "$a"
    }
  },
  {
    "$replaceRoot": {
      "newRoot": "$a"
    }
  },
  {
    "$unwind": {
      "path": "$togetherWith",
      "preserveNullAndEmptyArrays": true
    }
  },
  {
    "$set": {
      "togetherWith": {
        "$ifNull": [
          "$togetherWith",
          null
        ]
      }
    }
  },
  {
    "$group": {
      "_id": "$attendee",
      "howManyEvents": {
        "$addToSet": "$event"
      },
      "together": {
        "$push": {
          "togetherWith": "$togetherWith",
          "howManyTimes": 1
        }
      }
    }
  },
  {
    "$set": {
      "attendee": "$_id"
    }
  },
  {
    "$project": {
      "_id": 0
    }
  },
  {
    "$set": {
      "howManyEvents": {
        "$size": "$howManyEvents"
      }
    }
  },
  {
    "$unwind": {
      "path": "$together"
    }
  },
  {
    "$replaceRoot": {
      "newRoot": {
        "$mergeObjects": [
          "$together",
          "$$ROOT"
        ]
      }
    }
  },
  {
    "$project": {
      "together": 0
    }
  },
  {
    "$group": {
      "_id": {
        "attendee": "$attendee",
        "togetherWith": "$togetherWith"
      },
      "howManyEvents": {
        "$first": "$howManyEvents"
      },
      "howManyTimes": {
        "$sum": "$howManyTimes"
      }
    }
  },
  {
    "$replaceRoot": {
      "newRoot": {
        "$mergeObjects": [
          "$_id",
          "$$ROOT"
        ]
      }
    }
  },
  {
    "$project": {
      "_id": 0
    }
  }
])

Query2 (alternative solution facet and more array operations)

  • the first map is to add the {:togetherWith ".." :howManyTimes 1} to each attented
  • 2 unwinds to get results like
    {
    "attendee": "A",
    "togetherWith": "B",
    "howManyTimes": 1,
    "event": "event_1"
    },
    {
    "attendee": "A",
    "togetherWith": "C",
    "howManyTimes": 1,
    "event": "event_1"
    }
    ...
    
  • 2 groups in a facet
    • count the distinct events(how many events)
    • sum the how many times (how many times)
  • then its a map that combine the information from both those arrays
  • 1 unwind of that array with all information
  • simple tranformation

*If you see the results they contain also "howManyTimes": 0 members, this happens when event had only 1 member, for A is not useful because we have the howManyEvents from other documents, but for "Z" it is useful, so i kept those

Test code here

db.collection.aggregate([
  {
    "$set": {
      "a": {
        "$map": {
          "input": "$attendees",
          "in": {
            "attendee": "$$a",
            "event": "$event",
            "togetherWith": {
              "$filter": {
                "input": {
                  "$map": {
                    "input": "$attendees",
                    "in": {
                      "$cond": [
                        {
                          "$ne": [
                            "$$a",
                            "$$a1"
                          ]
                        },
                        {
                          "togetherWith": "$$a1",
                          "howManyTimes": 1
                        },
                        null
                      ]
                    },
                    "as": "a1"
                  }
                },
                "cond": {
                  "$ne": [
                    "$$f",
                    null
                  ]
                },
                "as": "f"
              }
            }
          },
          "as": "a"
        }
      }
    }
  },
  {
    "$project": {
      "_id": 0,
      "a": 1
    }
  },
  {
    "$unwind": {
      "path": "$a"
    }
  },
  {
    "$unwind": {
      "path": "$a.togetherWith",
      "preserveNullAndEmptyArrays": true
    }
  },
  {
    "$project": {
      "attendee": "$a.attendee",
      "togetherWith": "$a.togetherWith.togetherWith",
      "howManyTimes": "$a.togetherWith.howManyTimes",
      "event": "$a.event"
    }
  },
  {
    "$facet": {
      "a": [
        {
          "$group": {
            "_id": "$attendee",
            "howManyEvents": {
              "$addToSet": "$event"
            }
          }
        },
        {
          "$set": {
            "attendee": "$_id"
          }
        },
        {
          "$project": {
            "_id": 0
          }
        },
        {
          "$set": {
            "howManyEvents": {
              "$size": "$howManyEvents"
            }
          }
        }
      ],
      "b": [
        {
          "$group": {
            "_id": {
              "attendee": "$attendee",
              "togetherWith": "$togetherWith"
            },
            "howManyTimes": {
              "$sum": "$howManyTimes"
            }
          }
        },
        {
          "$replaceRoot": {
            "newRoot": {
              "$mergeObjects": [
                "$_id",
                "$$ROOT"
              ]
            }
          }
        },
        {
          "$project": {
            "_id": 0
          }
        }
      ]
    }
  },
  {
    "$set": {
      "b": {
        "$map": {
          "input": "$b",
          "in": {
            "$let": {
              "vars": {
                "howManyEvents": {
                  "$arrayElemAt": [
                    {
                      "$filter": {
                        "input": "$a",
                        "cond": {
                          "$eq": [
                            "$$m.attendee",
                            "$$m1.attendee"
                          ]
                        },
                        "as": "m1"
                      }
                    },
                    0
                  ]
                }
              },
              "in": {
                "$mergeObjects": [
                  "$$m",
                  {
                    "howManyEvents": "$$howManyEvents.howManyEvents"
                  }
                ]
              }
            }
          },
          "as": "m"
        }
      }
    }
  },
  {
    "$unwind": {
      "path": "$b"
    }
  },
  {
    "$replaceRoot": {
      "newRoot": "$b"
    }
  }
])
  • Related