Home > Enterprise >  (Mongo DB) Union of 2 pipelines with the same collection
(Mongo DB) Union of 2 pipelines with the same collection

Time:09-30

I have the following aggregation pipeline running in the latest version of mongoDB and pymongo:

[
    {
        "$facet": {
            "query_a": [
                {
                    "$match": {
                        ...
                    }
                },
                {
                    "$project": {
                        "ID": "...",
                        "date_a": "..."
                    }
                }
            ],
            "query_b": [
                {
                    "$match": {
                        ...
                    }
                },
                {
                    "$project": {
                        "ID": "...",
                        "date_b": "..."
                    }
                }
            ]
        }
    },
    {
        "$project": {
            "union": {
                "$setUnion": [
                    "$query_a",
                    "$query_b"
                ]
            },
            "query_a": 1,
            "query_b": 1
        }
    },
    {
        "$project": {
            "_id": 0,
            "data": {
                "$map": {
                    "input": "$union",
                    "in": {
                        "union": "$$this",
                        "query_a": {
                            "$first": {
                                "$filter": {
                                    "input": "$query_a",
                                    "as": "item",
                                    "cond": {
                                        "$eq": [
                                            "$$item.ID",
                                            "$$this"
                                        ]
                                    }
                                }
                            }
                        },
                        "query_b": {
                            "$first": {
                                "$filter": {
                                    "input": "$query_b",
                                    "as": "item",
                                    "cond": {
                                        "$eq": [
                                            "$$item.ID",
                                            "$$this"
                                        ]
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    },
    {
        "$unwind": "$data"
    },
    {
        "$replaceRoot": {
            "newRoot": "$data"
        }
    },
    {
        "$project": {
            "union": 1,
        }
    }
]

I am trying to get a Union on the field "ID" and I get the following example ouput:

{'union': {"ID": "c80ea2cb-3272-77ae-8f46-d95de600c5bf", "date_a": "1"}}
{'union': {"ID": "c80ea2cb-3272-77ae-8f46-d95de600c5bf", "date_a": "2"}}
{'union': {"ID": "cdbcc129-548a-9d51-895a-1538200664e6", "date_a": "3"}}
{'union': {"ID": "cdbcc129-548a-9d51-895a-1538200664e6", "date_a": "4"}}
{'union': {"ID": "cdbcc129-548a-9d51-895a-1538200664e6", "date_a": "5"}}
{'union': {"ID": "a4ece1ba-42ae-e735-17b0-f619daa506f9", "date_a": "6"}}
{'union': {"ID": "a4ece1ba-42ae-e735-17b0-f619daa506f9", "date_a": "7"}}
{'union': {"ID": "a4ece1ba-42ae-e735-17b0-f619daa506f9", "date_a": "8"}}
{'union': {"ID": "a4ece1ba-42ae-e735-17b0-f619daa506f9", "date_a": "9"}}
{'union': {"ID": "c80ea2cb-3272-77ae-8f46-d95de600c5bf", "date_b": "10"}}
{'union': {"ID": "cdbcc129-548a-9d51-895a-1538200664e6", "date_b": "11"}}
{'union': {"ID": "a4ece1ba-42ae-e735-17b0-f619daa506f9", "date_b": "12"}}

This from query_a:

{'union': {"ID": "c80ea2cb-3272-77ae-8f46-d95de600c5bf", "date_a": "1"}}
{'union': {"ID": "c80ea2cb-3272-77ae-8f46-d95de600c5bf", "date_a": "2"}}
{'union': {"ID": "cdbcc129-548a-9d51-895a-1538200664e6", "date_a": "3"}}
{'union': {"ID": "cdbcc129-548a-9d51-895a-1538200664e6", "date_a": "4"}}
{'union': {"ID": "cdbcc129-548a-9d51-895a-1538200664e6", "date_a": "5"}}
{'union': {"ID": "a4ece1ba-42ae-e735-17b0-f619daa506f9", "date_a": "6"}}
{'union': {"ID": "a4ece1ba-42ae-e735-17b0-f619daa506f9", "date_a": "7"}}
{'union': {"ID": "a4ece1ba-42ae-e735-17b0-f619daa506f9", "date_a": "8"}}
{'union': {"ID": "a4ece1ba-42ae-e735-17b0-f619daa506f9", "date_a": "9"}}

This from query_b:

{'union': {"ID": "c80ea2cb-3272-77ae-8f46-d95de600c5bf", "date_b": "10"}}
{'union': {"ID": "cdbcc129-548a-9d51-895a-1538200664e6", "date_b": "11"}}
{'union': {"ID": "a4ece1ba-42ae-e735-17b0-f619daa506f9", "date_b": "12"}}

I'm trying to achieve the following output matched on "ID":

{'union': {"ID": "c80ea2cb-3272-77ae-8f46-d95de600c5bf", "date_a": "1", "date_b": "10"}}
{'union': {"ID": "c80ea2cb-3272-77ae-8f46-d95de600c5bf", "date_a": "2", "date_b": "10"}}
{'union': {"ID": "cdbcc129-548a-9d51-895a-1538200664e6", "date_a": "3", "date_b": "11"}}
{'union': {"ID": "cdbcc129-548a-9d51-895a-1538200664e6", "date_a": "4", "date_b": "11"}}
{'union': {"ID": "cdbcc129-548a-9d51-895a-1538200664e6", "date_a": "5", "date_b": "11"}}
{'union': {"ID": "a4ece1ba-42ae-e735-17b0-f619daa506f9", "date_a": "6", "date_b": "12"}}
{'union': {"ID": "a4ece1ba-42ae-e735-17b0-f619daa506f9", "date_a": "7", "date_b": "12"}}
{'union': {"ID": "a4ece1ba-42ae-e735-17b0-f619daa506f9", "date_a": "8", "date_b": "12"}}
{'union': {"ID": "a4ece1ba-42ae-e735-17b0-f619daa506f9", "date_a": "9", "date_b": "12"}}

Your suggestions are appreciated!

Edit 1:

Here is a playground of my example: https://mongoplayground.net/p/N3CgBa6DXHG

CodePudding user response:

Try this:

db.collection.aggregate([
  {
    "$unwind": "$query_a"
  },
  {
    "$unwind": "$query_b"
  },
  {
    "$match": {
      $expr: {
        "$eq": [
          "$query_a.ID",
          "$query_b.ID"
        ]
      }
    }
  },
  {
    "$project": {
      union: {
        "$mergeObjects": [
          "$query_a",
          "$query_b"
        ]
      }
    }
  }
])

Here, we do the following:

  1. Unwind the two arrays query_a and query_b.
  2. Filter and documents where ID matches, in both query_a and query_b.
  3. Merge the documents.

Playground link.

CodePudding user response:

You can do it like this:

db.collection.aggregate([
  {
    "$project": {
      "union": {
        "$setUnion": [
          "$query_a",
          "$query_b"
        ]
      }
    }
  },
  {
    "$unwind": "$union"
  },
  {
    "$group": {
      "_id": "$union.ID",
      "date_a": {
        "$addToSet": "$union.date_a"
      },
      "date_b": {
        "$addToSet": "$union.date_b"
      }
    }
  },
  {
    "$unwind": "$date_a"
  },
  {
    "$unwind": "$date_b"
  }
])

Working example

  • Related