Home > Back-end >  (MongoDB) Combining Union and Intersection in the same pipeline
(MongoDB) Combining Union and Intersection in the same pipeline

Time:10-04

I have the following aggregation pipeline running in the latest version of mongoDB and pymongo:

[
  {
    "$project": {
      "union": {
        "$setUnion": [
          "$query_a",
          "$query_b"
        ]
      }
    }
  },
  {
    "$unwind": "$union"
  },
  {
    "$group": {
      "_id": "$union.ID",
      "date_a": {
        "$addToSet": "$union.date_a"
      },
      "date_b": {
        "$addToSet": "$union.date_b"
      }
    }
  },
  {
    "$unwind": "$date_a"
  },
  {
    "$unwind": "$date_b"
  },
  {
    "$project": {
      "_id": 1,
      "date_a": "$date_a",
      "date_b": "date_b",
      "diff": {
        "$subtract": [
          {
            "$toInt": "$date_b"
          },
          {
            "$toInt": "$date_a"
          }
        ]
      }
    }
  },
  {
    "$match": {
      "diff": {
        "$gt": 0,
        "$lte": 20
      }
    }
  },
  
]

This gives the union of the 2 pipelines query_a and query_b. After this union I want to get an intersection on ID with the pipeline query_c: (query_a UNION query_b) INTERSECTION query_c.

For this playground example the desired output would be:

    [
      {
        "ID": "c80ea2cb-3272-77ae-8f46-d95de600c5bf",
        
      },
      {
        "ID": "cdbcc129-548a-9d51-895a-1538200664e6",
      }
    ]

CodePudding user response:

You could change and augment your pipeline a little to get your desired output.

db.collection.aggregate([
  {
    "$project": {
      "union": {
        // do the intersection here
        "$filter": {
          "input": {
            "$setUnion": [
              "$query_a",
              "$query_b"
            ]
          },
          "as": "elem",
          "cond": {
            // only take IDs in query_c
            "$in": ["$$elem.ID", "$query_c.ID"]
          }
        }
      }
    }
  },
  {
    "$unwind": "$union"
  },
  {
    "$group": {
      "_id": "$union.ID",
      "date_a": {
        "$addToSet": "$union.date_a"
      },
      "date_b": {
        "$addToSet": "$union.date_b"
      }
    }
  },
  {
    "$unwind": "$date_a"
  },
  {
    "$unwind": "$date_b"
  },
  {
    "$project": {
      "diff": {
        "$subtract": [
          {
            "$toInt": "$date_b"
          },
          {
            "$toInt": "$date_a"
          }
        ]
      }
    }
  },
  {
    "$match": {
      "diff": {
        "$gt": 0,
        "$lte": 20
      }
    }
  },
  { // get unique _id's
    "$group": {
      "_id": "$_id"
    }
  },
  { // rename _id to ID
    "$project": {
      "_id": 0,
      "ID": "$_id"
    }
  }
])

Try it on mongoplayground.net.

CodePudding user response:

You can do it with:

  1. Updating first $project stage to also project an array of IDs from query_c.

  2. Using $set as a second stage where you would filter out all items from the union of query_a and query_b, that does not have ID that's in query_c.

You can do it like this:

{
  "$project": {
    "union": {
      "$setUnion": [
        "$query_a",
        "$query_b"
      ]
    },
    "query_c": {
      "$map": {
        "input": "$query_c",
        "in": "$$this.ID"
      }
    }
  }
},
{
  "$set": {
    "union": {
      "$filter": {
        "input": "$union",
        "cond": {
          "$in": [
            "$$this.ID",
            "$query_c"
          ]
        }
      }
    }
  }
},

The rest of your Aggregation pipeline can remain the same.

Working example

  • Related