Home > Software design >  filtering documents containing three level nested arrays depending on the values in the most nested
filtering documents containing three level nested arrays depending on the values in the most nested

Time:04-29

I would be very grateful for any help in such situation. Collection in MongoDB (now only 3 objects for demonstration purpose):

{ 
    "_id" : ObjectId("62684847e9594c65cbaa5d85"), 
    "agentId" : NumberInt(1), 
    "agentName" : "Digital Queen", 
    "policyList" : [
        {
            "receivedDate" : ISODate("2022-03-23T04:46:15.000 0000"), 
            "policyStatusDetail" : [
                {
                    "policyStsCode" : NumberInt(7), 
                    "policiesArray" : [
                        {
                            "policyDetailedCode" : NumberInt(1), 
                            "policyStatusDate" : ISODate("2022-02-20T04:46:15.000 0000")
                        }, 
                        {
                            "policyDetailedCode" : NumberInt(2), 
                            "policyStatusDate" : ISODate("2022-02-19T05:46:15.000 0000")
                        }
                    ]
                }
            ]
        }, 
        {
            "receivedDate" : ISODate("2022-01-23T04:46:15.000 0000"), 
            "policyStatusDetail" : [
                {
                    "policyStsCode" : NumberInt(7), 
                    "policiesArray" : [
                        {
                            "policyDetailedCode" : NumberInt(3), 
                            "policyStatusDate" : ISODate("2022-02-16T04:46:15.000 0000")
                        }
                    ]
                }
            ]
        }
    ]
}
{ 
    "_id" : ObjectId("62684847e9594c65cbaa5d86"), 
    "agentId" : NumberInt(2), 
    "agentName" : "Iron Lady", 
    "policyList" : [
        {
            "receivedDate" : ISODate("2022-02-23T04:46:15.000 0000"), 
            "policyStatusDetail" : [
                {
                    "policyStsCode" : NumberInt(7), 
                    "policiesArray" : [
                        {
                            "policyDetailedCode" : NumberInt(2), 
                            "policyStatusDate" : ISODate("2022-03-03T05:46:15.000 0000")
                        }
                    ]
                }
            ]
        }
    ]
}
{ 
    "_id" : ObjectId("62684847e9594c65cbaa5d87"), 
    "agentId" : NumberInt(3), 
    "agentName" : "Proxy Agent", 
    "policyList" : [
        {
            "receivedDate" : ISODate("2022-04-10T04:46:15.000 0000"), 
            "policyStatusDetail" : [
                {
                    "policyStsCode" : NumberInt(1), 
                    "policiesArray" : [
                        {
                            "policyDetailedCode" : NumberInt(3), 
                            "policyStatusDate" : ISODate("2022-04-09T05:46:15.000 0000")
                        }
                    ]
                }
            ]
        }
    ]
}

I have to return only those documents that are only in a certain period of time. There must be a match in at least one of the fields: 1) "receivedDate" (which is located in the array at the first level of nesting - policyList) or 2) "policyStatusDate" (which is in the array at the third level of nesting - policiesArray). One match is enough, for example if I want to see documents from 01/02/2022 to 01/03/2022 I expect to see the first and second document (first document matches by policyStatusDate - 20/02/2022 and second document matches by receivedDate - 23/02/2022) and third document I don't expect to see because both dates in this document from April.

My aggregation request:

db.getCollection("offers").aggregate([
{
  $project: {
  "agentId": "$agentId",
  "agentName": "$agentName",
  "policyList": {
        $map: {
                input: "$policyList",
                as: "policies",
                in:  {
                  receivedDate: "$$policies.receivedDate",
                  policyStatusDetail: {
                    $map: {
                     input: "$$policies.policyStatusDetail",
                     as: "items",
                     in: {
                         policyStsCode: "$$items.policyStsCode",
                         policiesArray: {
                           $filter: {
                             input: "$$items.policiesArray",
                             as: "item",
                             cond: {
                               $or: [
                                {$and: [{$gte: ["$$policies.receivedDate", ISODate("2022-02-01")]},
                                    {$lte: ["$$policies.receivedDate", ISODate("2022-03-31")]}
                                    ]
                                    },
                                
                                {$and: [
                                    {$gte: ["$$item.policyStatusDate", ISODate("2022-02-01")]},
                                    {$lte: ["$$item.policyStatusDate", ISODate("2022-03-31")]}
                                    ]}
                                ]
                             }
                           }
                         }
                      }
                    }
                  }
                }
        } 
    }
  }

}
])

I received all 3 documents instead of only first and second document. I received third document with all fields and field policiesArray (type of array) is empty (as a result of my query). If this array is empty it means that document is not in requested period of time because it didn't pass a check. But I don't need to see this document. Result:

{ 
    "_id" : ObjectId("62684847e9594c65cbaa5d85"), 
    "agentId" : NumberInt(1), 
    "agentName" : "Digital Queen", 
    "policyList" : [
        {
            "receivedDate" : ISODate("2022-03-23T04:46:15.000 0000"), 
            "policyStatusDetail" : [
                {
                    "policyStsCode" : NumberInt(7), 
                    "policiesArray" : [
                        {
                            "policyDetailedCode" : NumberInt(1), 
                            "policyStatusDate" : ISODate("2022-02-20T04:46:15.000 0000")
                        }, 
                        {
                            "policyDetailedCode" : NumberInt(2), 
                            "policyStatusDate" : ISODate("2022-02-19T05:46:15.000 0000")
                        }
                    ]
                }
            ]
        }, 
        {
            "receivedDate" : ISODate("2022-01-23T04:46:15.000 0000"), 
            "policyStatusDetail" : [
                {
                    "policyStsCode" : NumberInt(7), 
                    "policiesArray" : [
                        {
                            "policyDetailedCode" : NumberInt(3), 
                            "policyStatusDate" : ISODate("2022-02-16T04:46:15.000 0000")
                        }
                    ]
                }
            ]
        }
    ]
}
{ 
    "_id" : ObjectId("62684847e9594c65cbaa5d86"), 
    "agentId" : NumberInt(2), 
    "agentName" : "Iron Lady", 
    "policyList" : [
        {
            "receivedDate" : ISODate("2022-02-23T04:46:15.000 0000"), 
            "policyStatusDetail" : [
                {
                    "policyStsCode" : NumberInt(7), 
                    "policiesArray" : [
                        {
                            "policyDetailedCode" : NumberInt(2), 
                            "policyStatusDate" : ISODate("2022-03-03T05:46:15.000 0000")
                        }
                    ]
                }
            ]
        }
    ]
}
{ 
    "_id" : ObjectId("62684847e9594c65cbaa5d87"), 
    "agentId" : NumberInt(3), 
    "agentName" : "Proxy Agent", 
    "policyList" : [
        {
            "receivedDate" : ISODate("2022-04-10T04:46:15.000 0000"), 
            "policyStatusDetail" : [
                {
                    "policyStsCode" : NumberInt(1), 
                    "policiesArray" : [

                    ]
                }
            ]
        }
    ]
}

So my question is what I need to add to my query to avoid returning a documents with empty policiesArray (third nesting level array)? Maybe also there are better options how to make this query correctly?

CodePudding user response:

EDIT: Complete redo to fix logic (hopefully)

The match stage needs just a single true/false, so nested "$reduce" can dive into each array element and evaluate/aggregate the condition.

db.collection.aggregate({
  "$match": {
    "$expr": {
      "$reduce": {
        "input": "$policyList",
        "initialValue": false,
        "in": {
          "$or": [
            "$$value",
            {
              "$and": [
                { "$gte": [ "$$this.receivedDate", ISODate("2022-02-01") ] },
                { "$lte": [ "$$this.receivedDate", ISODate("2022-03-01") ] }
              ]
            },
            {
              "$reduce": {
                "input": "$$this.policyStatusDetail",
                "initialValue": false,
                "in": {
                  "$or": [
                    "$$value",
                    {
                      "$reduce": {
                        "input": "$$this.policiesArray",
                        "initialValue": false,
                        "in": {
                          "$or": [
                            "$$value",
                            {
                              "$and": [
                                { "$gte": [ "$$this.policyStatusDate", ISODate("2022-02-01") ] },
                                { "$lte": [ "$$this.policyStatusDate", ISODate("2022-03-01") ] }
                              ]
                            }
                          ]
                        }
                      }
                    }
                  ]
                }
              }
            }
          ]
        }
      }
    }
  }
})

Try it on mongoplayground.net.

  • Related