Home > Back-end >  How to filter data in collection containing documents with 3 level nested arrays depending on the va
How to filter data in collection containing documents with 3 level nested arrays depending on the va

Time:05-01

I appreciate any help in such case. Collection in MongoDB (now only 2 documents for demonstration purpose):

{ 
    "_id" : ObjectId("62684847e9594c65cbaa5d85"), 
    "agentId" : NumberInt(1), 
    "agentName" : "Yardi Gaondi", 
    "policyList" : [
        {
            "receivedDate" : ISODate("2022-03-23T04:46:15.000 0000"), 
            "policyStatusDetail" : [
                {
                    "policyStsCode" : NumberInt(7), 
                    "policiesArray" : [
                        {
                            "policyDetailedCode" : NumberInt(1), 
                            "policyStatusDate" : ISODate("2022-02-20T04:46:15.000 0000")
                        }, 
                        {
                            "policyDetailedCode" : NumberInt(2), 
                            "policyStatusDate" : ISODate("2022-01-19T05:46:15.000 0000")
                        }
                    ]
                }
            ]
        }, 
        {
            "receivedDate" : ISODate("2022-03-23T04:46:15.000 0000"), 
            "policyStatusDetail" : [
                {
                    "policyStsCode" : NumberInt(7), 
                    "policiesArray" : [
                        {
                            "policyDetailedCode" : NumberInt(3), 
                            "policyStatusDate" : ISODate("2022-01-16T04:46:15.000 0000")
                        }
                    ]
                }
            ]
        }, 
        {
            "receivedDate" : ISODate("2022-02-23T04:46:15.000 0000"), 
            "policyStatusDetail" : [
                {
                    "policyStsCode" : NumberInt(7), 
                    "policiesArray" : [
                        {
                            "policyDetailedCode" : NumberInt(1), 
                            "policyStatusDate" : ISODate("2022-01-20T04:46:15.000 0000")
                        }, 
                        {
                            "policyDetailedCode" : NumberInt(2), 
                            "policyStatusDate" : ISODate("2022-01-19T05:46:15.000 0000")
                        }
                    ]
                }
            ]
        }
    ]
}
{ 
    "_id" : ObjectId("62684847e9594c65cbaa5d86"), 
    "agentId" : NumberInt(2), 
    "agentName" : "Michelle Hazandi", 
    "policyList" : [
        {
            "receivedDate" : ISODate("2022-04-10T04:46:15.000 0000"), 
            "policyStatusDetail" : [
                {
                    "policyStsCode" : NumberInt(7), 
                    "policiesArray" : [
                        {
                            "policyDetailedCode" : NumberInt(2), 
                            "policyStatusDate" : ISODate("2022-04-09T05:46:15.000 0000")
                        }
                    ]
                }
            ]
        }, 
        {
            "receivedDate" : ISODate("2022-03-10T04:46:15.000 0000"), 
            "policyStatusDetail" : [
                {
                    "policyStsCode" : NumberInt(7), 
                    "policiesArray" : [
                        {
                            "policyDetailedCode" : NumberInt(2), 
                            "policyStatusDate" : ISODate("2022-03-09T05:46:15.000 0000")
                        }
                    ]
                }
            ]
        }
    ]
}

So collection consists of 2 documents, in each document there is a field "policyList" which is an array of objects. In first document policyList contains 3 objects, in second document only two. So I have to filter documents in this collection in such way: 1) I need to keep in the "policyList" array only those objects that match to such condition: one of the fields are in a certain time interval, which means that must be a match in at least one of the fields(first field is "receivedDate" - which is located in the array at the first level of nesting - "policyList" or second field "policyStatusDate" - which is located in the array at the third level of nesting - "policiesArray", and if there is a match in one of abovementioned fields we return from the "policyList" full object, that means we cannot throw away from "policiesArray" any object). One match is enough, for example if I want to see documents from 01/02/2022 to 01/03/2022 I expect to see in the first document in the "policyList" array only first and third object, because first object matches by "policyStatusDate" - 20/02/2022 (match in one of the object in "policiesArray" is enough) and third object matches by "receivedDate" - 23/02/2022) and second object in first document I don't expect to see because both dates in this document are not in the period from 01/02/2022 to 01/03/2022; 2) if there are no any matches among objects in "policyList", that means "policyList" must be empty after filtering, and in such case we don't need to return this document. For example if I request for a documents from 01/02/2022 to 01/03/2022 I'm not expecting to see second document, because no "policyStatusDate" and no "receivedDate" are in requested time interval.

My aggregation request:

db.getCollection("offers2").aggregate([
{
  $project: {
  "agentId": "$agentId",
  "agentName": "$agentName",
  "policyList": {
      $filter: {
         input: "$policyList",
         as: "item",
         cond: {
         "$or": [
            {
              "$and": [
                { "$gte": [ "$$item.receivedDate", ISODate("2022-02-01") ] },
                { "$lte": [ "$$item.receivedDate", ISODate("2022-03-01") ] }
              ]
            }, 
            {
               $and": [
                { "$gte": [ "$$item.policyStatusDetail.policiesArray.policyStatusDate", ISODate("2022-02-01") ] },
                { "$lte": [ "$$item.policyStatusDetail.policiesArray.policyStatusDate", ISODate("2022-03-01") ] }
              ]
            }
         ]
         }
      }
  }
  }
},
{
  $project: {
  "agentId": "$agentId",
  "agentName": "$agentName",
  "policyList": "$policyList",
  "numPoliciesPerDate": {
    $cond: {  
      if: {$isArray: "$policyList"}, then: {$size: "$policyList"}, else: "0"
     }
    }
  }
  },
  {
  $match: {
    "numPoliciesPerDate": {$gte: 1}
     }
  }
]) 

After running this query I expect to receive first document with first object and third object in "policyList" array, but I received only third object (there is matching by "receivedDate" in this object ). Result:

{ 
    "_id" : ObjectId("62684847e9594c65cbaa5d85"), 
    "agentId" : NumberInt(1), 
    "agentName" : "Yardi Gaondi", 
    "policyList" : [
        {
            "receivedDate" : ISODate("2022-02-23T04:46:15.000 0000"), 
            "policyStatusDetail" : [
                {
                    "policyStsCode" : NumberInt(7), 
                    "policiesArray" : [
                        {
                            "policyDetailedCode" : NumberInt(1), 
                            "policyStatusDate" : ISODate("2022-01-20T04:46:15.000 0000")
                        }, 
                        {
                            "policyDetailedCode" : NumberInt(2), 
                            "policyStatusDate" : ISODate("2022-01-19T05:46:15.000 0000")
                        }
                    ]
                }
            ]
        }
    ], 
    "numPoliciesPerDate" : NumberInt(1)
}

So it seems that condition

 $and": [
                { "$gte": [ "$$item.policyStatusDetail.policiesArray.policyStatusDate", ISODate("2022-02-01") ] },
                { "$lte": [ "$$item.policyStatusDetail.policiesArray.policyStatusDate", ISODate("2022-03-01") ] }
              ]

doesn't work correct. I think it's because it may be impossible to use dot notation when we work with nested arrays. So maybe somebody could help me to fix this aggregation query so that the requirements I described at the beginning will be fullfiled and in our case first object from "policyList" in first document will also be returned?

CodePudding user response:

I thinks this one will do the trick, just replace the dates with the correct format of your timezone:

[
  {
    $unwind: {
      path: "$policyList"
    }
  },
  {
    $match: {
      $or: [
        {
          "policyList.receivedDate": {
            $gte: ISODate("2022-02-01T03:00:00.000Z"),
            $lte: ISODate("2022-03-01T03:00:00.000Z")
          }
        },
        {
          "policyList.policyStatusDetail.policiesArray.policyStatusDate": {
            $gte: ISODate("2022-02-01T03:00:00.000Z"),
            $lte: ISODate("2022-03-01T03:00:00.000Z")
          }
        }
      ]
    }
  },
  {
    $group: {
      _id: "$_id",
      agentId: {
        $first: "$agentId"
      },
      agentname: {
        $first: "$agentName"
      },
      policyList: {
        $push: {
          receivedDate: "$policyList.receivedDate",
          policyStatusDetail: "$policyList.policyStatusDetail"
        }
      }
    }
  },
  {
    $project: {
      "agentId": "$agentId",
      "agentName": "$agentName",
      "policyList": "$policyList",
      "numPoliciesPerDate": {
        $cond: {
          if: {
            $isArray: "$policyList"
          },
          then: {
            $size: "$policyList"
          },
          else: "0"
        }
      }
    }
  },
  {
    $match: {
      "numPoliciesPerDate": {
        $gte: 1
      }
    }
  }
]
  • Related