Home > Back-end >  MongoDB Aggregation - Counting the occurrences of elements in an array
MongoDB Aggregation - Counting the occurrences of elements in an array

Time:09-19

I am having trouble counting the occurrences of values in an array on MongoDB. Below is what I have currently.

I have 130million documents in a collection with the below format:

{
  "_id": {
    "$oid": "6326ea6f53636247f2daeb19"
  },
  "customerId": 267,
  "customerName": "customer name",
  "deviceId": 7605,
  "deviceModel": "xxx",
  "gpsDateTime": {
    "$date": {
      "$numberLong": "1663494743000"
    }
  },
  "heading": 189,
  "iMEI": "461990",
  "lat": 33.0997333334,
  "lng": -117.2096316667,
  "msgDateTime": {
    "$date": {
      "$numberLong": "1663494767156"
    }
  },
  "speedKPH": 0,
  "statusCode": "REPORT",
  "tripStartDateTime": {
    "$date": {
      "$numberLong": "1663384611590"
    }
  },
  "vehicleGroupId": 564,
  "vehicleGroupname": "vehicle group name",
  "vehicleId": 7474,
  "vehicleName": "vehicle name"
}

I have an aggregation for mongodb as follows:

db.gpsmessage.aggregate([{
     $match: {
      $and: [
       {
        msgDateTime: {
         $exists: true
        }
       },
       {
        msgDateTime: {
         $ne: null
        }
       },
       {
        msgDateTime: {
         $gt: ISODate('2020-12-31T00:00:00.000Z')
        }
       },
       {
        msgDateTime: {
         $lt: ISODate('2022-09-30T00:00:00.000Z')
        }
       }
      ]
     }
    }, {
     $project: {
      msgDateTime: 1,
      customerId: 1,
      customerName: 1,
      deviceId: 1,
      deviceModel: 1,
      iMEI: 1,
      vehicleId: 1,
      vehicleName: 1,
      statusCode: 1,
      vehicleGroupId: 1,
      vehicleGroupname: 1
     }
    }, {
     $group: {
      _id: {
       deviceId: '$deviceId',
       year: {
        $year: '$msgDateTime'
       },
       month: {
        $month: '$msgDateTime'
       }
      },
      customerId: {
       $last: '$customerId'
      },
      customerName: {
       $last: '$customerName'
      },
      deviceId: {
       $last: '$deviceId'
      },
      deviceModel: {
       $last: '$deviceModel'
      },
      iMEI: {
       $last: '$iMEI'
      },
      vehicleId: {
       $last: '$vehicleId'
      },
      vehicleName: {
       $last: '$vehicleName'
      },
      vehicleGroupId: {
       $last: '$vehicleGroupId'
      },
      vehicleGroupName: {
       $last: '$vehicleGroupname'
      },
      firstMsgDate: {
       $min: '$msgDateTime'
      },
      lastMsgDate: {
       $max: '$msgDateTime'
      },
      totalMessages: {
       $sum: 1
      }
     }
    }, {
     $project: {
      _id: {
       deviceId: 1,
       year: 1,
       month: 1
      },
      year: '$_id.year',
      month: '$_id.month',
      deviceId: 1,
      customerId: 1,
      customerName: 1,
      deviceModel: 1,
      iMEI: 1,
      vehicleId: 1,
      vehicleName: 1,
      firstMsgDate: 1,
      lastMsgDate: 1,
      totalMessages: 1
     }
    }, {
     $out: 'usage_info'
    }],{ allowDiskUse:true } );

This outputs the following document:

{
  "_id": {
    "deviceId": 3494,
    "year": 2022,
    "month": 8
  },
  "customerId": 8,
  "customerName": "customer name",
  "deviceId": 3494,
  "deviceModel": "xxx",
  "iMEI": "00000000030303030",
  "vehicleId": 3651,
  "vehicleName": "vehicle name",
  "firstMsgDate": {
    "$date": {
      "$numberLong": "1659741888000"
    }
  },
  "lastMsgDate": {
    "$date": {
      "$numberLong": "1661906908000"
    }
  },
  "totalMessages": 32,
  "totalStatus": [
    "REPORT",
    "REPORT",
    "REPORT",
    "REPORT",
    "REPORT",
    "REPORT",
    "LOW_BATT",
    "LOW_BATT",
    "REPORT",
    "REPORT",
    "REPORT",
    "REPORT",
    "REPORT",
    "REPORT",
    "REPORT",
    "REPORT",
    "REPORT",
    "REPORT",
    "REPORT",
    "REPORT",
    "PWR_ON",
    "LOW_BATT",
    "REPORT",
    "STATUS_EVT",
    "STATUS_EVT",
    "STATUS_EVT",
    "LOW_BATT",
    "PWR_ON",
    "STATUS_EVT",
    "STATUS_EVT",
    "STATUS_EVT",
    "STATUS_EVT"
  ],
  "year": 2022,
  "month": 8
}

What I am trying to figure out is how to count the occurrences in "totalStatus" and modify the response so the totalStatus transforms into the below format and I want to keep the rest of the fields I grouped (these are not the only status messages so the counting must be dynamic):

[{"REPORT": 10},{"STATUS_EVT":15},{"PWR_ON":5}, .....]

I have been trying to find a way to do this for 2 days with no luck. Help will be much appreciated.

CodePudding user response:

One option is to replace your $group and $project with:

  1. First group also by status
  2. Format the status to a dictionary using $arrayToObject
  3. Group again, now by year, month and deviceId only.
  4. Format
db.collection.aggregate([
  {$group: {
      _id: {
        deviceId: "$deviceId",
        year: {$year: "$msgDateTime"},
        month: {$month: "$msgDateTime"},
        status: "$statusCode"
      },
      customerId: {$last: "$customerId"},
      customerName: {$last: "$customerName"},
      deviceId: {$last: "$deviceId"},
      deviceModel: {$last: "$deviceModel"},
      iMEI: {$last: "$iMEI"},
      vehicleId: {$last: "$vehicleId"},
      vehicleName: {$last: "$vehicleName"},
      vehicleGroupId: {$last: "$vehicleGroupId"},
      vehicleGroupName: {$last: "$vehicleGroupname"},
      firstMsgDate: {$min: "$msgDateTime"},
      lastMsgDate: {$max: "$msgDateTime"},
      totalMessages: {$sum: 1}
    }
  },
  {$set: {status: [{k: "$_id.status", v: "$totalMessages"}],
      year: "$_id.year",  month: "$_id.month"}
  },
  {$set: {status: {$arrayToObject: "$status"}}},
  {$group: {_id: {deviceId: "$_id.deviceId", month: "$_id.month", year: "$_id.year"},
      totalStatus: {$push: "$status"}, data: {$first: "$$ROOT"},
      totalMessages: {$sum: "$totalMessages"}}},
  {$set: {
      "data._id.status": "$$REMOVE",
      "data.status": "$$REMOVE",
      "data.totalStatus": "$totalStatus",
      "data.totalMessages": "$totalMessages"
  }},
  {$replaceRoot: {newRoot: "$data"}}
])

See how it works on the playground example

  • Related