I am having trouble counting the occurrences of values in an array on MongoDB. Below is what I have currently.
I have 130million documents in a collection with the below format:
{
"_id": {
"$oid": "6326ea6f53636247f2daeb19"
},
"customerId": 267,
"customerName": "customer name",
"deviceId": 7605,
"deviceModel": "xxx",
"gpsDateTime": {
"$date": {
"$numberLong": "1663494743000"
}
},
"heading": 189,
"iMEI": "461990",
"lat": 33.0997333334,
"lng": -117.2096316667,
"msgDateTime": {
"$date": {
"$numberLong": "1663494767156"
}
},
"speedKPH": 0,
"statusCode": "REPORT",
"tripStartDateTime": {
"$date": {
"$numberLong": "1663384611590"
}
},
"vehicleGroupId": 564,
"vehicleGroupname": "vehicle group name",
"vehicleId": 7474,
"vehicleName": "vehicle name"
}
I have an aggregation for mongodb as follows:
db.gpsmessage.aggregate([{
$match: {
$and: [
{
msgDateTime: {
$exists: true
}
},
{
msgDateTime: {
$ne: null
}
},
{
msgDateTime: {
$gt: ISODate('2020-12-31T00:00:00.000Z')
}
},
{
msgDateTime: {
$lt: ISODate('2022-09-30T00:00:00.000Z')
}
}
]
}
}, {
$project: {
msgDateTime: 1,
customerId: 1,
customerName: 1,
deviceId: 1,
deviceModel: 1,
iMEI: 1,
vehicleId: 1,
vehicleName: 1,
statusCode: 1,
vehicleGroupId: 1,
vehicleGroupname: 1
}
}, {
$group: {
_id: {
deviceId: '$deviceId',
year: {
$year: '$msgDateTime'
},
month: {
$month: '$msgDateTime'
}
},
customerId: {
$last: '$customerId'
},
customerName: {
$last: '$customerName'
},
deviceId: {
$last: '$deviceId'
},
deviceModel: {
$last: '$deviceModel'
},
iMEI: {
$last: '$iMEI'
},
vehicleId: {
$last: '$vehicleId'
},
vehicleName: {
$last: '$vehicleName'
},
vehicleGroupId: {
$last: '$vehicleGroupId'
},
vehicleGroupName: {
$last: '$vehicleGroupname'
},
firstMsgDate: {
$min: '$msgDateTime'
},
lastMsgDate: {
$max: '$msgDateTime'
},
totalMessages: {
$sum: 1
}
}
}, {
$project: {
_id: {
deviceId: 1,
year: 1,
month: 1
},
year: '$_id.year',
month: '$_id.month',
deviceId: 1,
customerId: 1,
customerName: 1,
deviceModel: 1,
iMEI: 1,
vehicleId: 1,
vehicleName: 1,
firstMsgDate: 1,
lastMsgDate: 1,
totalMessages: 1
}
}, {
$out: 'usage_info'
}],{ allowDiskUse:true } );
This outputs the following document:
{
"_id": {
"deviceId": 3494,
"year": 2022,
"month": 8
},
"customerId": 8,
"customerName": "customer name",
"deviceId": 3494,
"deviceModel": "xxx",
"iMEI": "00000000030303030",
"vehicleId": 3651,
"vehicleName": "vehicle name",
"firstMsgDate": {
"$date": {
"$numberLong": "1659741888000"
}
},
"lastMsgDate": {
"$date": {
"$numberLong": "1661906908000"
}
},
"totalMessages": 32,
"totalStatus": [
"REPORT",
"REPORT",
"REPORT",
"REPORT",
"REPORT",
"REPORT",
"LOW_BATT",
"LOW_BATT",
"REPORT",
"REPORT",
"REPORT",
"REPORT",
"REPORT",
"REPORT",
"REPORT",
"REPORT",
"REPORT",
"REPORT",
"REPORT",
"REPORT",
"PWR_ON",
"LOW_BATT",
"REPORT",
"STATUS_EVT",
"STATUS_EVT",
"STATUS_EVT",
"LOW_BATT",
"PWR_ON",
"STATUS_EVT",
"STATUS_EVT",
"STATUS_EVT",
"STATUS_EVT"
],
"year": 2022,
"month": 8
}
What I am trying to figure out is how to count the occurrences in "totalStatus" and modify the response so the totalStatus transforms into the below format and I want to keep the rest of the fields I grouped (these are not the only status messages so the counting must be dynamic):
[{"REPORT": 10},{"STATUS_EVT":15},{"PWR_ON":5}, .....]
I have been trying to find a way to do this for 2 days with no luck. Help will be much appreciated.
CodePudding user response:
One option is to replace your $group
and $project
with:
- First group also by
status
- Format the status to a dictionary using
$arrayToObject
- Group again, now by
year
,month
anddeviceId
only. - Format
db.collection.aggregate([
{$group: {
_id: {
deviceId: "$deviceId",
year: {$year: "$msgDateTime"},
month: {$month: "$msgDateTime"},
status: "$statusCode"
},
customerId: {$last: "$customerId"},
customerName: {$last: "$customerName"},
deviceId: {$last: "$deviceId"},
deviceModel: {$last: "$deviceModel"},
iMEI: {$last: "$iMEI"},
vehicleId: {$last: "$vehicleId"},
vehicleName: {$last: "$vehicleName"},
vehicleGroupId: {$last: "$vehicleGroupId"},
vehicleGroupName: {$last: "$vehicleGroupname"},
firstMsgDate: {$min: "$msgDateTime"},
lastMsgDate: {$max: "$msgDateTime"},
totalMessages: {$sum: 1}
}
},
{$set: {status: [{k: "$_id.status", v: "$totalMessages"}],
year: "$_id.year", month: "$_id.month"}
},
{$set: {status: {$arrayToObject: "$status"}}},
{$group: {_id: {deviceId: "$_id.deviceId", month: "$_id.month", year: "$_id.year"},
totalStatus: {$push: "$status"}, data: {$first: "$$ROOT"},
totalMessages: {$sum: "$totalMessages"}}},
{$set: {
"data._id.status": "$$REMOVE",
"data.status": "$$REMOVE",
"data.totalStatus": "$totalStatus",
"data.totalMessages": "$totalMessages"
}},
{$replaceRoot: {newRoot: "$data"}}
])
See how it works on the playground example