I'm using Mongo and I need to aggregate the following timeseries, grouping by account_id
and get the sum of each nested values. For the sake of example I keep the dataset simple with only the pub
object, but in my real collection I have other objects and values to aggregate
[
{
"account_id": 1,
"pub": {
"cpm": NumberDecimal("1"),
"monthly": NumberDecimal("1.5")
},
"time": ISODate("2022-00-01T01:00:00.000"),
},
{
"account_id": 2,
"pub": {
"cpm": NumberDecimal("3"),
"monthly": NumberDecimal("3.5")
},
"time": ISODate("2022-00-01T01:00:00.000"),
},
{
"account_id": 1,
"pub": {
"cpm": NumberDecimal("2"),
"monthly": NumberDecimal("2.5")
},
"time": ISODate("2022-00-01T02:00:00.000"),
},
{
"account_id": 2,
"pub": {
"cpm": NumberDecimal("4"),
"monthly": NumberDecimal("4.5")
},
"time": ISODate("2022-00-01T02:00:00.000"),
}
]
Expected output
[
{
"_id": 1, // account_id
"pub": {
"cpm": 3,
"monthly": 4
}
},
{
"_id": 2, // account_id
"pub": {
"cpm": 7,
"monthly": 8
}
}
]
I have found the following two methods that works as expected, but they seems really verbose to me, especially the first. Considering that in the real collection there are many other objects and values.
Method 1
db.collection.aggregate([
{
$group: {
_id: '$account_id',
pub: {
$accumulator: {
init: function () {
return {
cpm: 0,
monthly: 0,
};
},
accumulate: function (state, cpm, monthly) {
return {
cpm: state.cpm cpm,
monthly: state.monthly monthly,
};
},
accumulateArgs: [
{ $toDouble: '$pub.cpm' },
{ $toDouble: '$pub.monthly' },
],
merge: function (state1, state2) {
return {
cpm: state1.cpm state2.cpm,
monthly: state1.monthly state2.monthly,
};
},
finalize: function (state) {
return {
cpm: state.cpm,
monthly: state.monthly,
};
},
lang: 'js',
},
},
},
}
])
Method 2
db.collection.aggregate([
{
"$group": {
"_id": "$account_id",
"pub__cpm": {
$sum: "$pub.cpm"
},
"pub__monthly": {
$sum: "$pub.monthly"
}
}
},
{
$set: {
pub: {
cpm: {
"$toDouble": "$pub__cpm"
},
monthly: {
"$toDouble": "$pub__monthly"
}
},
},
},
{
$unset: [
"pub__cpm",
"pub__monthly"
]
}
)]
It would be grate something like this
{
"$group": {
"_id": "$account_id",
pub: {
cpm: { $sum: "$pub.cpm" },
monthly: { $sum: "$pub.monthly" },
},
}
}
But it throws "The field "$pub" must be an accumulator object", that's why I ended up with the first method.
There are better ways to achieve the same result? If not, which method is faster? Thanks
CodePudding user response:
you can use $project to format your output so instead of $set
and $unset
use $project
like this
db.collection.aggregate([
{
"$group": {
"_id": "$account_id",
"pub__cpm": { $sum: "$pub.cpm" },
"pub__monthly": { $sum: "$pub.monthly" }
}
},
{
"$project": {
"pub": {
"cpm": "$pub__cpm",
"monthly": "$pub__monthly"
}
}
}
])
CodePudding user response:
After the group stage you can project the cumulative sum of the variable cpm and monthly into the pub
using the project
operation.
db.collection.aggregate([
{
"$group": {
"_id": "$account_id",
cpm: {
$sum: "$pub.cpm"
},
monthly: {
$sum: "$pub.monthly"
},
}
},
{
"$project": {
pub: {
cpm: "$cpm",
monthly: "$monthly"
}
}
}
])