Home > front end >  MongoDB query to include count of most frequent values for multiple fields
MongoDB query to include count of most frequent values for multiple fields

Time:02-22

Thank you in advance for any help!

I've a collection QR with schema similar to this:

// Schema for the QR collection. Each QR document is related 1-to-many to
// Datas documents through the shared `qrId` string.
const qrSchema = new Schema({
    qrId: { type: String, index: true },
    // Reference to the owning User document.
    owner: { type: Schema.Types.ObjectId, ref: 'User' },
    qrName: { type: String },
    qrCategory: { type: String, index: true },
    // The original snippet had an extra `}` here, which closed the schema
    // object early and made the final `})` a syntax error.
    shortUrl: { type: String },
});

And collection Datas similar to this:

// Schema for the Datas collection. Many Datas documents share one `qrId`
// with a single QR document.
const dataSchema = new Schema({
    // The original snippet was missing the comma after this property, which
    // is a syntax error.
    qrId: { type: String, index: true },
    city: { type: String },
    device: { type: String },
    date: { type: Date, index: true },
});

The relation between QR and Datas is 1-to-many.

I've an aggregate like this:

// Aggregation over the QR collection: selects one owner's QR documents,
// joins their Datas documents, keeps only the data inside a date window,
// and returns the 10 groups with the most data items.
// Reads `user`, `category` and `date` from the enclosing scope.
Model.QR.aggregate([
// Stage 1: keep QR documents owned by `user._id`. The $expr turns the
// category filter off when `category` is null, "" or the string "undefined";
// otherwise qrCategory must equal `category`.
{ $match: {
    $and: [
        { owner: mongoose.Types.ObjectId(user._id) },
        {
        $expr: {
            $cond: [
            { $in: [ category, [ null, "", "undefined" ]] },
            true,
            { $eq: [ "$qrCategory", category ] }
            ]
        }
        }
    ]
    }
},
// Stage 2: attach every document from "datas" with the same qrId as `data`.
{ $lookup:
    {
    "from": "datas",
    "localField": "qrId",
    "foreignField": "qrId",
    "as": "data"
    }
},
// Stage 3: drop _id, keep the descriptive QR fields, and reduce `data` to the
// items whose date lies in [date.start, date.end] (both bounds inclusive).
{
    $project: {
    _id: 0,
    qrId: 1,
    qrName: 1,
    qrCategory: 1,
    shortUrl: 1,
    data: {
        $filter: {
            input: "$data",
            as: "item",
            cond: {
              $and: [
                { $gte: [ "$$item.date", date.start ] },
                { $lte: [ "$$item.date", date.end] }
            ] }
        }
      }
    }    
},
// Stage 4: group by the four QR fields and push one { dataItems, count }
// entry per grouped document; $ifNull makes count 0 when `data` is null.
{ 
    $group: {
    _id: { "qrId": "$qrId", "qrName": "$qrName", "qrCategory": "$qrCategory", "shortUrl": "$shortUrl" },
    data: {
        $push: {
        dataItems: "$data",
        count: { 
            $size:  { '$ifNull': ['$data', []] }
        }
        }
    }
    }
},
// Stage 5: order groups by count, largest first.
{
    $sort: {
    "data.count": -1
    }
},
// Stage 6: keep the first 10 groups.
{
    $limit: 10,
// NOTE(review): the exec callback body is empty, so neither `err` nor
// `results` is used in this snippet.
}]).exec((err, results) => { })

Which is returning something like:

[
    {
        "_id": {
            "qrId": "0PRA",
            "qrName": "Campaign 0PRA",
            "qrCategory": "html",
            "shortUrl": "http://someurl.com/0PRA"
        },
        "data": [
            {
                "dataItems": [
                    {
                        "_id": "6200f2a8c0cf7a1c49233c7f",
                        "qrId": "0PRA",
                        "device": "iOS",
                        "city": "Beijing",
                    },
                    {
                        "_id": "6200f2eac0cf7a1c49233c80",
                        "qrId": "0PRA",
                        "device": "AndroidOS",
                        "city": "Beijing",
                    },
                    {
                        "_id": "6200f3a4c0cf7a1c49233c81",
                        "qrId": "0PRA",
                        "device": "AndroidOS",
                        "city": "Beijing",
                    },
                    {
                        "_id": "6200f632c0cf7a1c49233c88",
                        "qrId": "0PRA",
                        "device": "AndroidOS",
                        "city": "Nanchang",
                    },
                    {
                        "_id": "6201b342c0cf7a1c49233caa",
                        "qrId": "0PRA",
                        "device": "iOS",
                        "city": "Taizhou",
                    }
                ],
                "count": 5
            }
        ]
    },
    {
        "_id": {
            "qrId": "NQ17",
            "qrName": "Campaign NQ17",
            "qrCategory": "menu",
            "shortUrl": "http://someurl.com/NQ17"
        },
        "data": [
            {
                "dataItems": [
                    {
                        "_id": "6200f207c0cf7a1c49233c7a",
                        "qrId": "NQ17",
                        "device": "iOS",
                        "city": "Singapore"
                    },
                    {
                        "_id": "8200f207c1cf7a1c49233c7a",
                        "qrId": "NQ17",
                        "device": "iOS",
                        "city": "Singapore"
                    },
                    {
                        "_id": "6200ac5db44f23b9ec2b6040",
                        "qrId": "NQ17",
                        "device": "AndroidOS",
                        "city": "San Antonio"
                    }
                ],
                "count": 3
            }
        ]
    }
]

I'm trying to include the most frequent device and city in the results after the count of dataItems, like this:

[
    {
        "_id": {
            "qrId": "0PRA",
            "qrName": "Campaign 0PRA",
            "qrCategory": "html",
            "shortUrl": "http://someurl.com/0PRA"
        },
        "data": [
            {
                "dataItems": [
                    {
                        "_id": "6200f2a8c0cf7a1c49233c7f",
                        "qrId": "0PRA",
                        "device": "iOS",
                        "city": "Beijing",
                    },
                    {
                        "_id": "6200f2eac0cf7a1c49233c80",
                        "qrId": "0PRA",
                        "device": "AndroidOS",
                        "city": "Beijing",
                    },
                    {
                        "_id": "6200f3a4c0cf7a1c49233c81",
                        "qrId": "0PRA",
                        "device": "AndroidOS",
                        "city": "Beijing",
                    },
                    {
                        "_id": "6200f632c0cf7a1c49233c88",
                        "qrId": "0PRA",
                        "device": "AndroidOS",
                        "city": "Nanchang",
                    },
                    {
                        "_id": "6201b342c0cf7a1c49233caa",
                        "qrId": "0PRA",
                        "device": "iOS",
                        "city": "Taizhou",
                    }
                ],
                "count": 5,
                "topDevice": "AndroidOS",  // <---- trying to add this
                "topLocation": "Beijing"   // <---- trying to add this
            }
        ]
    },
    {
        "_id": {
            "qrId": "NQ17",
            "qrName": "Campaign NQ17",
            "qrCategory": "menu",
            "shortUrl": "http://someurl.com/NQ17"
        },
        "data": [
            {
                "dataItems": [
                    {
                        "_id": "6200f207c0cf7a1c49233c7a",
                        "qrId": "NQ17",
                        "device": "iOS",
                        "city": "Singapore"
                    },
                    {
                        "_id": "8200f207c1cf7a1c49233c7a",
                        "qrId": "NQ17",
                        "device": "iOS",
                        "city": "Singapore"
                    },
                    {
                        "_id": "6200ac5db44f23b9ec2b6040",
                        "qrId": "NQ17",
                        "device": "AndroidOS",
                        "city": "San Antonio"
                    }
                ],
                "count": 3,
                "topDevice": "iOS",          // <---- trying to add this
                "topLocation": "Singapore"   // <---- trying to add this
            }
        ]
    }
]

Is this possible?

Thank you very much in advance for any help or hints!

CodePudding user response:

Method 1

Using $function makes this much easier. It requires MongoDB version >= 4.4.

Sort function in js

// Adds `topDevice` and `topLocation` to every entry of `data` by running a
// server-side JavaScript function ($function, MongoDB >= 4.4) over the
// device / city values of that entry's dataItems.
db.collection.aggregate([
  {
    "$set": {
      "data": {
        "$map": {
          "input": "$data",
          "as": "d",
          "in": {
            "count": "$$d.count",
            "dataItems": "$$d.dataItems",
            "topDevice": {
              $function: {
                // Tally each value once in a Map, then sort ascending by that
                // tally and pop the last (most frequent) element. The original
                // comparator re-scanned the whole array with filter() on every
                // comparison; the ordering and result are unchanged.
                body: "function(arr) { const counts = new Map(); for (const v of arr) { counts.set(v, (counts.get(v) || 0) + 1); } return arr.sort((a, b) => counts.get(a) - counts.get(b)).pop(); }",
                // "$$d.dataItems.device" resolves to the array of device values.
                args: [ "$$d.dataItems.device" ],
                lang: "js"
              }
            },
            "topLocation": {
              $function: {
                // Same most-frequent-value function, applied to the city values.
                body: "function(arr) { const counts = new Map(); for (const v of arr) { counts.set(v, (counts.get(v) || 0) + 1); } return arr.sort((a, b) => counts.get(a) - counts.get(b)).pop(); }",
                args: [ "$$d.dataItems.city" ],
                lang: "js"
              }
            }
          }
        }
      }
    }
  }
])

mongoplayground


Method 2

// Pure-aggregation alternative: the most frequent device and city are computed
// inside the $lookup with a $facet, then lifted to the top level and grouped
// into the same { dataItems, count, ... } shape the question uses.
// NOTE(review): using localField/foreignField together with `pipeline` in one
// $lookup needs MongoDB 5.0+ according to the MongoDB documentation.
db.qr.aggregate([
  // Restrict to the owners of interest.
  { $match: { owner: { $in: ["1", "2"] } } },

  // Join the matching "data" documents; the facet yields a single document
  // holding three arrays: deviceGroup, cityGroup and all.
  {
    $lookup: {
      from: "data",
      localField: "qrId",
      foreignField: "qrId",
      as: "data",
      pipeline: [
        {
          $facet: {
            // Count per device, keep only the highest count.
            deviceGroup: [
              { $group: { _id: "$device", sum: { $sum: 1 } } },
              { $sort: { sum: -1 } },
              { $limit: 1 }
            ],
            // Count per city, keep only the highest count.
            cityGroup: [
              { $group: { _id: "$city", sum: { $sum: 1 } } },
              { $sort: { sum: -1 } },
              { $limit: 1 }
            ],
            // Empty sub-pipeline: passes every joined document through.
            all: []
          }
        }
      ]
    }
  },

  // `data` is a one-element array at this point, so "$data.<facet>" is an
  // array of arrays; the nested $first calls unwrap both levels.
  {
    $set: {
      data: { $first: "$data.all" },
      topDevice: { $first: { $first: "$data.deviceGroup._id" } },
      topLocation: { $first: { $first: "$data.cityGroup._id" } }
    }
  },

  // Rebuild the output shape: one group per QR with its items, top values and
  // item count ($ifNull gives 0 when `data` is null).
  {
    $group: {
      _id: {
        qrId: "$qrId",
        qrName: "$qrName",
        qrCategory: "$qrCategory",
        shortUrl: "$shortUrl"
      },
      data: {
        $push: {
          dataItems: "$data",
          topDevice: "$topDevice",
          topLocation: "$topLocation",
          count: { $size: { $ifNull: ["$data", []] } }
        }
      }
    }
  }
])

mongoplayground

CodePudding user response:

Query

  • add the $match stage you need; I did not understand what the match is supposed to do
  • lookup on qrId
  • facet to group all-documents, the topDevice the topLocation
  • $set to lift those values out of the nested locations where they are stored
  • count is added as the size of all-documents

*Maybe I am missing something, but try it (the first part is, I think, similar to YuTing's answer).

Test code here

// Joins each QR document to its Datas documents and returns, per QR:
// dataItems (all joined documents), count, topDevice and topLocation.
// NOTE(review): localField/foreignField combined with `pipeline`, and
// $getField, both need MongoDB 5.0+ according to the MongoDB documentation.
QR.aggregate([
  {
    $lookup: {
      // Collection names are case-sensitive. "datas" is the name the
      // question's own $lookup reads from; the original "Datas" would not
      // match that collection.
      from: "datas",
      localField: "qrId",
      foreignField: "qrId",
      pipeline: [
        {
          $facet: {
            // Empty sub-pipeline: every joined document, unchanged.
            dataItems: [],
            // Most frequent device: count per device, keep the largest.
            topDevice: [
              { $group: { _id: "$device", count: { $sum: 1 } } },
              { $sort: { count: -1 } },
              { $limit: 1 },
            ],
            // Most frequent city: count per city, keep the largest.
            topLocation: [
              { $group: { _id: "$city", count: { $sum: 1 } } },
              { $sort: { count: -1 } },
              { $limit: 1 },
            ],
          },
        },
      ],
      as: "data",
    },
  },
  // The lookup pipeline ends in $facet, so `data` holds a single document;
  // unwrap it.
  { $set: { data: { $arrayElemAt: ["$data", 0] } } },
  // Lift the facet results to the top level and drop the temporary `data`.
  {
    $set: {
      dataItems: "$data.dataItems",
      count: { $size: "$data.dataItems" },
      topDevice: {
        $getField: {
          field: "_id",
          input: { $arrayElemAt: ["$data.topDevice", 0] },
        },
      },
      topLocation: {
        $getField: {
          field: "_id",
          input: { $arrayElemAt: ["$data.topLocation", 0] },
        },
      },
      data: "$$REMOVE",
    },
  },
]);
  • Related