Home > Software engineering >  Optimize mongoDB query to get count of items from separate collection
Optimize mongoDB query to get count of items from separate collection

Time:05-24

I have two collections namely "tags" and "bookmarks".

Tags documents: 

        {
            "taggedBookmarksCount": 2,
            "taggedNotesCount": 0,
            "_id": "627a80e6b12b0dc78b3a6d4b",
            "name": "Article"
        },
        {
            "taggedBookmarksCount": 0,
            "taggedNotesCount": 0,
            "_id": "62797885b479b5906ef6ed43",
            "name": "Client"
        },

Bookmark Documents:

{
            "_id": "627a814db12b0dc78b3a6d54",

            "bookmarkTags": [
                {
                    "tagId": "627a814db12b0dc78b3a6d55",
                    "tag": "Article"
                },
                {
                    "tagId": "627a814db12b0dc78b3a6d56",
                    "tag": "to be read"
                }
            ],
            "bookmarkTitle": "Please sorrow of work",
  
           
        }

Objective is to get the counts of bookmarks for all the tags in the "tags" collection.

Below is my current implementation, which returns the count of bookmarks for each tags.But this query takes around 3 sec to run (REST API response time) for 20 tags.

        tags = await Tag.find(
          {
            userId: req.params.userId
          },
          { _id: 1 }
        );
        tagIds = tags.map(tag => {
          return tag._id.toString();
        });

    const tagCounts = await Promise.all(
      tagIds.map(async tagId => {
        const count = await Model.aggregate([
          {
            $match: {
              bookmarkTags: {
                $elemMatch: {
                  tagId: tagId
                }
              }
            }
          },
          {
            $group: {
              _id: '_id',
              count: {
                $sum: 1
              }
            }
          }
        ]);
        return { tagId, count: count[0] ? count[0].count : 0 };
      })
    );

I am assuming its taking longer as I am mapping over all the tags, there are multiple round trips to database.Please suggest an approach to reduce the time of query execution.

CodePudding user response:

You can do as below

db.bookmark.aggregate([
  {
    "$unwind": "$bookmarkTags" //Reshape tags
  },
  {
    "$lookup": { //Do a join
      "from": "tags",
      "localField": "bookmarkTags.tagId",
      "foreignField": "_id",
      "as": "btags"
    }
  },
  {
    "$unwind": { //reshape the array elements
      path: "$btags",
      preserveNullAndEmptyArrays: true
    }
  },
  {
    "$group": { // Group tag wise bookmarks
      "_id": "$bookmarkTags.tagId",
      "docs": {
        "$addToSet": "$btags"
      }
    }
  },
  {
    "$project": { //Get counts, project what you want.
      tag_id: "$_id",
      "count": {
        "$size": "$docs"
      },
      _id: 0
    }
  }
])

Playground

If you have given list of tag ids, then you can use it in match stage.

Updated playground

db.bookmark.aggregate([
  {
    "$unwind": "$bookmarkTags"
  },
  {
    "$lookup": {
      "from": "tags",
      "localField": "bookmarkTags.tagId",
      "foreignField": "_id",
      "as": "btags"
    }
  },
  {
    "$unwind": {
      path: "$btags",
      preserveNullAndEmptyArrays: true
    }
  },
  {
    "$group": {
      "_id": "$btags._id",
      "docs": {
        "$push": "$btags"
      }
    }
  },
  {
    "$project": {
      tag_id: "$_id",
      "count": {
        "$size": "$docs"
      },
      _id: 0
    }
  }
])
  • Related