Home > Blockchain >  mongo / mongoose aggregation pipeline query for survey data
mongo / mongoose aggregation pipeline query for survey data

Time:03-05

I am trying to write a query to get all of the results of some survey data stored in a mongo. The tricky part is some questions are radio questions with a single answer, and some questions are multi-select type questions, some are values that need to be averaged, so I want to perform different aggregations depending on the type of question.

The results are stored in a schema like this, with each item in the array being a survey response.

[
  {
    metaData: {
      survey: new ObjectId("62206ea0b31be3535abac547")
    },
    answers: {
      'question1': 'a',
      'question2': 'a',
      'question3': ['a','c'],
      'question4': 3
    },
    createdAt: 2022-03-03T07:30:40.517Z,
  },
  {
    metaData: {
      survey: new ObjectId("62206ea0b31be3535abac547"),
    },
    answers: {
      'question1': 'a',
      'question2': 'b',
      'question3': ['a','c'],
      'question4': 2
    },
    createdAt: 2022-03-03T07:30:40.518Z,
  },
  {
    metaData: {
      survey: new ObjectId("62206ea0b31be3535abac547"),
    },
    answers: {
      'question1': 'b',
      'question2': 'c',
      'question3': ['b']
      'question4': 1
    },
    createdAt: 2022-03-03T07:30:40.518Z,
  }
]

question1 and question2 are radio questions, so there can be only 1 answer, whereas question 3 is a multi-select, so the user can have multiple answers. Question 4 is a value that needs to be averaged.

I think there is some way to accomplish this in a single aggregation pipeline with some combination of facets, grouping, filters, projections, etc, but I am stuck.

I'd like to get a final result that looks like this

{ 
  'question1' : {
    'a' : 2,
    'b' : 1 
  },
  'question2' : {
    'a' : 1,
    'b' : 1,
    'c' : 1,
  },
  'question3' : {
    'a' : 2,
    'b' : 1,
    'c' : 2,
  },
  'question4' : 2 //avg (3 2 1)/3
}

OR even better:

{ 
  'radio': {
    'question1' : {
      'a' : 2,
      'b' : 1 
    },
    'question2' : {
      'a' : 1,
      'b' : 1,
      'c' : 1,
     },
  },
  'multi': {
    'question3' : {
      'a' : 2,
      'b' : 1,
      'c' : 2,
    }
  },
  'avg' : {
    'question4' : 2
  }
}

My pipeline would look something like this:

Response.aggregate([
{ $match: { 'metaData.survey': surveyId} }, // filter only for the specific survey
{ $project: { // I assume I have to turn the answers into an array
  "answers": { $objectToArray: "$answers" },
  "createdAt": "$createdAt"
  }
},
// maybe facet here?
// conceptually, In the next stage I'd want to bucket the questions
// by type with something like below, then perform the right type of
// aggregation depending on the question type
// if $in [$$answers.k  ['question1, 'question2']] group by k, v and count
// if $in [$$answers.k  ['question3']] unwind and count each unique value?
// { $facet : { radio: [], multi:[]}}
])

Basically, I know which question Id is a radio or a multi-select, I'm just trying to figure out how to format the pipeline to achieve the desired output based on the questionId being in a known array.

Bonus points if I can figure out how to also group the by day/month based on the createdAt time

CodePudding user response:

db.collection.aggregate([
  {
    $match: {}
  },
  {
    $project: { answers: { $objectToArray: "$answers" } }
  },
  {
    $unwind: "$answers"
  },
  {
    $unwind: "$answers.v"
  },
  {
    $group: {
      _id: "$answers",
      c: { "$sum": 1 }
    }
  },
  {
    $group: {
      _id: "$_id.k",
      v: { "$push": { k: "$_id.v", v: "$c" } }
    }
  },
  {
    $group: {
      _id: null,
      v: { "$push": { k: "$_id", v: { "$arrayToObject": "$v" } } }
    }
  },
  {
    $set: { v: { $arrayToObject: "$v" } }
  },
  {
    $replaceWith: "$v"
  }
])

mongoplayground


db.collection.aggregate([
  {
    $match: {}
  },
  {
    $project: { answers: { $objectToArray: "$answers" } }
  },
  {
    $unwind: "$answers"
  },
  {
    $set: {
      "answers.type": {
        $switch: {
          branches: [
            {
              case: { $isArray: "$answers.v" },
              then: "multi"
            },
            {
              case: { $eq: [ { $type: "$answers.v" }, "string" ] },
              then: "radio"
            },
            {
              case: { $isNumber: "$answers.v" },
              then: "avg"
            }
          ],
          default: "other"
        }
      }
    }
  },
  {
    $unwind: "$answers.v"
  },
  {
    $group: {
      _id: "$answers",
      c: { $sum: 1 }
    }
  },
  {
    $group: {
      _id: "$_id.k",
      type: { $first: "$_id.type" },
      v: {
        $push: {
          k: { $toString: "$_id.v" },
          v: "$c"
        }
      }
    }
  },
  {
    $group: {
      _id: "$type",
      v: {
        $push: {
          k: "$_id",
          v: { $arrayToObject: "$v" }
        }
      }
    }
  },
  {
    $group: {
      _id: null,
      v: {
        $push: {
          k: "$_id",
          v: { $arrayToObject: "$v" }
        }
      }
    }
  },
  {
    $set: { v: { $arrayToObject: "$v" } }
  },
  {
    $replaceWith: "$v"
  },
  {
    $set: {
      avg: {
        $arrayToObject: {
          $map: {
            input: { $objectToArray: "$avg" },
            as: "s",
            in: {
              k: "$$s.k",
              v: {
                $avg: {
                  $map: {
                    input: { $objectToArray: "$$s.v" },
                    as: "x",
                    in: { $multiply: [ { $toInt: "$$x.k" }, "$$x.v" ] }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
])

mongoplayground

  • Related