I have a collection of various documents similar to what is shown below as 3 objects.
{
comment:{
text_sentiment: "positive",
topic: "A"
}
}, // DOC-1
{
comment:{
text_sentiment: "negative",
topic: "A"
}}, // DOC-2
{
comment:{
text_sentiment: "positive",
topic: "B"
}},..//DOC-3 ..
I want to write an aggregation that returns results in the following structure:
{
topic: "A",
topicOccurance: 2,
sentiment: {
positive: 3,
negative: 2,
neutral: 0
}
},...
I have written an aggregation that is able to group for topic
and text_sentiment
but I do not know, how can I create a structure similar to the one shown above. Here is the aggregation that I created.
db.MyCollection.aggregate({
$match: {
_id: "xyz",
"comment.topic": {$exists: 1},
}
},{
$group: {
_id: {
topic: "$comment.topic",
text_sentiment: "$comment.text_sentiment"
},
total: {$sum: 1},
}
},{
$project: {
topic: {
name: "$_id.topic",
occurence: "$total"
},
sentiment: "$_id.text_sentiment"
}
},{
$sort: {"topic.occurence": -1}
})
It grouped by topic
and sentiment
, but the structure does not match the one above. How can I get a similar structure?
CodePudding user response:
Answer 1
You need 2 $group
stages.
$match
$group
- Group bycomment.topic
andcomment.topic
and$sum
.$group
- Group by_id.topic
,$sum
; and addtext_sentiment
andtotal
from previous stage intotext_sentiments
via$push
.$project
- Decorate output documents. Setsentiment
with converting fromtext_sentiments
array to key-value pair via$arrayToObject
.$sort
db.collection.aggregate([
{
$match: {
_id: "xyz",
"comment.topic": {
$exists: 1
},
}
},
{
$group: {
_id: {
topic: "$comment.topic",
text_sentiment: "$comment.text_sentiment"
},
total: {
$sum: 1
},
}
},
{
$group: {
_id: "$_id.topic",
total: {
$sum: 1
},
text_sentiments: {
$push: {
k: "$_id.text_sentiment",
v: "$total"
}
}
}
},
{
$project: {
topic: "$_id",
topicOccurance: "$total",
sentiment: {
"$arrayToObject": "$text_sentiments"
}
}
},
{
$sort: {
"topicOccurance": -1
}
}
])
Sample Mongo Playground (Answer 1)
Answer 2
As mentioned text_sentiment
values are fixed, you can use the query below:
db.collection.aggregate([
{
$match: {
_id: "xyz",
"comment.topic": {
$exists: 1
},
}
},
{
$group: {
_id: "$comment.topic",
total: {
$sum: 1
},
text_sentiments: {
$push: "$comment.text_sentiment"
}
}
},
{
$project: {
topic: "$_id",
topicOccurance: "$total",
sentiment: {
"positive": {
$reduce: {
input: "$text_sentiments",
initialValue: 0,
in: {
$sum: [
"$$value",
{
"$cond": {
"if": {
$eq: [
"$$this",
"positive"
]
},
"then": 1,
"else": 0
}
}
]
}
}
},
"negative": {
$reduce: {
input: "$text_sentiments",
initialValue: 0,
in: {
$sum: [
"$$value",
{
"$cond": {
"if": {
$eq: [
"$$this",
"negative"
]
},
"then": 1,
"else": 0
}
}
]
}
}
},
"neutral": {
$reduce: {
input: "$text_sentiments",
initialValue: 0,
in: {
$sum: [
"$$value",
{
"$cond": {
"if": {
$eq: [
"$$this",
"neutral"
]
},
"then": 1,
"else": 0
}
}
]
}
}
}
}
}
},
{
$sort: {
"topicOccurance": -1
}
}
])
Disadvantage: When the text_sentiment
value is added/removed, then you have to modify the query.
Sample Mongo Playground (Answer 2)
Answer 3
Another approach similar to Answer 2 is using $size
and $filter
to replace $reduce
.
db.collection.aggregate([
{
$match: {
//_id: "xyz",
"comment.topic": {
$exists: 1
},
}
},
{
$group: {
_id: "$comment.topic",
total: {
$sum: 1
},
text_sentiments: {
$push: "$comment.text_sentiment"
}
}
},
{
$project: {
topic: "$_id",
topicOccurance: "$total",
sentiment: {
"positive": {
$size: {
$filter: {
input: "$text_sentiments",
cond: {
$eq: [
"$$this",
"positive"
]
}
}
}
},
"negative": {
$size: {
$filter: {
input: "$text_sentiments",
cond: {
$eq: [
"$$this",
"negative"
]
}
}
}
},
"neutral": {
$size: {
$filter: {
input: "$text_sentiments",
cond: {
$eq: [
"$$this",
"neutral"
]
}
}
}
},
}
}
},
{
$sort: {
"topicOccurance": -1
}
}
])