How do I make the next self query on MongoDB?
SELECT e.user_id AS user_id,
e.datetime AS started_at,
(SELECT MIN(datetime) ## taking the closest "end" event datetime of that userId ##
FROM events
WHERE type = "end" AND
user_id = e.user_id AND
datetime > e.datetime) AS end_at,
FROM events AS e
WHERE e.type = "start"
Over the next event data table:
{"_id" : "1", "type": "start", "datetime": "2022-02-01T10:15Z", "userId": "1"},
{"_id" : "2", "type": "end", "datetime": "2022-02-01T10:20Z", "userId": "1"},
{"_id" : "3", "type": "start", "datetime": "2022-02-01T10:16Z", "userId": "2"},
{"_id" : "4", "type": "end", "datetime": "2022-02-01T10:21Z", "userId": "2"},
{"_id" : "5", "type": "start", "datetime": "2022-02-02T11:01Z", "userId": "1"},
{"_id" : "6", "type": "end", "datetime": "2022-02-02T11:02Z", "userId": "1"}
The expected result should look like:
user_id | started_at | end_at |
---|---|---|
1 | 2022-02-01T10:15Z | 2022-02-01T10:20Z |
2 | 2022-02-01T10:16Z | 2022-02-01T10:21Z |
1 | 2022-02-02T11:01Z | 2022-02-02T11:02Z |
CodePudding user response:
Maybe something like this:
db.collection.aggregate([
{
$sort: {
"datetime": 1
}
},
{
$project: {
"d": {
k: "$type",
v: "$datetime"
},
userId: 1
}
},
{
$group: {
_id: "$userId",
e: {
$push: "$d"
}
}
},
{
$addFields: {
e: {
$map: {
input: {
$range: [
0,
{
$size: "$e"
},
2
]
},
as: "index",
in: {
$slice: [
"$e",
"$$index",
2
]
}
}
}
}
},
{
$unwind: "$e"
},
{
$project: {
events: {
"$arrayToObject": "$e"
}
}
},
{
$project: {
userId: "$_id",
start_at: "$events.start",
end_at: "$events.end",
_id: 0
}
}
])
Explailed: ( The solution will work only if the user events start / end sequentially )
- Sort the documents by datetime.
- Rename the fields type & datetime to k,v ( suitable for $arrayToObject )
- Group the documents per userId ( Note this solution has the limitation that total number of events must not exceed 16MB per userId)
- Split the events per date/time pairs (start end , considering user cannot start new event if the previous has not finished)
- $unwind the events array
- Convert start/end array to object.
- Project the fields as per the expected output.
Not sure what the exact use case is , but in general looks abit more practical if you add sessionId for every event document so if user can start paralel sessions the start/end events to be possible easier to correlate based on sessionId.
CodePudding user response:
Here's a pipeline that closely (exactly?) follows your SQL. I converted the string datetime
to ISODate
to insure comparisons were done properly, but perhaps this is unecessary.
db.collection.aggregate([
{
// match each start
"$match": { "type": "start" }
},
{ // lookup ends for userId in collection
"$lookup": {
"from": "collection",
"localField": "userId",
"foreignField": "userId",
"let": {
"isoDate": {
"$dateFromString": {
"dateString": "$datetime",
"format": "%Y-%m-%dT%H:%MZ"
}
}
},
"pipeline": [
{
"$match": {
"type": "end",
"$expr": {
"$gt": [
{
"$dateFromString": {
"dateString": "$datetime",
"format": "%Y-%m-%dT%H:%MZ"
}
},
"$$isoDate"
]
}
}
}
],
"as": "endArray"
}
},
{ // output desired fields
"$project": {
"_id": 0,
"userId": 1,
"started_at": "$datetime",
"end_at": {
// assumes original collection was sorted
"$first": "$endArray.datetime"
}
}
}
])
Try it on mongoplayground.net.
Here's another pipeline that uses "$setWindowFields"
, but it's not ideal. I don't know how to filter "$setWindowFields"
"output"
given the allowed operators, etc., but it works. Improvement comments welcome!
db.collection.aggregate([
{
// add winField sorted array to each doc
// containing userId docs following
// current doc
"$setWindowFields": {
"partitionBy": "$userId",
"sortBy": { "datetime": 1 },
"output": {
"winField": {
"$push": "$$CURRENT",
"window": {
"documents": [ 1, "unbounded" ]
}
}
}
}
},
{
// just keep start docs
"$match": { "type": "start" }
},
{
// sorting on start datetime
"$sort": { "datetime": 1 }
},
{
// output desired fields
"$project": {
"_id": 0,
"userId": 1,
"started_at": "$datetime",
"end_at": {
// grab first end datetime
"$getField": {
"field": "datetime",
"input": {
"$first": {
"$filter": {
"input": "$winField",
"cond": { "$eq": [ "$$this.type", "end" ] }
}
}
}
}
}
}
}
])
Try it on mongoplayground.net.