Home > Mobile >  How to select related events from the same table?
How to select related events from the same table?

Time:03-07

How do I make the next self query on MongoDB?

SELECT e.user_id AS user_id,
       e.datetime AS started_at, 
       (SELECT MIN(datetime)  ## taking the closest "end" event datetime of that userId ##
        FROM events 
        WHERE type = "end" AND 
              user_id = e.user_id AND 
              datetime > e.datetime) AS end_at,
       
FROM events AS e
WHERE e.type = "start" 

Over the next event data table:

{"_id" : "1", "type": "start", "datetime": "2022-02-01T10:15Z", "userId": "1"},
{"_id" : "2", "type": "end", "datetime": "2022-02-01T10:20Z", "userId": "1"},
{"_id" : "3", "type": "start", "datetime": "2022-02-01T10:16Z", "userId": "2"},
{"_id" : "4", "type": "end", "datetime": "2022-02-01T10:21Z", "userId": "2"},
{"_id" : "5", "type": "start", "datetime": "2022-02-02T11:01Z", "userId": "1"},
{"_id" : "6", "type": "end", "datetime": "2022-02-02T11:02Z", "userId": "1"}

The expected result should look like:

user_id started_at end_at
1 2022-02-01T10:15Z 2022-02-01T10:20Z
2 2022-02-01T10:16Z 2022-02-01T10:21Z
1 2022-02-02T11:01Z 2022-02-02T11:02Z

CodePudding user response:

Maybe something like this:

db.collection.aggregate([
{
  $sort: {
    "datetime": 1
 }
},
{
  $project: {
     "d": {
           k: "$type",
           v: "$datetime"
        },
          userId: 1
       }
 },
 {
   $group: {
     _id: "$userId",
     e: {
        $push: "$d"
     }
 }
 },
 {
   $addFields: {
     e: {
       $map: {
         input: {
           $range: [
            0,
            {
              $size: "$e"
            },
             2
          ]
         },
         as: "index",
         in: {
           $slice: [
            "$e",
            "$$index",
            2
            ]
          }
        }
      }
    }
  },
 {
   $unwind: "$e"
 },
 {
   $project: {
     events: {
       "$arrayToObject": "$e"
     }
   }
 },
  {
   $project: {
     userId: "$_id",
     start_at: "$events.start",
     end_at: "$events.end",
     _id: 0
   }
  }
 ])

Explailed: ( The solution will work only if the user events start / end sequentially )

  1. Sort the documents by datetime.
  2. Rename the fields type & datetime to k,v ( suitable for $arrayToObject )
  3. Group the documents per userId ( Note this solution has the limitation that total number of events must not exceed 16MB per userId)
  4. Split the events per date/time pairs (start end , considering user cannot start new event if the previous has not finished)
  5. $unwind the events array
  6. Convert start/end array to object.
  7. Project the fields as per the expected output.

playground

Not sure what the exact use case is , but in general looks abit more practical if you add sessionId for every event document so if user can start paralel sessions the start/end events to be possible easier to correlate based on sessionId.

CodePudding user response:

Here's a pipeline that closely (exactly?) follows your SQL. I converted the string datetime to ISODate to insure comparisons were done properly, but perhaps this is unecessary.

db.collection.aggregate([
  {
    // match each start
    "$match": { "type": "start" }
  },
  { // lookup ends for userId in collection
    "$lookup": {
      "from": "collection",
      "localField": "userId",
      "foreignField": "userId",
      "let": {
        "isoDate": {
          "$dateFromString": {
            "dateString": "$datetime",
            "format": "%Y-%m-%dT%H:%MZ"
          }
        }
      },
      "pipeline": [
        {
          "$match": {
            "type": "end",
            "$expr": {
              "$gt": [
                {
                  "$dateFromString": {
                    "dateString": "$datetime",
                    "format": "%Y-%m-%dT%H:%MZ"
                  }
                },
                "$$isoDate"
              ]
            }
          }
        }
      ],
      "as": "endArray"
    }
  },
  { // output desired fields
    "$project": {
      "_id": 0,
      "userId": 1,
      "started_at": "$datetime",
      "end_at": {
        // assumes original collection was sorted
        "$first": "$endArray.datetime"
      }
    }
  }
])

Try it on mongoplayground.net.

Here's another pipeline that uses "$setWindowFields", but it's not ideal. I don't know how to filter "$setWindowFields" "output" given the allowed operators, etc., but it works. Improvement comments welcome!

db.collection.aggregate([
  {
    // add winField sorted array to each doc
    // containing userId docs following
    // current doc
    "$setWindowFields": {
      "partitionBy": "$userId",
      "sortBy": { "datetime": 1 },
      "output": {
        "winField": {
          "$push": "$$CURRENT",
          "window": {
            "documents": [ 1, "unbounded" ]
          }
        }
      }
    }
  },
  {
    // just keep start docs
    "$match": { "type": "start" }
  },
  {
    // sorting on start datetime
    "$sort": { "datetime": 1 }
  },
  {
    // output desired fields
    "$project": {
      "_id": 0,
      "userId": 1,
      "started_at": "$datetime",
      "end_at": {
        // grab first end datetime
        "$getField": {
          "field": "datetime",
          "input": {
            "$first": {
              "$filter": {
                "input": "$winField",
                "cond": { "$eq": [ "$$this.type", "end" ] }
              }
            }
          }
        }
      }
    }
  }
])

Try it on mongoplayground.net.

  • Related