We have log/ time series data like.
Input
Session_id user_id session_timestamp
S1 U1 2019-10-01 22:00:00
S1 U1 2019-10-01 22:00:01
S1 U1 2019-10-01 22:00:02
S1 U1 2019-10-01 22:00:03
S1 U2 2019-10-01 22:00:04
S1 U2 2019-10-01 22:00:05
S1 U2 2019-10-01 22:00:06
S1 U2 2019-10-01 22:00:07
S1 U3 2019-10-01 22:00:08
S1 U3 2019-10-01 22:00:09
S1 U3 2019-10-01 22:00:10
S1 U3 2019-10-01 22:00:11
S1 U3 2019-10-01 22:00:12
S1 U1 2019-10-01 22:00:13
S1 U1 2019-10-01 22:00:14
S1 U1 2019-10-01 22:00:15
S1 U1 2019-10-01 22:00:16
Output
Session_id user_id Session_start_time Session_end_time
S1 U1 2019-10-01 22:00:00 2019-10-01 22:00:03
S1 U2 2019-10-01 22:00:04 2019-10-01 22:00:07
S1 U3 2019-10-01 22:00:08 2019-10-01 22:00:12
S1 U1 2019-10-01 22:00:13 2019-10-01 22:00:16
Explanation
We have a heartbeat logged at every second.
First four row should be considered as on session (User U1).
last four row are also part of different session (User U1).
I tried using window function with lag /lead but i was not able to distinguish the second session of u1 any version of sql work for me.
Script for data
create table logs(
Session_id varchar(10),
user_id varchar(10),
session_timestamp date
)
insert into logs
select * from( select 'S1' as session_id, 'U1' as user_id , '2019-10-01 22:00:00' as session_timestamp union
select 'S1' as session_id, 'U1' as user_id , '2019-10-01 22:00:01' as session_timestamp union
select 'S1' as session_id, 'U1' as user_id , '2019-10-01 22:00:02' as session_timestamp union
select 'S1' as session_id, 'U1' as user_id , '2019-10-01 22:00:03' as session_timestamp union
select 'S1' as session_id, 'U2' as user_id , '2019-10-01 22:00:04' as session_timestamp union
select 'S1' as session_id, 'U2' as user_id , '2019-10-01 22:00:05' as session_timestamp union
select 'S1' as session_id, 'U2' as user_id , '2019-10-01 22:00:06' as session_timestamp union
select 'S1' as session_id, 'U2' as user_id , '2019-10-01 22:00:07' as session_timestamp union
select 'S1' as session_id, 'U3' as user_id , '2019-10-01 22:00:08' as session_timestamp union
select 'S1' as session_id, 'U3' as user_id , '2019-10-01 22:00:09' as session_timestamp union
select 'S1' as session_id, 'U3' as user_id , '2019-10-01 22:00:10' as session_timestamp union
select 'S1' as session_id, 'U3' as user_id , '2019-10-01 22:00:11' as session_timestamp union
select 'S1' as session_id, 'U3' as user_id , '2019-10-01 22:00:12' as session_timestamp union
select 'S1' as session_id, 'U1' as user_id , '2019-10-01 22:00:13' as session_timestamp union
select 'S1' as session_id, 'U1' as user_id , '2019-10-01 22:00:14' as session_timestamp union
select 'S1' as session_id, 'U1' as user_id , '2019-10-01 22:00:15' as session_timestamp union
select 'S1' as session_id, 'U1' as user_id , '2019-10-01 22:00:16' as session_timestamp ) a
http://sqlfiddle.com/#!18/ed396
CodePudding user response:
You can use a self-join
with a cte
:
with cte(id, uid, t, c) as (
select l.session_id, l.user_id, l.session_timestamp, sum(case when l.session_id = l1.session_id and l1.user_id != l.user_id then 1 end) from logs l join logs l1 on l.session_timestamp > l1.session_timestamp
group by l.session_id, l.user_id, l.session_timestamp
)
select t.id, t.uid, t.start, t.end from (select c.c, c.id, c.uid, min(c.t) start, max(c.t) end from cte c group by c.c, c.id, c.uid) t order by t.start
CodePudding user response:
Here is solution for Sqlserver sample solution
Here is solution for Mysql sample solution
with cte(id, uid, t, c) as (
select l.session_id, l.user_id, l.session_timestamp,
sum(case when l.session_id = l1.session_id and l1.user_id != l.user_id
then 1 end)
from logs l join logs l1 on datediff(ss,l1.session_timestamp , l.session_timestamp) >0
group by l.session_id, l.user_id, l.session_timestamp
)
select c.id, c.uid, min(c.t) start, max(c.t)
from cte c group by c.c, c.id, c.uid order by start
Thanks alex for giving hint (l1.user_id != l.user_id ).
CodePudding user response:
With PostGres
WITH list AS
(
SELECT session_id
, user_id
, session_timestamp AS current
, session_timestamp > COALESCE(lag(session_timestamp, 1) OVER w interval '1 second', session_timestamp - interval '1 second') AS _start
, session_timestamp interval '1 second' < COALESCE(lead(session_timestamp, 1) OVER w, session_timestamp interval '2 second') AS _end
FROM logs
WINDOW w AS (PARTITION BY session_id, user_id ORDER BY session_timestamp ASC ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)
)
SELECT s.session_id
, s.user_id
, s.session_start_time
, e.session_end_time
FROM
( SELECT session_id, user_id, current AS session_start_time
FROM list
WHERE _start
) AS s
CROSS JOIN LATERAL
( SELECT l.current AS session_end_time
FROM list AS l
WHERE _end
AND l.session_id = s.session_id
AND l.user_id = s.user_id
AND l.current > s.session_start_time
ORDER BY l.current ASC
LIMIT 1
) AS e
ORDER BY s.session_id, s.user_id, s.session_start_time
test result in dbfiddle