MySQL: Keep first occurrence row by id first change value in column-CodePudding

I am trying to extract 1) the first occurrence of a risk score and 2) any changes in risk score within an individual in MySQL. Given the following table,

rn	ID	Risk_Score	Recorded_Time
1	1	0	2022-03-01 00:01:00.000
2	1	0	2022-03-02 12:01:00.000
3	1	1	2022-03-03 23:01:00.000
4	1	1	2022-03-04 17:02:00.000
5	2	1	2022-03-23 07:01:00.000
6	2	1	2022-03-23 11:01:00.000
7	3	0	2022-03-01 00:01:00.000
8	3	0	2022-03-03 04:01:00.000
9	3	1	2022-03-03 14:01:00.000
10	3	1	2022-03-04 06:01:00.000
11	3	0	2022-03-04 17:02:00.000
12	3	1	2022-03-05 05:02:00.000
13	3	1	2022-03-06 14:02:00.000
14	3	0	2022-03-07 02:02:00.000
15	3	0	2022-03-11 09:02:00.000
16	3	1	2022-03-13 19:01:00.000
17	3	1	2022-03-15 22:01:00.000
18	3	3	2022-03-18 18:01:00.000
19	3	3	2022-03-19 14:01:00.000
20	3	1	2022-03-19 19:01:00.000

My output needs to be:

ID	Risk_Score	Recorded_Time
1	0	2022-03-01 00:01:00.000
1	1	2022-03-03 23:01:00.000
2	1	2022-03-23 07:01:00.000
3	0	2022-03-01 00:01:00.000
3	1	2022-03-03 14:01:00.000
3	0	2022-03-04 17:02:00.000
3	1	2022-03-05 05:02:00.000
3	0	2022-03-07 02:02:00.000
3	1	2022-03-13 19:01:00.000
3	3	2022-03-18 18:01:00.000
3	1	2022-03-19 19:01:00.000

I tried the following codes:

CREATE TABLE test_run2
(
rn int,
ID1 int,
Risk_Score int,
Recorded_Time timestamp
);

INSERT INTO test_run2 (rn, ID1, RISK_SCORE, RECORDED_TIME) VALUES (1, 1, 0, '2022-03-01 00:01:00.000');
INSERT INTO test_run2 (rn, ID1, RISK_SCORE, RECORDED_TIME) VALUES (2, 1, 0, '2022-03-02 12:01:00.000');
INSERT INTO test_run2 (rn, ID1, RISK_SCORE, RECORDED_TIME) VALUES (3, 1, 1, '2022-03-03 23:01:00.000');
INSERT INTO test_run2 (rn, ID1, RISK_SCORE, RECORDED_TIME) VALUES (4, 1, 1, '2022-03-04 17:02:00.000');
INSERT INTO test_run2 (rn, ID1, RISK_SCORE, RECORDED_TIME) VALUES (5, 2, 1, '2022-03-23 07:01:00.000');
INSERT INTO test_run2 (rn, ID1, RISK_SCORE, RECORDED_TIME) VALUES (6, 2, 1, '2022-03-23 11:01:00.000');
INSERT INTO test_run2 (rn, ID1, RISK_SCORE, RECORDED_TIME) VALUES (7, 3, 0, '2022-03-01 00:01:00.000');
INSERT INTO test_run2 (rn, ID1, RISK_SCORE, RECORDED_TIME) VALUES (8, 3, 0, '2022-03-03 04:01:00.000');
INSERT INTO test_run2 (rn, ID1, RISK_SCORE, RECORDED_TIME) VALUES (9, 3, 1, '2022-03-03 14:01:00.000');
INSERT INTO test_run2 (rn, ID1, RISK_SCORE, RECORDED_TIME) VALUES (10, 3, 1, '2022-03-04 06:01:00.000');
INSERT INTO test_run2 (rn, ID1, RISK_SCORE, RECORDED_TIME) VALUES (11, 3, 0, '2022-03-04 17:02:00.000');
INSERT INTO test_run2 (rn, ID1, RISK_SCORE, RECORDED_TIME) VALUES (12, 3, 1, '2022-03-05 05:02:00.000');
INSERT INTO test_run2 (rn, ID1, RISK_SCORE, RECORDED_TIME) VALUES (13, 3, 1, '2022-03-06 14:02:00.000');
INSERT INTO test_run2 (rn, ID1, RISK_SCORE, RECORDED_TIME) VALUES (14, 3, 0, '2022-03-07 02:02:00.000');
INSERT INTO test_run2 (rn, ID1, RISK_SCORE, RECORDED_TIME) VALUES (15, 3, 0, '2022-03-11 09:02:00.000');
INSERT INTO test_run2 (rn, ID1, RISK_SCORE, RECORDED_TIME) VALUES (16, 3, 1, '2022-03-13 19:01:00.000');
INSERT INTO test_run2 (rn, ID1, RISK_SCORE, RECORDED_TIME) VALUES (17, 3, 1, '2022-03-15 22:01:00.000');
INSERT INTO test_run2 (rn, ID1, RISK_SCORE, RECORDED_TIME) VALUES (18, 3, 3, '2022-03-18 18:01:00.000');
INSERT INTO test_run2 (rn, ID1, RISK_SCORE, RECORDED_TIME) VALUES (19, 3, 3, '2022-03-19 14:01:00.000');
INSERT INTO test_run2 (rn, ID1, RISK_SCORE, RECORDED_TIME) VALUES (20, 3, 1, '2022-03-19 19:01:00.000');

SELECT * FROM test_run2;

SELECT test_run2.ID1, 
        test_run2.Risk_Score, 
        test_run2.Recorded_Time 

    FROM test_run2 
                                            
    INNER JOIN test_run2 AS y
        ON test_run2.rn = y.rn   1
        AND test_run2.Risk_Score <> y.Risk_Score
            
    ORDER BY ID1;

But the results were missing the following outputs:

ID	Risk_Score	Recorded_Time
1	0	2022-03-01 00:01:00.000
2	1	2022-03-23 07:01:00.000

What should I do to ensure that all relevant points are extracted? I'm new to mySQL and will appreciate any advice on my issue. Thank you.

CodePudding user response：

You need to establish change and if no previous row exists

select * 
from t
where (Select risk_score from t t1 where t1.id1 = t.id1 and t1.rn < t.rn order by t1.rn desc limit 1) is null or
      t.risk_score <> (Select risk_score from t t1 where t1.id1 = t.id1 and t1.rn < t.rn order by t1.rn desc limit 1)
;

 ------ ------ ------------ --------------------- 
| rn   | ID1  | Risk_Score | Recorded_Time       |
 ------ ------ ------------ --------------------- 
|    1 |    1 |          0 | 2022-03-01 00:01:00 |
|    3 |    1 |          1 | 2022-03-03 23:01:00 |
|    5 |    2 |          1 | 2022-03-23 07:01:00 |
|    7 |    3 |          0 | 2022-03-01 00:01:00 |
|    9 |    3 |          1 | 2022-03-03 14:01:00 |
|   11 |    3 |          0 | 2022-03-04 17:02:00 |
|   12 |    3 |          1 | 2022-03-05 05:02:00 |
|   14 |    3 |          0 | 2022-03-07 02:02:00 |
|   16 |    3 |          1 | 2022-03-13 19:01:00 |
|   18 |    3 |          3 | 2022-03-18 18:01:00 |
|   20 |    3 |          1 | 2022-03-19 19:01:00 |
 ------ ------ ------------ --------------------- 
11 rows in set (0.021 sec)

If you have version 8 or above

with cte as 
(Select t.*, lag(risk_score) over (partition by id1 order by rn) prev from t)
select * from cte where prev is null or risk_score <> prev