I'm curious if there is a better way to write this query, either syntactically to make simpler/easier to follow, or for processing speed.
I've been writing a lot of similar queries recently and hoping to streamline this. I'm not a developer by trade, but I have to dive into sql
quintiles table
costcenter | quintile | quintilevalue |
---|---|---|
A | Max | 50 |
A | Q1 | 8 |
A | Q2 | 12 |
A | Q3 | 14 |
A | Q4 | 18 |
B | Max | 45 |
B | Q1 | 5 |
B | Q2 | 10 |
B | Q3 | 12 |
B | Q4 | 16 |
employees table
costcenter | employee | hiredate |
---|---|---|
A | W | 2021-01-01 |
A | X | 2021-02-08 |
B | Y | 2020-12-16 |
B | Z | 2021-01-15 |
workcomplete table
employee | workdate | widgetsassembled |
---|---|---|
W | 2021-02-26 | 4 |
W | 2021-03-05 | 5 |
X | 2021-05-24 | 6 |
X | 2021-05-31 | 3 |
Y | 2021-04-07 | 2 |
Y | 2021-04-14 | 8 |
Z | 2021-02-07 | 4 |
Z | 2021-02-14 | 1 |
My goal: for each record in the workcomplete table, find out what the tenure was when the employee did the work and what quintile it falls in.
employee | workdate | widgetsassembled | costcenter | tenure | smallestquintile | quintile |
---|---|---|---|---|---|---|
W | 2021-02-26 | 4 | A | 8 | 8 | Q1 |
W | 2021-03-05 | 5 | A | 9 | 12 | Q2 |
X | 2021-05-24 | 6 | A | 15 | 18 | Q4 |
X | 2021-05-31 | 3 | A | 16 | 18 | Q4 |
Y | 2021-04-07 | 2 | B | 16 | 16 | Q4 |
Y | 2021-04-14 | 8 | B | 17 | 45 | Max |
Z | 2021-02-07 | 4 | B | 4 | 5 | Q1 |
Z | 2021-02-14 | 1 | B | 5 | 5 | Q1 |
This is what I did, it works fine:
WITH quintiles AS (
SELECT 'A' as costcenter
,'Q1' as quintile
,8 as quintilevalue
UNION SELECT 'A','Q2',12
UNION SELECT 'A','Q3',14
UNION SELECT 'A','Q4',18
UNION SELECT 'A','Max',50
UNION SELECT 'B','Q1',5
UNION SELECT 'B','Q2',10
UNION SELECT 'B','Q3',12
UNION SELECT 'B','Q4',16
UNION SELECT 'B','Max',45
),
employees AS
(
SELECT 'A' as costcenter
,'W' as employee
,'2021-01-01' as hiredate
UNION SELECT 'A','X','2021-02-08'
UNION SELECT 'B','Y','2020-12-16'
UNION SELECT 'B','Z','2021-01-15'
),
workcomplete AS
(
SELECT 'W' as employee
,'2021-02-26' as workdate
,4 as widgetsassembled
UNION SELECT 'W','2021-03-05',5
UNION SELECT 'X','2021-05-24',6
UNION SELECT 'X','2021-05-31',3
UNION SELECT 'Y','2021-04-07',2
UNION SELECT 'Y','2021-04-14',8
UNION SELECT 'Z','2021-02-07',4
UNION SELECT 'Z','2021-02-14',1
)
SELECT t.*
,q.quintile
FROM (
SELECT wc.employee
,wc.workdate
,wc.widgetsassembled
,e.costcenter
,DATEDIFF(week,e.hiredate,wc.workdate) AS tenure
,MIN(q.quintilevalue) as smallestquintile
FROM workcomplete wc
LEFT JOIN employees e
ON wc.employee = e.employee
LEFT JOIN quintiles q
ON q.costcenter = e.costcenter and DATEDIFF(week,e.hiredate,wc.workdate) <= q.quintilevalue
GROUP BY wc.employee
,wc.workdate
,wc.widgetsassembled
,e.costcenter
,DATEDIFF(week,e.hiredate,wc.workdate)
)t
LEFT JOIN quintiles q
ON t.smallestquintile = q.quintilevalue and t.costcenter = q.costcenter
This also works.
WITH quintiles AS (
SELECT 'A' as costcenter
,'Q1' as quintile
,8 as quintilevalue
UNION SELECT 'A','Q2',12
UNION SELECT 'A','Q3',14
UNION SELECT 'A','Q4',18
UNION SELECT 'A','Max',50
UNION SELECT 'B','Q1',5
UNION SELECT 'B','Q2',10
UNION SELECT 'B','Q3',12
UNION SELECT 'B','Q4',16
UNION SELECT 'B','Max',45
),
employees AS
(
SELECT 'A' as costcenter
,'W' as employee
,'2021-01-01' as hiredate
UNION SELECT 'A','X','2021-02-08'
UNION SELECT 'B','Y','2020-12-16'
UNION SELECT 'B','Z','2021-01-15'
),
workcomplete AS
(
SELECT 'W' as employee
,'2021-02-26' as workdate
,4 as widgetsassembled
UNION SELECT 'W','2021-03-05',5
UNION SELECT 'X','2021-05-24',6
UNION SELECT 'X','2021-05-31',3
UNION SELECT 'Y','2021-04-07',2
UNION SELECT 'Y','2021-04-14',8
UNION SELECT 'Z','2021-02-07',4
UNION SELECT 'Z','2021-02-14',1
)
SELECT t.*
,q.quintile
FROM (
SELECT DISTINCT wc.employee
,wc.workdate
,wc.widgetsassembled
,e.costcenter
,DATEDIFF(week,e.hiredate,wc.workdate) AS tenure
,MIN(q.quintilevalue) OVER (PARTITION BY wc.employee, wc.workdate) as smallestquintile
FROM workcomplete wc
LEFT JOIN employees e
ON wc.employee = e.employee
LEFT JOIN quintiles q
ON q.costcenter = e.costcenter and DATEDIFF(week,e.hiredate,wc.workdate) <= q.quintilevalue
)t
LEFT JOIN quintiles q
ON t.smallestquintile = q.quintilevalue and t.costcenter = q.costcenter
Is there a simpler way to do this, without nesting selects?
CodePudding user response:
It sounds like you just need a top-1-per-group query.
The standard solution for that is to use ROW_NUMBER
WITH quintiles AS (
SELECT 'A' as costcenter
,'Q1' as quintile
,8 as quintilevalue
UNION SELECT 'A','Q2',12
UNION SELECT 'A','Q3',14
UNION SELECT 'A','Q4',18
UNION SELECT 'A','Max',50
UNION SELECT 'B','Q1',5
UNION SELECT 'B','Q2',10
UNION SELECT 'B','Q3',12
UNION SELECT 'B','Q4',16
UNION SELECT 'B','Max',45
),
employees AS
(
SELECT 'A' as costcenter
,'W' as employee
,'2021-01-01' as hiredate
UNION SELECT 'A','X','2021-02-08'
UNION SELECT 'B','Y','2020-12-16'
UNION SELECT 'B','Z','2021-01-15'
),
workcomplete AS
(
SELECT 'W' as employee
,'2021-02-26' as workdate
,4 as widgetsassembled
UNION SELECT 'W','2021-03-05',5
UNION SELECT 'X','2021-05-24',6
UNION SELECT 'X','2021-05-31',3
UNION SELECT 'Y','2021-04-07',2
UNION SELECT 'Y','2021-04-14',8
UNION SELECT 'Z','2021-02-07',4
UNION SELECT 'Z','2021-02-14',1
)
SELECT t.*
FROM (
SELECT wc.employee
,wc.workdate
,wc.widgetsassembled
,e.costcenter
,DATEDIFF(week,e.hiredate,wc.workdate) AS tenure
,q.quintile
,ROW_NUMBER() OVER (PARTITION BY wc.employee, wc.workdate ORDER BY q.quintilevalue) as rn
FROM workcomplete wc
LEFT JOIN employees e
ON wc.employee = e.employee
LEFT JOIN quintiles q
ON q.costcenter = e.costcenter and DATEDIFF(week,e.hiredate,wc.workdate) <= q.quintilevalue
)t
WHERE rn = 1;