CREATE TABLE inbound (
id SERIAL PRIMARY KEY,
campaign VARCHAR,
expected_inbound_date DATE,
expected_inbound_quantity DECIMAL,
received_inbound_quantity DECIMAL
);
INSERT INTO inbound
(campaign, expected_inbound_date, expected_inbound_quantity, received_inbound_quantity)
VALUES
('C001', '2022-05-03', '500', '0'),
('C001', '2022-05-03', '800', '0'),
('C001', '2022-05-03', '400', '0'),
('C001', '2022-05-03', '200', '0'),
('C001', NULL, '0', '700'),
('C002', '2022-08-20', '3000', '0'),
('C002', '2022-08-20', '5000', '0'),
('C002', '2022-08-20', '2800', '0'),
('C002', NULL, '0', '4000');
Expected Result
campaign | expected_inbound_date | expected_inbound_quantity | split_received_inbound_quantity
---------|------------------------|---------------------------|--------------------------------
C001     | 2022-05-03             | 200                       | 200
C001     | 2022-05-03             | 400                       | 400
C001     | 2022-05-03             | 500                       | 100
C001     | 2022-05-03             | 800                       | 0
C001     |                        |                           | 700
---------|------------------------|---------------------------|--------------------------------
C002     | 2022-08-20             | 3800                      | 3800
C002     | 2022-08-20             | 5000                      | 200
C002     | 2022-08-20             | 2800                      | 0
C002     |                        |                           | 4000
I want to split the received_inbound_quantity across the rows of expected_inbound_quantity until the total received_inbound_quantity is used up.
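For C001, for example, the received total of 700 should fill the 200 and 400 rows completely, leave 100 for the 500 row and 0 for the 800 row, exactly as in the expected result above.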
With reference to the answer in this question, I tried the following solution:
SELECT
    i.campaign AS campaign,
    i.expected_inbound_date AS expected_inbound_date,
    i.expected_inbound_quantity AS expected_inbound_quantity,
    i.received_inbound_quantity AS received_inbound_quantity,
    (SELECT
        GREATEST(
            LEAST(i.expected_inbound_quantity,
                (SELECT SUM(i3.received_inbound_quantity)
                 FROM inbound i3
                 WHERE i.campaign = i3.campaign) -
                (SELECT t1.cumulated_value AS cumulated_value
                 FROM (SELECT
                           i2.campaign,
                           i2.expected_inbound_date,
                           i2.expected_inbound_quantity,
                           i2.received_inbound_quantity,
                           SUM(i2.expected_inbound_quantity) OVER (
                               PARTITION BY i2.campaign
                               ORDER BY i2.expected_inbound_date, i2.expected_inbound_quantity, i2.received_inbound_quantity
                               ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
                           ) AS cumulated_value
                       FROM inbound i2
                       GROUP BY 1, 2, 3, 4) t1
                 WHERE (t1.campaign, t1.expected_inbound_date, t1.expected_inbound_quantity, t1.received_inbound_quantity)
                     = (i.campaign, i.expected_inbound_date, i.expected_inbound_quantity, i.received_inbound_quantity))
            ),
            0
        )
    ) AS split
FROM inbound i
GROUP BY 1, 2, 3, 4
ORDER BY 1, 2, 3, 4;
However, in Redshift I get the error:
Invalid operation: This type of correlated subquery pattern is not supported yet;
How do I need to modify the query so that it also works in Redshift?
CodePudding user response:
Window functions are your friend. When you have a query that compares rows, you should first look to window functions on Redshift. This is simpler, cleaner, and faster than any self-joining pattern.
select
    campaign,
    expected_inbound_date,
    expected_inbound_quantity,
    received_inbound_quantity,
    case when (inbound_total - inbound_sum) >= 0 then expected_inbound_quantity
         else case when (expected_inbound_quantity + inbound_total - inbound_sum) >= 0
                   then expected_inbound_quantity + inbound_total - inbound_sum
                   else 0 end
    end as split
from (SELECT
          campaign,
          expected_inbound_date,
          expected_inbound_quantity,
          received_inbound_quantity,
          -- running total of expected quantity within each campaign
          sum(expected_inbound_quantity) over (partition by campaign
              order by expected_inbound_date, expected_inbound_quantity) as inbound_sum,
          -- total received for the campaign (carried on the received-only row)
          max(received_inbound_quantity) over (partition by campaign) as inbound_total
      FROM inbound i
     ) subq
ORDER BY 1, 2, 3, 4;
Updated fiddle here - https://dbfiddle.uk/?rdbms=postgres_13&fiddle=2381abdf5a90a997a4f05b809c892c40
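Side note: with an ORDER BY and no explicit frame, the inbound_sum running total uses a RANGE frame, so two expected rows in the same campaign with the same date and quantity would be counted as peers and receive the same split. If that can happen in your real data, adding a unique tie-breaker (the id column from your table) and an explicit ROWS frame keeps the allocation strictly row by row, something like:
sum(expected_inbound_quantity) over (
    partition by campaign
    -- id is the table's surrogate key; any unique column works as a tie-breaker
    order by expected_inbound_date, expected_inbound_quantity, id
    rows between unbounded preceding and current row
) as inbound_sum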
When you port this to Redshift you may want to convert the CASE statements to DECODE() functions as these are more readable IMHO.
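Since DECODE() matches on equality rather than arbitrary conditions, one way to express the same branching (just a sketch of the idea, using SIGN() on the remaining quantity) would be:
decode(sign(inbound_total - inbound_sum),
       -- -1: the running expected total has already passed the received total
       -1, greatest(expected_inbound_quantity + inbound_total - inbound_sum, 0),
       -- otherwise the full expected quantity is still covered
       expected_inbound_quantity) as split
Whether that reads better than the nested CASE is largely a matter of taste.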
PS. Thank you for setting up the fiddle as this greatly speeds up providing an answer.