CodePudding user response:
I think some of your sample data is incorrect but I did play with it and get a matching result, for the MPE data at least. You can accomplish this by first tagging the "distinctly counted" rows with an extra partition on CUST_ID
and then first ordering on FLAG DESC
. Then you would sum over that in the same way you hoped to apply count(distinct <expr>) over ...
WITH SE AS (
SELECT 1 LINE_ID, 'TW' MARKET_ID, 'X' LOCAL_POS_ID, 'MPE' BC_ID,
1 CUST_ID, '20200201' SALE_CREATION_DATE, 1 FLAG UNION ALL
SELECT 2, 'TW', 'X', 'MPE', 2, '20201005', 1 UNION ALL
SELECT 3, 'TW', 'X', 'MPE', 3, '20200415', 0 UNION ALL
SELECT 4, 'TW', 'X', 'MPE', 1, '20200223', 1 UNION ALL
SELECT 5, 'TW', 'X', 'MPE', 6, '20200217', 1 UNION ALL
SELECT 6, 'TW', 'X', 'MPE', 9, '20200715', 1 UNION ALL
SELECT 7, 'TW', 'X', 'MPE', 4, '20200223', 1 UNION ALL
SELECT 8, 'TW', 'X', 'MPE', 1, '20201008', 1 UNION ALL
SELECT 9, 'TW', 'X', 'MPE', 2, '20201019', 1 UNION ALL
SELECT 10, 'TW', 'X', 'MPE', 1, '20200516', 1 UNION ALL
SELECT 11, 'TW', 'X', 'MPE', 1, '20200129', 1 UNION ALL
SELECT 12, 'TW', 'X', 'MPE', 1, '20201007', 1 UNION ALL
SELECT 13, 'TW', 'X', 'MPE', 2, '20201005', 1 UNION ALL
SELECT 14, 'TW', 'X', 'MPE', 3, '20200505', 1 UNION ALL
SELECT 15, 'TW', 'X', 'MPE', 8, '20201103', 1 UNION ALL
SELECT 16, 'TW', 'X', 'MPE', 9, '20200820', 1
),
DATA AS (
SELECT *,
LEFT(SALE_CREATION_DATE, 6) AS SALE_MONTH,
LEFT(SALE_CREATION_DATE, 4) AS SALE_YEAR,
CASE ROW_NUMBER() OVER (
PARTITION BY MARKET_ID, LOCAL_POS_ID, BC_ID,
LEFT(SALE_CREATION_DATE, 4), CUST_ID
ORDER BY FLAG DESC, LEFT(SALE_CREATION_DATE, 6)
) WHEN 1 THEN FLAG END AS COUNTER /* assumes possible to have no flagged row */
FROM SE
)
SELECT MARKET_ID, LOCAL_POS_ID, BC_ID, SALE_MONTH,
SUM(SUM(COUNTER)) OVER (
PARTITION BY MARKET_ID, LOCAL_POS_ID, BC_ID, SALE_YEAR
ORDER BY SALE_MONTH
) AS NB_ACTIVE_CUSTOMERS
FROM DATA
GROUP BY MARKET_ID, LOCAL_POS_ID, BC_ID, SALE_YEAR, SALE_MONTH
ORDER BY MARKET_ID, LOCAL_POS_ID, BC_ID, SALE_YEAR, SALE_MONTH