I am using MySQL and am trying to replace/fill all null values in a column with the previous non-null value present within the same column.
I tried using the lag()
windows function, but I don't seem to get the expected value in return. I have the following data
DDA_Account | yr_mo | Transaction_Date | Current_Daily_Balance | Current_Daily_Balance_Expected
--------------------------------------------------------------------------------------------------
25470 2021-11 2021-11-30 19222.89 19222.89
25470 2021-12 2021-12-01 19298.12 19298.12
25470 2021-12 2021-12-02 19365.33 19365.3
25470 2021-12 2021-12-03 19588.58 19588.58
25470 2021-12 2021-12-04 NULL 19588.58
25470 2021-12 2021-12-05 NULL 19588.58
25470 2021-12 2021-12-06 NULL 19588.58
25470 2021-12 2021-12-07 19568.58 19568.58
25470 2021-12 2021-12-08 20086.06 20086.06
25470 2021-12 2021-12-09 20109.29 20109.29
25470 2021-12 2021-12-10 19446.72 19446.72
25470 2021-12 2021-12-11 NULL 19446.72
25470 2021-12 2021-12-12 NULL 19446.72
For ex. I'd like to replace the date range '2021-12-04' to '2021-12-06' with the last non-null value, that of '2021-12-03.' Like I previously mentioned, I tried using the lag()
function but have been unsuccessful. I have placed the query I've been currently using below. Any help would be greatly appreciated
WITH recursive all_dates(dt) as
(
SELECT (SELECT MIN(Transaction_Date) FROM test_acct WHERE DDA_Account = 25470) dt
UNION ALL
SELECT dt interval 1 day from all_dates where dt < (SELECT MAX(Transaction_Date) FROM test_acct WHERE DDA_Account = 25470)
)
SELECT IF(ISNULL(m.DDA_Account)=1,'25470',DDA_Account) as DDA_Account, m.yr_mo, l.dt as Transaction_Date, m.Current_Daily_Balance, m.moving_avg, m.Row_Num,
IF(ISNULL(m.Current_Daily_Balance)=1, lag(m.Current_Daily_Balance,1) OVER (PARTITION BY DDA_Account ORDER BY Transaction_Date ASC), m.Current_Daily_Balance) as Current_Daily_Balance_Expected
FROM all_dates l
LEFT JOIN
(With cte_acct_2 as
(SELECT c.DDA_Account, date_format(Transaction_Date, '%Y-%m') as yr_mo, c.Transaction_Date, c.Current_Daily_Balance
From
(
SELECT a.DDA_Account, a.Transaction_Date, a.Transaction_Amount, a.Sum_D_C as Current_Daily_Balance
FROM
(SELECT d.DDA_Account, d.Transaction_Date, d.Debit_or_Credit, d.Transaction_Amount,
SUM(D_C_Amount) OVER (partition by DDA_Account order by Transaction_Date) as Sum_D_C
FROM
(SELECT DDA_Account, Transaction_Date, Debit_or_Credit, Transaction_Amount,
CASE WHEN Debit_or_Credit = 'Credit' Then Transaction_Amount Else -1*Transaction_Amount END AS D_C_Amount
FROM test_acct
WHERE DDA_Account = '25470') d) a
GROUP BY Transaction_Date
ORDER BY Transaction_Date ASC) c)
SELECT DDA_Account, yr_mo, Transaction_Date, Current_Daily_Balance,
ROUND(AVG(Current_Daily_Balance) OVER (partition by DDA_Account, yr_mo order by Transaction_Date),2) as moving_avg,
Row_number() OVER (PARTITION BY DDA_Account, Current_Daily_Balance, yr_mo ORDER BY Transaction_Date ASC) AS Row_Num
FROM cte_acct_2
ORDER BY DDA_Account, Transaction_Date) m ON m.Transaction_Date = l.dt
ORDER BY l.dt ASC;
CodePudding user response:
I was able to use session variables to solve this problem.
SELECT
NV.DDA_Account, NV.yr_mo, NV.Transaction_Date, NV.Current_Daily_Balance, CASE WHEN NV.Current_Daily_Balance IS NULL THEN
@prev
ELSE @prev := NV.Current_Daily_Balance
END AS Expected_Current_Daily_Balance, NV.moving_avg, NV.Row_Num
FROM
(
WITH recursive all_dates(dt) as
(
SELECT (SELECT MIN(Transaction_Date) FROM test_acct WHERE DDA_Account = 25470) dt
UNION ALL
SELECT dt interval 1 day from all_dates where dt < (SELECT MAX(Transaction_Date) FROM test_acct WHERE DDA_Account = 25470)
)
SELECT IF(ISNULL(m.DDA_Account)=1,'25470',DDA_Account) as DDA_Account, m.yr_mo, l.dt as Transaction_Date, m.Current_Daily_Balance, m.moving_avg, m.Row_Num
FROM all_dates l
LEFT JOIN
(With cte_acct_2 as
(SELECT c.DDA_Account, date_format(Transaction_Date, '%Y-%m') as yr_mo, c.Transaction_Date, c.Current_Daily_Balance
From
(
SELECT a.DDA_Account, a.Transaction_Date, a.Transaction_Amount, a.Sum_D_C as Current_Daily_Balance
FROM
(SELECT d.DDA_Account, d.Transaction_Date, d.Debit_or_Credit, d.Transaction_Amount,
SUM(D_C_Amount) OVER (partition by DDA_Account order by Transaction_Date) as Sum_D_C
FROM
(SELECT DDA_Account, Transaction_Date, Debit_or_Credit, Transaction_Amount,
CASE WHEN Debit_or_Credit = 'Credit' Then Transaction_Amount Else -1*Transaction_Amount END AS D_C_Amount
FROM test_acct
WHERE DDA_Account = '25470') d) a
GROUP BY Transaction_Date
ORDER BY Transaction_Date ASC) c)
SELECT DDA_Account, yr_mo, Transaction_Date, Current_Daily_Balance,
ROUND(AVG(Current_Daily_Balance) OVER (partition by DDA_Account, yr_mo order by Transaction_Date),2) as moving_avg,
Row_number() OVER (PARTITION BY DDA_Account, Current_Daily_Balance, yr_mo ORDER BY Transaction_Date ASC) AS Row_Num
FROM cte_acct_2
ORDER BY DDA_Account, Transaction_Date) m ON m.Transaction_Date = l.dt
ORDER BY l.dt ASC) NV;
CodePudding user response:
Your solution seems rather overly complex, based on your description the following correlated subquery is all you need:
select *,
Coalesce(
Current_Daily_Balance, (
select Current_Daily_Balance
from t t2
where t2.DDA_Account = t.DDA_Account
and t2.Transaction_Date < t.Transaction_Date
and t2.Current_Daily_Balance is not null
order by t2.Transaction_Date desc
limit 1
)
) Current_Daily_Balance_Expected
from t;
See demo fiddle