Home > Software design >  Replace Null Values with the Previous Non-Null Value in MySQL
Replace Null Values with the Previous Non-Null Value in MySQL

Time:01-04

I am using MySQL and am trying to replace/fill all null values in a column with the previous non-null value present within the same column. I tried using the lag() windows function, but I don't seem to get the expected value in return. I have the following data

DDA_Account | yr_mo | Transaction_Date | Current_Daily_Balance  | Current_Daily_Balance_Expected
--------------------------------------------------------------------------------------------------
25470        2021-11   2021-11-30          19222.89                19222.89
25470        2021-12   2021-12-01          19298.12                19298.12
25470        2021-12   2021-12-02          19365.33                19365.3
25470        2021-12   2021-12-03          19588.58                19588.58
25470        2021-12   2021-12-04             NULL                 19588.58
25470        2021-12   2021-12-05             NULL                 19588.58
25470        2021-12   2021-12-06             NULL                 19588.58
25470        2021-12   2021-12-07          19568.58                19568.58
25470        2021-12   2021-12-08          20086.06                20086.06
25470        2021-12   2021-12-09          20109.29                20109.29
25470        2021-12   2021-12-10          19446.72                19446.72
25470        2021-12   2021-12-11             NULL                 19446.72
25470        2021-12   2021-12-12             NULL                 19446.72

For ex. I'd like to replace the date range '2021-12-04' to '2021-12-06' with the last non-null value, that of '2021-12-03.' Like I previously mentioned, I tried using the lag() function but have been unsuccessful. I have placed the query I've been currently using below. Any help would be greatly appreciated

WITH recursive all_dates(dt) as 
(
SELECT (SELECT MIN(Transaction_Date) FROM test_acct WHERE DDA_Account = 25470) dt
UNION ALL
SELECT dt   interval 1 day from all_dates where dt < (SELECT MAX(Transaction_Date) FROM test_acct WHERE DDA_Account = 25470)
)
SELECT IF(ISNULL(m.DDA_Account)=1,'25470',DDA_Account) as DDA_Account, m.yr_mo, l.dt as Transaction_Date, m.Current_Daily_Balance, m.moving_avg, m.Row_Num,
IF(ISNULL(m.Current_Daily_Balance)=1, lag(m.Current_Daily_Balance,1) OVER (PARTITION BY DDA_Account ORDER BY Transaction_Date ASC), m.Current_Daily_Balance) as Current_Daily_Balance_Expected
FROM all_dates l 
LEFT JOIN 
(With cte_acct_2 as 
(SELECT c.DDA_Account, date_format(Transaction_Date, '%Y-%m') as yr_mo, c.Transaction_Date, c.Current_Daily_Balance
From
(
SELECT a.DDA_Account, a.Transaction_Date, a.Transaction_Amount, a.Sum_D_C as Current_Daily_Balance
FROM
(SELECT d.DDA_Account, d.Transaction_Date, d.Debit_or_Credit, d.Transaction_Amount,
SUM(D_C_Amount) OVER (partition by DDA_Account order by Transaction_Date) as Sum_D_C
FROM
(SELECT DDA_Account, Transaction_Date, Debit_or_Credit, Transaction_Amount,
CASE WHEN Debit_or_Credit = 'Credit' Then Transaction_Amount Else -1*Transaction_Amount END AS D_C_Amount
FROM test_acct 
WHERE DDA_Account = '25470') d) a 
GROUP BY Transaction_Date
ORDER BY Transaction_Date ASC) c)
SELECT DDA_Account, yr_mo, Transaction_Date, Current_Daily_Balance,
ROUND(AVG(Current_Daily_Balance) OVER (partition by DDA_Account, yr_mo order by Transaction_Date),2) as moving_avg,
Row_number() OVER (PARTITION BY DDA_Account, Current_Daily_Balance, yr_mo ORDER BY Transaction_Date ASC) AS Row_Num
FROM cte_acct_2
ORDER BY DDA_Account, Transaction_Date) m ON m.Transaction_Date = l.dt
ORDER BY l.dt ASC;

CodePudding user response:

I was able to use session variables to solve this problem.

SELECT 
NV.DDA_Account, NV.yr_mo, NV.Transaction_Date, NV.Current_Daily_Balance, CASE WHEN NV.Current_Daily_Balance IS NULL THEN 
@prev
ELSE @prev := NV.Current_Daily_Balance
END AS Expected_Current_Daily_Balance, NV.moving_avg, NV.Row_Num
FROM
(
WITH recursive all_dates(dt) as 
(
SELECT (SELECT MIN(Transaction_Date) FROM test_acct WHERE DDA_Account = 25470) dt
UNION ALL
SELECT dt   interval 1 day from all_dates where dt < (SELECT MAX(Transaction_Date) FROM test_acct WHERE DDA_Account = 25470)
)
SELECT IF(ISNULL(m.DDA_Account)=1,'25470',DDA_Account) as DDA_Account, m.yr_mo, l.dt as Transaction_Date, m.Current_Daily_Balance, m.moving_avg, m.Row_Num
FROM all_dates l 
LEFT JOIN 
(With cte_acct_2 as 
(SELECT c.DDA_Account, date_format(Transaction_Date, '%Y-%m') as yr_mo, c.Transaction_Date, c.Current_Daily_Balance
From
(
SELECT a.DDA_Account, a.Transaction_Date, a.Transaction_Amount, a.Sum_D_C as Current_Daily_Balance
FROM
(SELECT d.DDA_Account, d.Transaction_Date, d.Debit_or_Credit, d.Transaction_Amount,
SUM(D_C_Amount) OVER (partition by DDA_Account order by Transaction_Date) as Sum_D_C
FROM
(SELECT DDA_Account, Transaction_Date, Debit_or_Credit, Transaction_Amount,
CASE WHEN Debit_or_Credit = 'Credit' Then Transaction_Amount Else -1*Transaction_Amount END AS D_C_Amount
FROM test_acct 
WHERE DDA_Account = '25470') d) a 
GROUP BY Transaction_Date
ORDER BY Transaction_Date ASC) c)
SELECT DDA_Account, yr_mo, Transaction_Date, Current_Daily_Balance,
ROUND(AVG(Current_Daily_Balance) OVER (partition by DDA_Account, yr_mo order by Transaction_Date),2) as moving_avg,
Row_number() OVER (PARTITION BY DDA_Account, Current_Daily_Balance, yr_mo ORDER BY Transaction_Date ASC) AS Row_Num
FROM cte_acct_2
ORDER BY DDA_Account, Transaction_Date) m ON m.Transaction_Date = l.dt
ORDER BY l.dt ASC) NV;

CodePudding user response:

Your solution seems rather overly complex, based on your description the following correlated subquery is all you need:

select *,
  Coalesce(
    Current_Daily_Balance, (
      select Current_Daily_Balance 
      from t t2 
      where t2.DDA_Account = t.DDA_Account 
        and t2.Transaction_Date < t.Transaction_Date 
        and t2.Current_Daily_Balance is not null 
        order by t2.Transaction_Date desc
        limit 1
      )
    ) Current_Daily_Balance_Expected
from t;

See demo fiddle

  • Related