Home > front end >  SMSS SQL Combine two tables with duplicate ID entries
SMSS SQL Combine two tables with duplicate ID entries

Time:02-24

Using SMSS (SQL) I want to combine Table1 and Table2, which both have duplicate ID entries. I need the Due Dates for each ID to appear in a row against their multiple Purchase dates (as can be seen in Table3).

Notes:

  • I've estimated I will need a max of 10 Due date columns
  • If there are no more due dates associated with that ID I want the cells to appear empty.
  • This is an extract of a much larger data set

Table1:

ID Due_Date
2245873 02-JAN
2245873 10-JAN
2245873 13-JAN
8394313 03-JAN
8394313 09-JAN
3446512 31-DEC
3446512 20-JAN
617828 31-DEC
617828 20-JAN
61342 02-JAN

Table2:

ID Purchase_Date Cost
2245873 29-DEC 3.00
2245873 30-DEC 4.30
2245873 01-JAN 6.00
2245873 01-JAN 6.20
8394313 26-DEC 3.20
8394313 27-DEC 3.20
8394313 01-JAN 3.30
8394313 02-JAN 3.20
3446512 31-DEC 3.00
3446512 20-DEC 4.00
617828 31-DEC 4.20
617828 28-DEC 3.50
617828 20-DEC 3.60
61342 29-DEC 4.20
61342 30-DEC 4.30

Table3:

ID Purchase_Date Cost Due_date 1 Due_date 2 Due_date 3
2245873 29-DEC 3.00 02-JAN 10-JAN 13-JAN
2245873 30-DEC 4.30 02-JAN 10-JAN 13-JAN
2245873 01-JAN 6.00 02-JAN 10-JAN 13-JAN
2245873 01-JAN 6.20 02-JAN 10-JAN 13-JAN
8394313 26-DEC 3.20 03-JAN 09-JAN
8394313 27-DEC 3.20 03-JAN 09-JAN
8394313 01-JAN 3.30 03-JAN 09-JAN
8394313 02-JAN 3.20 03-JAN 09-JAN
3446512 31-DEC 3.00 31-DEC 20-JAN
3446512 20-DEC 4.00 31-DEC 20-DEC
3446512 21-DEC 4.00 31-DEC 20-DEC
617828 31-DEC 4.20 31-DEC 20-JAN
617828 28-DEC 3.50 31-DEC 20-JAN
617828 20-DEC 3.60 31-DEC 20-JAN
61342 29-DEC 4.20 02-JAN
61342 30-DEC 4.30 02-JAN

This is my hard code below, why is it not working?

with cte (ID, due_date, rowNo) as (
  select ID, due_date, row_number() over (partition by ID order by due_date)
  from dbo.table1 t1
),
pivoted (ID, due_date1, due_date2, due_date3, due_date4, due_date5, due_date6, due_date7, due_date8, due_date9, due_date10)
AS
(
 select ID, due_date, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL from cte where rowNo = 1
 union all  
 select ID, due_date, due_date1, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL from cte where rowNo = 2
 union all  
 select ID, due_date, due_date1, due_date2, due_date3, NULL, NULL, NULL, NULL, NULL, NULL, NULL from cte where rowNo = 3
 union all
 select ID, due_date, due_date1, due_date2, due_date3, due_date4, NULL, NULL, NULL, NULL, NULL, NULL from cte where rowNo = 4
 union all
 select ID, due_date, due_date1, due_date2, due_date3, due_date4, due_date5, NULL, NULL, NULL, NULL, NULL from cte where rowNo = 5
 union all
 select ID, due_date, due_date1, due_date2, due_date3, due_date4, due_date5, due_date6, NULL, NULL, NULL, NULL from cte where rowNo = 6
 union all
 select ID, due_date, due_date1, due_date2, due_date3, due_date4, due_date5, due_date6, due_date7, NULL, NULL, NULL from cte where rowNo = 7
 union all
 select ID, due_date, due_date1, due_date2, due_date3, due_date4, due_date5, due_date6, due_date7, due_date8, NULL, NULL from cte where rowNo = 8
 union all
  select ID, due_date, due_date1, due_date2, due_date3, due_date4, due_date5, due_date6, due_date7, due_date8, due_date9, NULL from cte where rowNo = 9
 union all
 select ID, due_date, due_date1, due_date2, due_date3, due_date4, due_date5, due_date6, due_date7, due_date8, due_date9, due_date10 from cte where rowN0 = 10
 union all
),
pivSum (ID, due_date1, due_date2, due_date3, due_date4, due_date5, due_date6, due_date7, due_date8, due_date9, due_date10) 
AS
(
select ID, max(due_date1), max(due_date2), max(due_date3), max(due_date4), max(due_date5), max(due_date6), max(due_date7), max(due_date8), max(due_date9), max(due_date10) from pivoted group by ID
) 
select t2.ID, t2.purchase_date, t2.Cost, t1.due_date1, t1.due_date2, t1.due_date3, t1.due_date4, t1.due_date5, t1.due_date6, t1.due_date7, t1.due_date8, t1.due_date9, t1.due_date10
from dbo.table2 t2 
inner join pivSum t1 on t1.ID = t2.ID;

CodePudding user response:

For at max 15 columns as you say, you might hard code it like;

with cte (id, dueDate, rowNo) as (
  select id, due_Date, row_number() over (partition by id order by due_date)
  from table1
),
pivoted (id, due_date1, due_date2, ...) as (
 select id, due_date, NULL, NULL, NULL, NULL, ... from cte where rowNo = 1
 union all  
 select id, NULL, due_date, NULL, NULL, NULL, ... from cte where rowNo = 2
 ...
),
pivSum (id, due_date1, due_date2, ...) as (
select id, max(due_date1), ... from pivoted group by id
) 
select t2.id, t2.purchase_date, t2.cost, t1.due_date1, t1.due_date2, ...
from table2 t2 
inner join pivSum t1 on t1.id = t2.id;

When column count is unknown, or you don't want to do it with hardcoding, check dynamic pivoting.

EDIT: Here is a sample for 15 columns fixed:

DECLARE @table1 TABLE([ID] INT, [due_date] CHAR(6));
INSERT INTO @table1(ID, due_date)
VALUES(2245873, '02-JAN')
    , (2245873, '10-JAN')
    , (2245873, '13-JAN')
    , (8394313, '03-JAN')
    , (8394313, '09-JAN')
    , (3446512, '31-DEC')
    , (3446512, '20-JAN')
    , (617828, '31-DEC')
    , (617828, '20-JAN')
    , (61342, '02-JAN');

DECLARE @table2 TABLE([ID] INT, [Purchase_Date] CHAR(6), [Cost] MONEY);
INSERT INTO @table2(ID, Purchase_Date, Cost)
VALUES(2245873, '29-DEC', 3.0000)
    , (2245873, '30-DEC', 4.3000)
    , (2245873, '01-JAN', 6.0000)
    , (2245873, '01-JAN', 6.2000)
    , (8394313, '26-DEC', 3.2000)
    , (8394313, '27-DEC', 3.2000)
    , (8394313, '01-JAN', 3.3000)
    , (8394313, '02-JAN', 3.2000)
    , (3446512, '31-DEC', 3.0000)
    , (3446512, '20-DEC', 4.0000)
    , (617828, '31-DEC', 4.2000)
    , (617828, '28-DEC', 3.5000)
    , (617828, '20-DEC', 3.6000)
    , (61342, '29-DEC', 4.2000)
    , (61342, '30-DEC', 4.3000);

WITH
  cte(id, dueDate, rowNo) AS (
                               SELECT
                                    ID
                                  , due_date
                                  , ROW_NUMBER() OVER (PARTITION BY ID ORDER BY due_date)
                               FROM @table1 AS t
                             )
,
  pivoted(id, due_date1, due_date2, due_date3, due_date4, due_date5, due_date6, due_date7
        , due_date8, due_date9, due_date10, due_date11, due_date12, due_date13, due_date14
        , due_date15
         ) AS (
                SELECT
                         id
                       , MAX(CASE WHEN cte.rowNo=1 THEN dueDate END)
                       , MAX(CASE WHEN cte.rowNo=2 THEN dueDate END)
                       , MAX(CASE WHEN cte.rowNo=3 THEN dueDate END)
                       , MAX(CASE WHEN cte.rowNo=4 THEN dueDate END)
                       , MAX(CASE WHEN cte.rowNo=5 THEN dueDate END)
                       , MAX(CASE WHEN cte.rowNo=6 THEN dueDate END)
                       , MAX(CASE WHEN cte.rowNo=7 THEN dueDate END)
                       , MAX(CASE WHEN cte.rowNo=8 THEN dueDate END)
                       , MAX(CASE WHEN cte.rowNo=9 THEN dueDate END)
                       , MAX(CASE WHEN cte.rowNo=10 THEN dueDate END)
                       , MAX(CASE WHEN cte.rowNo=11 THEN dueDate END)
                       , MAX(CASE WHEN cte.rowNo=12 THEN dueDate END)
                       , MAX(CASE WHEN cte.rowNo=13 THEN dueDate END)
                       , MAX(CASE WHEN cte.rowNo=14 THEN dueDate END)
                       , MAX(CASE WHEN cte.rowNo=15 THEN dueDate END)
                FROM     cte
                GROUP BY cte.id
              )
SELECT
         t2.ID
       , t2.Purchase_Date
       , t2.Cost
       , t1.due_date1
       , t1.due_date2
       , t1.due_date3
       , t1.due_date4
       , t1.due_date5
       , t1.due_date6
       , t1.due_date7
       , t1.due_date8
       , t1.due_date9
       , t1.due_date10
       , t1.due_date11
       , t1.due_date12
       , t1.due_date13
       , t1.due_date14
       , t1.due_date15
FROM     @table2            t2
         INNER JOIN pivoted t1 ON t1.id=t2.ID
ORDER BY t2.ID, CONVERT(DATE, t2.Purchase_Date '-2000') DESC;
  • Related