Home > Net >  Repeated data on inserted rows
Repeated data on inserted rows

Time:07-10

--demo setup 
drop table if exists dbo.product
go

create table dbo.Product
(
    ProductId int,
    ProductTitle varchar(55),
    ProductCategory varchar(255),
    Loaddate datetime
)

insert into dbo.Product
values (1, 'Table', 'ABCD', '3/4/2018'),
       (1, 'Table', 'ABCD', '3/5/2018'),
       (1, 'Table', 'ABCD', '3/6/2018'),
       (1, 'Table', 'XYZ', '3/7/2018'),
       (1, 'Table', 'XYZ', '3/8/2018'),
       (1, 'Table', 'XYZ', '3/9/2018'),
       (1, 'Table', 'GHI', '3/10/2018'),
       (1, 'Table', 'GHI', '3/11/2018'),
       (1, 'Table', 'XYZ', '3/12/2018'),
       (1, 'Table', 'XYZ', '3/13/2018')
 
SELECT 
    product.productid, 
    product.producttitle, 
    product.productcategory, 
    MIN(product.loaddate) AS BeginDate, 
    -- ,max(product.LoadDate) as BeginDate1 
    CASE 
        WHEN MAX(product.loaddate) = MAX(oa.enddate1) 
            THEN '12/31/9999' 
            ELSE MAX(product.loaddate) 
    END AS EndDate 
FROM   
    dbo.product product 
CROSS APPLY
    (SELECT MAX(subproduct.loaddate) EndDate1 
     FROM dbo.product subproduct 
     WHERE subproduct.productid = product.productid) oa 
GROUP BY 
    productid, producttitle, productcategory

Output:

productid producttitle productcategory BeginDate EndDate
1 Table ABCD 2018-03-04 00:00:00.000 2018-03-06 00:00:00.000
1 Table XYZ 2018-03-07 00:00:00.000 9999-12-31 00:00:00.000
1 Table GHI 2018-03-10 00:00:00.000 2018-03-11 00:00:00.000

Desired output:

productid producttitle productcategory BeginDate EndDate
1 Table ABCD 2018-03-04 00:00:00.000 2018-03-06 00:00:00.000
1 Table XYZ 2018-03-07 00:00:00.000 2018-03-09 00:00:00.000
1 Table GHI 2018-03-10 00:00:00.000 2018-03-11 00:00:00.000
1 Table XYZ 2018-03-12 00:00:00.000 9999-12-31 00:00:00.000

The last two inserted rows repeat the data from Loaddate '3/7/2018'-'3/9/2018', this doesn't happen if any of the new inserted rows doesn't repeat data. The only thing that changes is the LoadDate, giving me incorrect output. how can i get something like that desired output?

CodePudding user response:

Well, first of all, you need to find a sequence number over all your records. If you already have a primary key, that's good. In example you gave us, there's no such column, so let's generate it. Then, we make pairs with start and end dates for each product's category change. Another thing is to group all these product's category changes. Finally, we make just a simple group by:

;
with cte as (   select  *,
                        row_number() over(partition by ProductId order by Loaddate) as rn
                from product 
), cte2 as (    select  t1.ProductId, 
                        t1.ProductTitle, 
                        t1.ProductCategory, 
                        t1.Loaddate                         as BeginDate,  
                        case 
                            when t1.ProductCategory <> t2.ProductCategory
                            then t1.Loaddate
                            else coalesce(t2.Loaddate, '12/31/9999') 
                        end as EndDate,
                        row_number() over(order by t1.ProductId, t1.Loaddate)                                   as rn_overall,
                        row_number() over(partition by t1.ProductId, t1.ProductCategory order by t1.Loaddate)   as rn_category
                from cte as t1
                    left join cte as t2
                        on t2.ProductId = t1.ProductId
                        and t2.rn = t1.rn   1
), cte3 as (    select  *, 
                        min(rn_overall) over (partition by ProductId, ProductCategory, rn_overall - rn_category) as product_group
                from cte2
)

select ProductId, ProductTitle, ProductCategory, min(BeginDate) as BeginDate, max(EndDate) as EndDate
from cte3
group by ProductId, ProductTitle, ProductCategory, product_group
order by ProductId, BeginDate
  • Related