--demo setup
drop table if exists dbo.product
go
create table dbo.Product
(
ProductId int,
ProductTitle varchar(55),
ProductCategory varchar(255),
Loaddate datetime
)
insert into dbo.Product
values (1, 'Table', 'ABCD', '3/4/2018'),
(1, 'Table', 'ABCD', '3/5/2018'),
(1, 'Table', 'ABCD', '3/6/2018'),
(1, 'Table', 'XYZ', '3/7/2018'),
(1, 'Table', 'XYZ', '3/8/2018'),
(1, 'Table', 'XYZ', '3/9/2018'),
(1, 'Table', 'GHI', '3/10/2018'),
(1, 'Table', 'GHI', '3/11/2018'),
(1, 'Table', 'XYZ', '3/12/2018'),
(1, 'Table', 'XYZ', '3/13/2018')
SELECT
product.productid,
product.producttitle,
product.productcategory,
MIN(product.loaddate) AS BeginDate,
-- ,max(product.LoadDate) as BeginDate1
CASE
WHEN MAX(product.loaddate) = MAX(oa.enddate1)
THEN '12/31/9999'
ELSE MAX(product.loaddate)
END AS EndDate
FROM
dbo.product product
CROSS APPLY
(SELECT MAX(subproduct.loaddate) EndDate1
FROM dbo.product subproduct
WHERE subproduct.productid = product.productid) oa
GROUP BY
productid, producttitle, productcategory
Output:
productid | producttitle | productcategory | BeginDate | EndDate |
---|---|---|---|---|
1 | Table | ABCD | 2018-03-04 00:00:00.000 | 2018-03-06 00:00:00.000 |
1 | Table | XYZ | 2018-03-07 00:00:00.000 | 9999-12-31 00:00:00.000 |
1 | Table | GHI | 2018-03-10 00:00:00.000 | 2018-03-11 00:00:00.000 |
Desired output:
productid | producttitle | productcategory | BeginDate | EndDate |
---|---|---|---|---|
1 | Table | ABCD | 2018-03-04 00:00:00.000 | 2018-03-06 00:00:00.000 |
1 | Table | XYZ | 2018-03-07 00:00:00.000 | 2018-03-09 00:00:00.000 |
1 | Table | GHI | 2018-03-10 00:00:00.000 | 2018-03-11 00:00:00.000 |
1 | Table | XYZ | 2018-03-12 00:00:00.000 | 9999-12-31 00:00:00.000 |
The last two inserted rows repeat the data from Loaddate '3/7/2018'-'3/9/2018', this doesn't happen if any of the new inserted rows doesn't repeat data. The only thing that changes is the LoadDate, giving me incorrect output. how can i get something like that desired output?
CodePudding user response:
Well, first of all, you need to find a sequence number over all your records. If you already have a primary key, that's good. In example you gave us, there's no such column, so let's generate it. Then, we make pairs with start and end dates for each product's category change. Another thing is to group all these product's category changes. Finally, we make just a simple group by:
;
with cte as ( select *,
row_number() over(partition by ProductId order by Loaddate) as rn
from product
), cte2 as ( select t1.ProductId,
t1.ProductTitle,
t1.ProductCategory,
t1.Loaddate as BeginDate,
case
when t1.ProductCategory <> t2.ProductCategory
then t1.Loaddate
else coalesce(t2.Loaddate, '12/31/9999')
end as EndDate,
row_number() over(order by t1.ProductId, t1.Loaddate) as rn_overall,
row_number() over(partition by t1.ProductId, t1.ProductCategory order by t1.Loaddate) as rn_category
from cte as t1
left join cte as t2
on t2.ProductId = t1.ProductId
and t2.rn = t1.rn 1
), cte3 as ( select *,
min(rn_overall) over (partition by ProductId, ProductCategory, rn_overall - rn_category) as product_group
from cte2
)
select ProductId, ProductTitle, ProductCategory, min(BeginDate) as BeginDate, max(EndDate) as EndDate
from cte3
group by ProductId, ProductTitle, ProductCategory, product_group
order by ProductId, BeginDate