Home > Enterprise >  Microsoft SQL Server 2016 - T-SQL language - 'Gaps & Islands' problem - tricky partition
Microsoft SQL Server 2016 - T-SQL language - 'Gaps & Islands' problem - tricky partition

Time:12-02

I have 5 employees with ID values : 111, 222, 333, 444, 555.

I have a source dataset that has data for all five of them on all calendar days between Mar 28, 2021 and Apr 5, 2021. (This is just a small sample subset of my actual data).

You can use these scripts to see the sample of the source data:

CREATE TABLE #SourceDataset
(
    EmployeeId int,
    PayGroupName varchar(100),
    PayTypeName varchar(100),
    SiteName varchar(100),
    Region varchar(100),
    PercentageScore int,
    Center int,
    [Function] varchar(100),
    Department varchar(100),
    EverySingleCalendarDate date
)

--- for Employee ID = 111 ---

INSERT INTO #SourceDataset
VALUES (111, 'Biweekly', 'Salaried', 'Shop Floor', 'NYC', 100, 17, 'WQ', 'FG', '3/28/2021'),
       (111, 'Biweekly', 'Salaried', 'Shop Floor', 'NYC', 100, 17, 'WQ', 'FG', '3/29/2021'),
       (111, 'Biweekly', 'Salaried', 'Shop Floor', 'NYC', 100, 17, 'WQ', 'FG', '3/30/2021'),
       (111, 'Biweekly', 'Salaried', 'Shop Floor', 'NYC', 100, 17, 'WQ', 'FG', '3/31/2021'),
       (111, 'Biweekly', 'Salaried', 'Shop Floor', 'NYC', 100, 17, 'WQ', 'FG', '4/1/2021'),
       (111, 'Biweekly', 'Salaried', 'Shop Floor', 'NYC', 50, 56, 'WQ', 'FG', '4/2/2021'),
       (111, 'Biweekly', 'Salaried', 'Shop Floor', 'NYC', 50, 80, 'WQ', 'FG', '4/2/2021'),
       (111, 'Biweekly', 'Salaried', 'Shop Floor', 'NYC', 50, 56, 'WQ', 'FG', '4/3/2021'),
(111,   'Biweekly', 'Salaried', 'Shop Floor',   'NYC',  50, 80, 'WQ',   'FG',   '4/3/2021'),
(111,   'Biweekly', 'Salaried', 'Shop Floor',   'NYC',  50, 56, 'WQ',   'FG',   '4/4/2021'),
(111,   'Biweekly', 'Salaried', 'Shop Floor',   'NYC',  50, 80, 'WQ',   'FG',   '4/4/2021'),
(111,   'Biweekly', 'Salaried', 'Shop Floor',   'NYC',  50, 56, 'WQ',   'FG',   '4/5/2021'),
(111,   'Biweekly', 'Salaried', 'Shop Floor',   'NYC',  50, 80, 'WQ',   'FG',   '4/5/2021')

--- for Employee ID = 111 ---

--- for Employee ID = 222 ---

INSERT INTO #SourceDataset

VALUES

(222,   'Monthly',  'PartTime', 'Office Room',  'POR',  100,    19, 'WQ',   'FG',   '3/28/2021'),
(222,   'Monthly',  'PartTime', 'Office Room',  'POR',  100,    19, 'WQ',   'FG',   '3/29/2021'),
(222,   'Monthly',  'PartTime', 'Office Room',  'POR',  100,    19, 'WQ',   'FG',   '3/30/2021'),
(222,   'Monthly',  'PartTime', 'Office Room',  'POR',  100,    20, 'WR',   'FY',   '3/31/2021'),
(222,   'Monthly',  'PartTime', 'Office Room',  'POR',  100,    20, 'WR',   'FY',   '4/1/2021'),
(222,   'Monthly',  'PartTime', 'Office Room',  'POR',  25, 19, 'RH',   'FG',   '4/2/2021'),
(222,   'Monthly',  'PartTime', 'Office Room',  'POR',  75, 19, 'DP',   'FG',   '4/2/2021'),
(222,   'Monthly',  'PartTime', 'Office Room',  'POR',  25, 19, 'RH',   'FG',   '4/3/2021'),
(222,   'Monthly',  'PartTime', 'Office Room',  'POR',  75, 19, 'DP',   'FG',   '4/3/2021'),
(222,   'Monthly',  'PartTime', 'Office Room',  'POR',  100,    19, 'WQ',   'FG',   '4/4/2021'),
(222,   'Monthly',  'PartTime', 'Office Room',  'POR',  100,    19, 'WQ',   'FG',   '4/5/2021')

--- for Employee ID = 222 ---

--- for Employee ID = 333 ---

INSERT INTO #SourceDataset

VALUES

(333, 'Weekly',    'Contract',  'Store',    'ATL',  100,    19, 'WQ',   'FG',   '3/28/2021'),
(333, 'Weekly PT', 'Contract',  'Store',    'ATL',  100,    19, 'WQ',   'FG',   '3/29/2021'),
(333, 'Weekly PT', 'Temporary', 'Store',    'ATL',  100,    19, 'WQ',   'FG',   '3/30/2021'),
(333, 'Weekly',    'Contract',  'Store',    'ATL',  100,    19, 'WQ',   'FG',   '3/31/2021'),
(333, 'Weekly',    'Contract',  'Store',    'ATL',  100,    19, 'WQ',   'FG',   '4/1/2021'),
(333, 'Weekly',    'Contract',  'Store',    'ATL',  40, 19, 'WQ',   'PQ',   '4/2/2021'),
(333, 'Weekly',    'Contract',  'Store',    'ATL',  60, 19, 'WQ',   'AD',   '4/2/2021'),
(333, 'Weekly',    'Contract',  'Store',    'ATL',  40, 19, 'WQ',   'PQ',   '4/3/2021'),
(333, 'Weekly',    'Contract',  'Store',    'ATL',  60, 19, 'WQ',   'AD',   '4/3/2021'),
(333, 'Weekly',    'Contract',  'Store',    'ATL',  40, 19, 'WQ',   'PQ',   '4/4/2021'),
(333, 'Weekly',    'Contract',  'Store',    'ATL',  60, 19, 'WQ',   'AD',   '4/4/2021'),
(333, 'Weekly',    'Contract',  'Store',    'ATL',  40, 19, 'WQ',   'PQ',   '4/5/2021'),
(333, 'Weekly',    'Contract',  'Store',    'ATL',  60, 19, 'WQ',   'AD',   '4/5/2021')

--- for Employee ID = 333 ---

--- for Employee ID = 444 ---

INSERT INTO #SourceDataset

VALUES

(444,   'Weekly',   'FT',           'StoreHouse',   'MIA',  100,    19, 'WQ',   'FG',   '3/28/2021'),
(444,   'Weekly',   'FT-Optional',  'StoreHouse',   'MIA',  100,    19, 'WQ',   'FG',   '3/29/2021'),
(444,   'Weekly',   'FT-Optional',  'StoreHouse',   'MIA',  100,    19, 'WQ',   'FG',   '3/30/2021'),
(444,   'Weekly',   'FT',           'StoreHouse',   'MIA',  100,    19, 'WQ',   'FG',   '3/31/2021'),
(444,   'Weekly',   'FT',           'StoreHouse',   'MIA',  100,    19, 'WQ',   'FG',   '4/1/2021'),
(444,   'Weekly',   'FT',           'StoreHouse',   'MIA',  50, 10, 'AB',   'FG',   '4/2/2021'),
(444,   'Weekly',   'FT',           'StoreHouse',   'MIA',  50, 11, 'HP',   'FG',   '4/2/2021'),
(444,   'Weekly',   'FT',           'StoreHouse',   'MIA',  50, 10, 'AB',   'FG',   '4/3/2021'),
(444,   'Weekly',   'FT',           'StoreHouse',   'MIA',  50, 11, 'HP',   'FG',   '4/3/2021'),
(444,   'Weekly',   'FT',           'StoreHouse',   'MIA',  65, 29, 'RR',   'FG',   '4/4/2021'),
(444,   'Weekly',   'FT',           'StoreHouse',   'MIA',  35, 37, 'QQ',   'FG',   '4/4/2021'),
(444,   'Weekly',   'FT',           'StoreHouse',   'MIA',  50, 10, 'AB',   'FG',   '4/5/2021'),
(444,   'Weekly',   'FT',           'StoreHouse',   'MIA',  50, 11, 'HP',   'FG',   '4/5/2021')

--- for Employee ID = 444 ---

--- for Employee ID = 555 ---

INSERT INTO #SourceDataset

VALUES

(555,   'Weekly',                   'FT',   'StoreHouse1',    'RIC',    100,    19, 'WQ',   'FG',   '3/28/2021'),
(555,   'WeeklyOptional',           'PT',   'StoreHouse2',    'RIC',    100,    19, 'WQ',   'FG',   '3/29/2021'),
(555,   'Weekly',                   'FT',   'StoreHouse1',    'RIC',    100,    19, 'WQ',   'FG',   '3/30/2021'),
(555,   'Weekly',                   'FT',   'StoreHouse1',    'RIC',    100,    19, 'WQ',   'FG',   '3/31/2021'),
(555,   'Weekly',                   'FT',   'StoreHouse1',    'RIC',    100,    19, 'WQ',   'FG',   '4/1/2021'),
(555,   'Weekly',                   'FT',   'StoreHouse1',    'RIC',    40, 23, 'BB',   'MA',   '4/2/2021'),
(555,   'Weekly',                   'FT',   'StoreHouse1',    'RIC',    25, 24, 'CV',   'RU',   '4/2/2021'),
(555,   'Weekly',                   'FT',   'StoreHouse1',    'RIC',    35, 25, 'FJ',   'BN',   '4/2/2021'),
(555,   'Weekly',                   'FT',   'StoreHouse1',    'RIC',    40, 23, 'BB',   'MA',   '4/3/2021'),
(555,   'Weekly',                   'FT',   'StoreHouse1',    'RIC',    25, 24, 'CV',   'RU',   '4/3/2021'),
(555,   'Weekly',                   'FT',   'StoreHouse1',    'RIC',    35, 25, 'FJ',   'BN',   '4/3/2021'),
(555,   'Weekly-NonOptional',           'FT',   'StoreHouse156',  'RIC',    33, 23, 'BB',   'MA',   '4/4/2021'),
(555,   'Weekly-NonOptional',           'FT',   'StoreHouse156',  'RIC',    33, 24, 'CV',   'RU',   '4/4/2021'),
(555,   'Weekly-NonOptional',           'FT',   'StoreHouse156',  'RIC',    34, 25, 'FJ',   'BN',   '4/4/2021'),
(555,   'Weekly',                   'FT',   'StoreHouse1',    'RIC',    40, 23, 'BB',   'MA',   '4/5/2021'),
(555,   'Weekly',                   'FT',   'StoreHouse1',    'RIC',    25, 24, 'CV',   'RU',   '4/5/2021'),
(555,   'Weekly',                   'FT',   'StoreHouse1',    'RIC',    35, 25, 'FJ',   'BN',   '4/5/2021')

--- for Employee ID = 555 ---

SELECT *
FROM #SourceDataset
ORDER BY EmployeeId, EverySingleCalendarDate

---------------------------------------------------

Goal: I am trying to track how an employee attained 100 % Percentage Score over a period of time, and develop groups to display this information, with a new MinDate and MaxDate column, to show the range of dates for which a group existed. There is no gap in the calendar days for any employee, in the time period mentioned here (Mar 28, 2021 - Apr 5, 2021).

A note on the source dataset:

An employee may attain 100 % Percentage Score in one row for a given calendar day, with one unique value each, under each column - Center, Function, Department. Alternately she may attain 100 % Percentage Score on another calendar day, with different combinations of these 3 columns - Center, Function, Department - in multiple rows, with splits in Percentage Scores (say, 40 25 35 in three rows (or) 50 50 in two rows), which will add up to 100 % for that calendar day. See example queries on the source dataset to get an idea as to what I mean.

The source dataset has been developed in such a way that - every calendar day must account for 100 % for any employee, and there is NO gap in the calendar days. And for an employee, the change for these 4 columns (PayGroupName, PayTypeName, SiteName, Region) happen only at the calendar day level, NOT at the level of PercentageScore column or at the level of Center, Function, Department columns.

Example:

SELECT *
FROM #SourceDataset
WHERE EmployeeId = 111
AND EverySingleCalendarDate = '2021-03-28'
--100 % Percentage Score in one row--

SELECT *
FROM #SourceDataset
WHERE EmployeeId = 111
AND EverySingleCalendarDate = '2021-04-02'
--100 % Percentage Score in two rows (50   50)--

SELECT *
FROM #SourceDataset
WHERE EmployeeId = 555
AND EverySingleCalendarDate = '2021-04-02'
--100 % Percentage Score in three rows (40   25   35)--

My goal is to track every employee on how she attains 100 % Percentage Score over a period of time - from Mar 28 to Apr 5, with data available for ALL my 5 employees on ALL these days, with no gap in the calendar days in the source dataset.

The number of rows for an employee on a given calendar day, will depend on how she attains 100 % Percentage Score, either in one row or multiple rows. The 'number of rows' (on a calendar day for any employee) will essentially depend on the set of 3 columns - Center, Function, Department.

Another important point: the columns - PayGroupName, PayTypeName, SiteName, Region - will ALWAYS have exactly one unique value for an employee on a given calendar day. So the number of rows for an employee on a given calendar is NOT determined by these columns - PayGroupName, PayTypeName, SiteName, Region.

This said, the data under each of these columns (PayGroupName, PayTypeName, SiteName, Region) can change between calendar days for an employee, and we need to track this too. See example queries on the source dataset below:

SELECT *
FROM #SourceDataset
WHERE EmployeeId = 333
AND EverySingleCalendarDate BETWEEN '2021-03-28' AND '2021-03-30'
--PayGroupName and PayTypeName change between days for this employee (333)--

SELECT *
FROM #SourceDataset
WHERE EmployeeId = 555
AND EverySingleCalendarDate BETWEEN '2021-04-03' AND '2021-04-04'
/*
PayGroupName and SiteName change between days for this employee (555);
and its' very important to to note that for an employee, the change for these 4 columns
(PayGroupName, PayTypeName, SiteName, Region) can happen only at the calendar day level, NOT at the
level of Percentage Score or at the level of the columns - Center, Function, Department.

In essence, the change for these 4 columns (PayGroupName, PayTypeName, SiteName, Region) for an
employee can happen ONLY at the level of a calendar day.
*/

A few weeks back, I posted a question on this same topic, but the data set was easier, with exactly one row for an employee on any given calendar day.

Today, I have a similar but a challenging situation, in which there can be multiple rows for an employee on a given calendar day. This is based on how an employee (say Employee ID = 111) attains 100 % Percentage Score, for a calendar day, say Mar 28, 2021.

I have the desired output dataset, both as a temp table using T-SQL and as a tabular output, so that you can get an absolute clear picture of the problem I have at hand.

Desired Output:

CREATE TABLE #DesiredOutput

(

EmployeeId int,
PayGroupName varchar(100),
PayTypeName varchar(100),
SiteName varchar(100),
Region varchar(100),
PercentageScore int,
Center int,
[Function] varchar(100),
Department varchar(100),
MinDate date,
MaxDate date

)


INSERT INTO #DesiredOutput

VALUES


(111,   'Biweekly',         'Salaried', 'Shop Floor',    'NYC', 100,    17, 'WQ',   'FG',   '3/28/2021',    '4/1/2021'),
(111,   'Biweekly',         'Salaried', 'Shop Floor',    'NYC', 50, 56, 'WQ',   'FG',   '4/2/2021', '4/5/2021'),
(111,   'Biweekly',         'Salaried', 'Shop Floor',    'NYC', 50, 80, 'WQ',   'FG',   '4/2/2021', '4/5/2021'),
(222,   'Monthly',              'PartTime', 'Office Room',   'POR', 100,    19, 'WQ',   'FG',   '3/28/2021',    '3/30/2021'),
(222,   'Monthly',          'PartTime', 'Office Room',   'POR', 100,    20, 'WR',   'FY',   '3/31/2021',    '4/1/2021'),
(222,   'Monthly',          'PartTime', 'Office Room',   'POR', 25, 19, 'RH',   'FG',   '4/2/2021', '4/3/2021'),
(222,   'Monthly',              'PartTime', 'Office Room',   'POR', 75, 19, 'DP',   'FG',   '4/2/2021', '4/3/2021'),
(222,   'Monthly',          'PartTime', 'Office Room',   'POR', 100,    19, 'WQ',   'FG',   '4/4/2021', '4/5/2021'),
(333,   'Weekly',           'Contract', 'Store',     'ATL', 100,    19, 'WQ',   'FG',   '3/28/2021',    '3/28/2021'),
(333,   'Weekly PT',            'Contract', 'Store',     'ATL', 100,    19, 'WQ',   'FG',   '3/29/2021',    '3/29/2021'),
(333,   'Weekly PT',            'Temporary',    'Store',         'ATL', 100,    19, 'WQ',   'FG',   '3/30/2021',    '3/30/2021'),
(333,   'Weekly',           'Contract', 'Store',         'ATL', 100,    19, 'WQ',   'FG',   '3/31/2021',    '4/1/2021'),
(333,   'Weekly',           'Contract', 'Store',     'ATL', 40, 19, 'WQ',   'PQ',   '4/2/2021', '4/5/2021'),
(333,   'Weekly',           'Contract', 'Store',     'ATL', 60, 19, 'WQ',   'AD',   '4/2/2021', '4/5/2021'),
(444,   'Weekly',           'FT',           'StoreHouse',    'MIA', 100,    19, 'WQ',   'FG',   '3/28/2021',    '3/28/2021'),
(444,   'Weekly',           'FT-Optional',  'StoreHouse',    'MIA', 100,    19, 'WQ',   'FG',   '3/29/2021',    '3/30/2021'),
(444,   'Weekly',           'FT',           'StoreHouse',    'MIA', 100,    19, 'WQ',   'FG',   '3/31/2021',    '4/1/2021'),
(444,   'Weekly',           'FT',           'StoreHouse',    'MIA', 50, 10, 'AB',   'FG',   '4/2/2021', '4/3/2021'),
(444,   'Weekly',           'FT',           'StoreHouse',    'MIA', 50, 11, 'HP',   'FG',   '4/2/2021', '4/3/2021'),
(444,   'Weekly',           'FT',           'StoreHouse',    'MIA', 65, 29, 'RR',   'FG',   '4/4/2021', '4/4/2021'),
(444,   'Weekly',           'FT',           'StoreHouse',    'MIA', 35, 37, 'QQ',   'FG',   '4/4/2021', '4/4/2021'),
(444,   'Weekly',           'FT',           'StoreHouse',    'MIA', 50, 10, 'AB',   'FG',   '4/5/2021', '4/5/2021'),
(444,   'Weekly',           'FT',           'StoreHouse',    'MIA', 50, 11, 'HP',   'FG',   '4/5/2021', '4/5/2021'),
(555,   'Weekly',           'FT',           'StoreHouse1',   'RIC', 100,    19, 'WQ',   'FG',   '3/28/2021',    '3/28/2021'),
(555,   'WeeklyOptional',   'PT',           'StoreHouse2',   'RIC', 100,    19, 'WQ',   'FG',   '3/29/2021',    '3/29/2021'),
(555,   'Weekly',           'FT',           'StoreHouse1',   'RIC', 100,    19, 'WQ',   'FG',   '3/30/2021',    '4/1/2021'),
(555,   'Weekly',           'FT',           'StoreHouse1',   'RIC', 40, 23, 'BB',   'MA',   '4/2/2021', '4/3/2021'),
(555,   'Weekly',           'FT',           'StoreHouse1',   'RIC', 25, 24, 'CV',   'RU',   '4/2/2021', '4/3/2021'),
(555,   'Weekly',           'FT',           'StoreHouse1',   'RIC', 35, 25, 'FJ',   'BN',   '4/2/2021', '4/3/2021'),
(555,   'Weekly-NonOptional',   'FT',           'StoreHouse156', 'RIC', 33, 23, 'BB',   'MA',   '4/4/2021', '4/4/2021'),
(555,   'Weekly-NonOptional',   'FT',           'StoreHouse156', 'RIC', 33, 24, 'CV',   'RU',   '4/4/2021', '4/4/2021'),
(555,   'Weekly-NonOptional',   'FT',           'StoreHouse156', 'RIC', 34, 25, 'FJ',   'BN',   '4/4/2021', '4/4/2021'),
(555,   'Weekly',           'FT',           'StoreHouse1',   'RIC', 40, 23, 'BB',   'MA',   '4/5/2021', '4/5/2021'),
(555,   'Weekly',           'FT',           'StoreHouse1',   'RIC', 25, 24, 'CV',   'RU',   '4/5/2021', '4/5/2021'),
(555,   'Weekly',           'FT',           'StoreHouse1',   'RIC', 35, 25, 'FJ',   'BN',   '4/5/2021', '4/5/2021')



SELECT *
FROM #DesiredOutput
ORDER BY EmployeeId, MinDate

I need to use #SourceDataset and convert it to #DesiredOutput.

I need some advice.

For example, the #DesiredOutput (in tabular form) looks as below for an employee (111), and for all employees:

EmployeeId PayGroupName PayTypeName SiteName Region PercentageScore Center Function Department MinDate MaxDate
111 Biweekly Salaried Shop Floor NYC 100 17 WQ FG 3/28/2021 4/1/2021
111 Biweekly Salaried Shop Floor NYC 50 56 WQ FG 4/2/2021 4/5/2021
111 Biweekly Salaried Shop Floor NYC 50 80 WQ FG 4/2/2021 4/5/2021
EmployeeId PayGroupName PayTypeName SiteName Region PercentageScore Center Function Department MinDate MaxDate
111 Biweekly Salaried Shop Floor NYC 100 17 WQ FG 3/28/2021 4/1/2021
111 Biweekly Salaried Shop Floor NYC 50 56 WQ FG 4/2/2021 4/5/2021
111 Biweekly Salaried Shop Floor NYC 50 80 WQ FG 4/2/2021 4/5/2021
222 Monthly PartTime Office Room POR 100 19 WQ FG 3/28/2021 3/30/2021
222 Monthly PartTime Office Room POR 100 20 WR FY 3/31/2021 4/1/2021
222 Monthly PartTime Office Room POR 25 19 RH FG 4/2/2021 4/3/2021
222 Monthly PartTime Office Room POR 75 19 DP FG 4/2/2021 4/3/2021
222 Monthly PartTime Office Room POR 100 19 WQ FG 4/4/2021 4/5/2021
333 Weekly Contract Store ATL 100 19 WQ FG 3/28/2021 3/28/2021
333 WeeklyPT Contract Store ATL 100 19 WQ FG 3/29/2021 3/29/2021
333 WeeklyPT Temporary Store ATL 100 19 WQ FG 3/30/2021 3/30/2021
333 Weekly Contract Store ATL 100 19 WQ FG 3/31/2021 4/1/2021
333 Weekly Contract Store ATL 40 19 WQ PQ 4/2/2021 4/5/2021
333 Weekly Contract Store ATL 60 19 WQ AD 4/2/2021 4/5/2021
444 Weekly FT StoreHouse MIA 100 19 WQ FG 3/28/2021 3/28/2021
444 Weekly FT-Optional StoreHouse MIA 100 19 WQ FG 3/29/2021 3/30/2021
444 Weekly FT StoreHouse MIA 100 19 WQ FG 3/31/2021 4/1/2021
444 Weekly FT StoreHouse MIA 50 10 AB FG 4/2/2021 4/3/2021
444 Weekly FT StoreHouse MIA 50 11 HP FG 4/2/2021 4/3/2021
444 Weekly FT StoreHouse MIA 65 29 RR FG 4/4/2021 4/4/2021
444 Weekly FT StoreHouse MIA 35 37 QQ FG 4/4/2021 4/4/2021
444 Weekly FT StoreHouse MIA 50 10 AB FG 4/5/2021 4/5/2021
444 Weekly FT StoreHouse MIA 50 11 HP FG 4/5/2021 4/5/2021
555 Weekly FT StoreHouse1 RIC 100 19 WQ FG 3/28/2021 3/28/2021
555 WeeklyOptional PT StoreHouse2 RIC 100 19 WQ FG 3/29/2021 3/29/2021
555 Weekly FT StoreHouse1 RIC 100 19 WQ FG 3/30/2021 4/1/2021
555 Weekly FT StoreHouse1 RIC 40 23 BB MA 4/2/2021 4/3/2021
555 Weekly FT StoreHouse1 RIC 25 24 CV RU 4/2/2021 4/3/2021
555 Weekly FT StoreHouse1 RIC 35 25 FJ BN 4/2/2021 4/3/2021
555 Weekly-NonOPTional FT StoreHouse156 RIC 33 23 BB MA 4/4/2021 4/4/2021
555 Weekly-NonOPTional FT StoreHouse156 RIC 33 24 CV RU 4/4/2021 4/4/2021
555 Weekly-NonOPTional FT StoreHouse156 RIC 34 25 FJ BN 4/4/2021 4/4/2021
555 Weekly FT StoreHouse1 RIC 40 23 BB MA 4/5/2021 4/5/2021
555 Weekly FT StoreHouse1 RIC 25 24 CV RU 4/5/2021 4/5/2021
555 Weekly FT StoreHouse1 RIC 35 25 FJ BN 4/5/2021 4/5/2021

Any idea on how to develop the desired output?

The table data at the lower end is the full table, which may be difficult to scroll through; so I have a smaller version for Employee ID = 111 at the top, and the entire desired output next. Alternately, you can use the T-SQL scripts to get an idea of the #DesiredOutput dataset.

CodePudding user response:

The method used:

  • Calculating a flag to indicate gaps of more than 1 day.
  • Creating a ranking based on summing the flag.
  • Group it up, including the calculated ranking.
SELECT EmployeeId, PayGroupName, PayTypeName, SiteName, 
Region, PercentageScore,Center, [Function], Department
, MIN(EverySingleCalendarDate) MinDate
, MAX(EverySingleCalendarDate) MaxDate
INTO #tmpResults
FROM (
  SELECT *
  , Rnk = SUM(Flag) OVER (PARTITION BY EmployeeId, PayGroupName, PayTypeName, SiteName, 
Region, PercentageScore,Center, [Function], Department ORDER BY EverySingleCalendarDate)
  FROM
  (
     SELECT *
     , Flag = IIF(1=DATEDIFF(day, LAG(EverySingleCalendarDate) OVER (PARTITION BY EmployeeId, PayGroupName, PayTypeName, SiteName, 
Region, PercentageScore,Center, [Function], Department ORDER BY EverySingleCalendarDate), EverySingleCalendarDate),0,1)
     FROM #SourceDataset
  ) q1
) q2
GROUP BY EmployeeId, PayGroupName, PayTypeName, SiteName, 
Region, PercentageScore,Center, [Function], Department, Rnk
ORDER BY EmployeeId, MinDate;

SELECT * FROM #tmpResults;
--
-- Differences
--
SELECT * FROM #tmpResults
EXCEPT
SELECT * FROM #DesiredOutput;

SELECT * FROM #DesiredOutput
EXCEPT
SELECT * FROM #tmpResults;
GO
EmployeeId | PayGroupName | PayTypeName | SiteName | Region | PercentageScore | Center | Function | Department | MinDate | MaxDate
---------: | :----------- | :---------- | :------- | :----- | --------------: | -----: | :------- | :--------- | :------ | :------

EmployeeId | PayGroupName | PayTypeName | SiteName | Region | PercentageScore | Center | Function | Department | MinDate | MaxDate
---------: | :----------- | :---------- | :------- | :----- | --------------: | -----: | :------- | :--------- | :------ | :------

db<>fiddle here

CodePudding user response:

This is exactly what @LukStorms provided, but formatted better so that anyone can use this code. Thanks very much for your support.

SELECT

k.EmployeeId,k.PayGroupName,k.PayTypeName,k.SiteName,k.Region,k.PercentageScore,k.Center,k.[Function],k.Department,

MIN(k.EverySingleCalendarDate) AS MinDate,
MAX(k.EverySingleCalendarDate) AS MaxDate

FROM

(

SELECT

g.*,

SUM(g.Flag) OVER

(

PARTITION BY

g.EmployeeId,g.PayGroupName,g.PayTypeName,g.SiteName,g.Region,g.PercentageScore,g.Center,g.[Function],g.Department

ORDER BY g.EverySingleCalendarDate

) AS Rnk

FROM
 
(

SELECT

*,

Flag = CASE

           WHEN DATEDIFF(dd,

                          LAG(EverySingleCalendarDate,1) OVER

                          (PARTITION BY

                          EmployeeId,PayGroupName,PayTypeName,SiteName,Region,PercentageScore,Center,[Function],Department

                          ORDER BY EverySingleCalendarDate),

                          EverySingleCalendarDate) = 1 THEN 0

            ELSE 1

        END

FROM 

#SourceDataset

) g

) k

GROUP BY

k.EmployeeId,k.PayGroupName,k.PayTypeName,k.SiteName,k.Region,k.PercentageScore,k.Center,k.[Function],k.Department,
k.Rnk
--note the Rnk column in the GROUP BY
--ORDER BY k.EmployeeId, MinDateRange
  • Related