Home > Back-end >  SQL STUFF FOR XML with specific grouping
SQL STUFF FOR XML with specific grouping

Time:08-20

I'm working in SQL Server 2014, and I have the following simple data, which tracks calling history of users:

PhoneNumber Activity    ActivityDate
------------------------------------
9075551234  Incoming    2022-04-01
9075551234  Outgoing    2022-04-06
9075551234  Outgoing    2022-04-10
9075551234  Outgoing    2022-08-02
9075551234  Incoming    2022-08-05
9075551234  Lateral     2022-08-10

5551239876  Incoming    2022-07-01
5551239876  Outgoing    2022-07-06
5551239876  Outgoing    2022-08-01
5551239876  Outgoing    2022-08-02
5551239876  Incoming    2022-08-15

I need to group the ACTIVITY values into one field, which effortlessly be done using the STUFF FOR XML function:

phonenumber FirstContact    LatestContact   Result
------------------------------------------------------------------
5551239876  2022-07-01      2022-08-15      Incoming,Outgoing,Outgoing,Outgoing,Incoming
9075551234  2022-04-01      2022-08-10      Incoming,Outgoing,Outgoing,Outgoing,Incoming,Lateral

However I need to conditionally group these, on the condition that there was more than a 90 day between the activity date. Desired result:

phonenumber FirstContact    LatestContact   Result
-------------------------------------------------------------------
9075551234  2022-04-01      2022-04-10      Incoming,Outgoing,Outgoing
9075551234  2022-08-02      2022-08-10      Outgoing,Incoming,Lateral
5551239876  2022-07-01      2022-08-15      Incoming,Outgoing,Outgoing,Outgoing,Incoming

Here's the code/sample values I'm using, thanks!

DECLARE @separator CHAR(1) = ',';

WITH testTable   (PhoneNumber,Activity,ActivityDate) as 
(
  SELECT 9075551234 , 'Incoming' , '2022-04-01'    UNION ALL
  SELECT 9075551234 , 'Outgoing' , '2022-04-06'    UNION ALL
  SELECT 9075551234 , 'Outgoing' , '2022-04-10'    UNION ALL
  SELECT 9075551234 , 'Outgoing' , '2022-08-02'    UNION ALL
  SELECT 9075551234 , 'Incoming' , '2022-08-05'    UNION ALL
  SELECT 9075551234 , 'Lateral' ,  '2022-08-10'     UNION ALL
  SELECT 5551239876 , 'Incoming' , '2022-07-01'    UNION ALL
  SELECT 5551239876 , 'Outgoing' , '2022-07-06'    UNION ALL
  SELECT 5551239876 , 'Outgoing' , '2022-08-01'    UNION ALL
  SELECT 5551239876 , 'Outgoing' , '2022-08-02'    UNION ALL
  SELECT 5551239876 , 'Incoming' , '2022-08-15'   
)    
 
SELECT p.phonenumber
,   min(activitydate) FirstContact
,max(activitydate) LatestContact 
, STUFF((SELECT @separator   Activity 
          FROM testTable AS c
          WHERE c.phonenumber = p.phonenumber
 
          FOR XML PATH('')), 1, LEN(@separator), '') AS Result
FROM testTable AS p
GROUP BY p.phonenumber  
ORDER BY p.phonenumber;

CodePudding user response:

Below query will able to generate the desired result. Please not I have addeed one more row in your input data to validate results

DECLARE @separator CHAR(1) = ',';
DECLARE @allowedgap smallint = 90;


WITH testTable   (PhoneNumber,Activity,ActivityDate) as 
(
  SELECT 9075551234 , 'Incoming' , '2022-04-01'    UNION ALL
  SELECT 9075551234 , 'Outgoing' , '2022-04-06'    UNION ALL
  SELECT 9075551234 , 'Outgoing' , '2022-04-10'    UNION ALL
  SELECT 9075551234 , 'Outgoing' , '2022-08-02'    UNION ALL
  SELECT 9075551234 , 'Incoming' , '2022-08-05'    UNION ALL
  SELECT 9075551234 , 'Lateral' ,  '2022-08-10'     UNION ALL
  SELECT 9075551234 , 'Lateral' ,  '2022-12-10'     UNION ALL
  SELECT 5551239876 , 'Incoming' , '2022-07-01'    UNION ALL
  SELECT 5551239876 , 'Outgoing' , '2022-07-06'    UNION ALL
  SELECT 5551239876 , 'Outgoing' , '2022-08-01'    UNION ALL
  SELECT 5551239876 , 'Outgoing' , '2022-08-02'    UNION ALL
  SELECT 5551239876 , 'Incoming' , '2022-08-15'   
)    
 
, testTable2
as
(
select t.PhoneNumber
    , t.Activity
    , t.ActivityDate 
    , LAST_VALUE(t.ActivityDate) OVER (PARTITION BY t.PhoneNumber ORDER BY t.ActivityDate rows BETWEEN current row and 1 Following ) AS NextActivityDate
    , First_VALUE(t.ActivityDate) OVER (PARTITION BY t.PhoneNumber ORDER BY t.ActivityDate rows BETWEEN 1 preceding and current row ) AS PreviousActivityDate
from testTable t
)
, testTable3
as
(
select t.PhoneNumber
    , t.Activity
    , t.ActivityDate 
    --, t.NextActivityDate
    --, t.PreviousActivityDate
    , case when DATEDIFF(day,t.PreviousActivityDate, t.ActivityDate) > @allowedgap  then 1 else 0 end newPeroidStart
from testTable2 t
)
, testTable4
as
(select t.PhoneNumber
    , t.Activity
    , t.ActivityDate
, sum(newPeroidStart) over(PARTITION BY t.PhoneNumber ORDER BY t.ActivityDate  ROWS UNBOUNDED PRECEDING) grp
from testTable3 t
)

SELECT p.phonenumber
,   min(activitydate) FirstContact
,max(activitydate) LatestContact 
, STUFF((SELECT @separator   Activity 
          FROM testTable4 AS c
          WHERE c.phonenumber = p.phonenumber and c.grp = p.grp
          order by c.ActivityDate   
          FOR XML PATH('')), 1, LEN(@separator), '') AS Result
FROM testTable4 AS p
GROUP BY p.phonenumber, p.grp
ORDER BY p.phonenumber;

Please refer excellent article from Itzik Ben-Gan is on very similar problem for more details https://sqlperformance.com/2018/09/t-sql-queries/special-islands

Best!!

CodePudding user response:

Ok, with the additional Information, that after 90 days a new row must be created. my CTE2 determines for every phonenumber the diefference between the days and when it is more than 90 it will add a 1 else a 0 the second CTE3 is nededed to get the grouping correct, the idea is simple, as every new row to crreate will add 1 to the mix you start with 0 1 2 and so on and the follwoing rows will attribute nothing as they are 0

the rest is as you already have but with the additional rank

DECLARE @separator CHAR(1) = ',';

WITH testTable   (PhoneNumber,Activity,ActivityDate) as 
(
  SELECT 9075551234 , 'Incoming' , '2022-04-01'    UNION ALL
  SELECT 9075551234 , 'Outgoing' , '2022-04-06'    UNION ALL
  SELECT 9075551234 , 'Outgoing' , '2022-04-10'    UNION ALL
  SELECT 9075551234 , 'Outgoing' , '2022-08-02'    UNION ALL
  SELECT 9075551234 , 'Incoming' , '2022-08-05'    UNION ALL
  SELECT 9075551234 , 'Lateral' ,  '2022-08-10'     UNION ALL
  SELECT 5551239876 , 'Incoming' , '2022-07-01'    UNION ALL
  SELECT 5551239876 , 'Outgoing' , '2022-07-06'    UNION ALL
  SELECT 5551239876 , 'Outgoing' , '2022-08-01'    UNION ALL
  SELECT 5551239876 , 'Outgoing' , '2022-08-02'    UNION ALL
  SELECT 5551239876 , 'Incoming' , '2022-08-15'   
),
CTE2 as(
SELECT PhoneNumber,Activity,ActivityDate
, CASE WHEN DATEDIFF(Day,LAG(ActivityDate) OVER (PARTITION BY PhoneNumber ORDER BY  ActivityDate),ActivityDate) > 90 THEn 1 ELSE 0 END rn FROM testTable 
)
, CTE3 as (
SELECT
PhoneNumber,Activity,ActivityDate, SUM(rn)  OVER (PARTITION BY PhoneNumber ORDER BY  ActivityDate) rn2
FROM CTE2 
)
SELECT p.phonenumber
,   min(activitydate) FirstContact
,max(activitydate) LatestContact 
, STUFF((SELECT @separator   Activity 
          FROM CTE3 AS c
          WHERE c.phonenumber = p.phonenumber AND c.rn2 = p.rn2

          FOR XML PATH('')), 1, LEN(@separator), '') AS Result
FROM CTE3 AS p
GROUP BY p.phonenumber,rn2  
ORDER BY p.phonenumber;
phonenumber | FirstContact | LatestContact | Result                                      
----------: | :----------- | :------------ | :-------------------------------------------
 5551239876 | 2022-07-01   | 2022-08-15    | Incoming,Outgoing,Outgoing,Outgoing,Incoming
 9075551234 | 2022-04-01   | 2022-04-10    | Incoming,Outgoing,Outgoing                  
 9075551234 | 2022-08-02   | 2022-08-10    | Outgoing,Incoming,Lateral                   

db<>fiddle here

  • Related