Home > front end >  Classifying double-pipe separated values in specific columns in SQL Server
Classifying double-pipe separated values in specific columns in SQL Server

Time:07-27

In SQLServer 2016, I have a table with 4 columns. One of the column is formatted as JSON and contains double-pipe separated 4-character "codes" associated with two possible keys, A and B. I am only interested in the code associated with A.

id purchase_date price item_codes
1 2022-06-23 34.99 {"A": "CE01 || DF04"}
2 2022-06-24 149.99 {"A": "CE02 || DF01 || CE05"}
3 2022-06-26 9.99
4 2022-06-26 79.99 {"A": "CE04"}
5 2022-06-27 14.99 {"A": "DF01", "B": "XA01"}
6 2022-06-27 10.99 {"B": "XA01 || XA03"}
7 2022-06-29 119.99 {"A": "CE06 || DF02"}

I am looking to create a view to extend this table and display all A codes equal to CE01, CE02, CE03 in a purchase_code column, all A codes equal to CE04, CE05, CE06 in a sales_code column and all A codes equal to DF01, DF02, DF03, DF04 in a shipping_code column. Note that only one code per column is possible, in other word, the data will never have two codes belonging to the same column as values for A. Also note that I have no control over the format/structure of the data in column item_codes.

id purchase_date price purchase_code sales_code shipping_code
1 2022-06-23 34.99 CE01 DF04
2 2022-06-24 149.99 CE02 CE05 DF01
3 2022-06-26 9.99
4 2022-06-26 79.99 CE04
5 2022-06-27 14.99 DF01
6 2022-06-27 10.99
7 2022-06-29 119.99 CE06 DF02
DECLARE @MyTable TABLE
(   id                      int,
    purchase_date           date, 
    price                   money, 
    item_codes              nvarchar(max)
)

INSERT INTO @MyTable (id, purchase_date, price, item_codes)
VALUES
(1, '2022-06-23', 34.99, '{"A": "CE01 || DF04"}'),
(2, '2022-06-24', 149.99, '{"A": "CE02 || DF01 || CE05"}'),
(3, '2022-06-26', 9.99, NULL),
(4, '2022-06-26', 79.99, '{"A": "CE04"}'),
(5, '2022-06-27', 14.99, '{"A": "DF01", "B": "XA01"}'),
(6, '2022-06-27', 10.99, '{"B": "XA01 || XA03"}'),
(7, '2022-06-29', 119.99, '{"A": "CE06 || DF02"}')

SELECT 
    t.id, 
    t.purchase_date, 
    t.price, 
    ic.code
FROM 
    @MyTable t
OUTER APPLY 
(
    SELECT 
                    "A"                 as code
    FROM
                OPENJSON (t.item_codes)
    WITH (
                    "A"                 varchar(255)
        )
) ic

What is the most efficient way to split and classify codes in column item_codes? I am able to split all values using a combination of STRING_SPLIT and REPLACE (since STRING_SPLIT only takes a single separator character), but struggling joining/classifying the results.

CodePudding user response:

Please try the following solution.

  • 1st CROSS APPLY is retrieving A value as [code] column from the JSON.
  • 2nd CROSS APPLY is tokenizing the [code] column via XML.
  • Finally. we are using XQuery and its FLWOR expression to break down (classify) XML tokens into the appropriate columns in the SELECT clause.

SQL

DECLARE @MyTable TABLE
(   id                      INT PRIMARY KEY,
    purchase_date           date, 
    price                   money, 
    item_codes              nvarchar(max)
);

INSERT INTO @MyTable (id, purchase_date, price, item_codes)
VALUES
(1, '2022-06-23', 34.99, '{"A": "CE01 || DF04"}'),
(2, '2022-06-24', 149.99, '{"A": "CE02 || DF01 || CE05"}'),
(3, '2022-06-26', 9.99, NULL),
(4, '2022-06-26', 79.99, '{"A": "CE04"}'),
(5, '2022-06-27', 14.99, '{"A": "DF01", "B": "XA01"}'),
(6, '2022-06-27', 10.99, '{"B": "XA01 || XA03"}'),
(7, '2022-06-29', 119.99, '{"A": "CE06 || DF02"}');

DECLARE @separator CHAR(2) = '||';

SELECT id, purchase_date, price, t1.code
    , purchase_code = COALESCE(c.query('for $x in /root/r[text()=("CE01","CE02","CE03")]/text() return $x')
        .value('.','VARCHAR(10)'), '')
    , sales_code = COALESCE(c.query('for $x in /root/r[text()=("CE04","CE05","CE06")]/text() return $x')
        .value('.','VARCHAR(10)'), '')
    , shipping_code = COALESCE(c.query('for $x in /root/r[text()=("DF01","DF02","DF03","DF04")]/text() return $x')
        .value('.','VARCHAR(10)'), '')
FROM @MyTable
CROSS APPLY (SELECT JSON_VALUE(item_codes, '$.A')) AS t1(code)
CROSS APPLY (SELECT TRY_CAST('<root><r><![CDATA['   
      REPLACE(REPLACE(code, SPACE(1), ''), @separator, ']]></r><r><![CDATA[')   
      ']]></r></root>' AS XML)) AS t2(c)
ORDER BY id;

Output

 ---- --------------- -------- ---------------------- --------------- ------------ --------------- 
| id | purchase_date | price  |         code         | purchase_code | sales_code | shipping_code |
 ---- --------------- -------- ---------------------- --------------- ------------ --------------- 
|  1 | 2022-06-23    |  34.99 | CE01 || DF04         | CE01          |            | DF04          |
|  2 | 2022-06-24    | 149.99 | CE02 || DF01 || CE05 | CE02          | CE05       | DF01          |
|  3 | 2022-06-26    |   9.99 | NULL                 |               |            |               |
|  4 | 2022-06-26    |  79.99 | CE04                 |               | CE04       |               |
|  5 | 2022-06-27    |  14.99 | DF01                 |               |            | DF01          |
|  6 | 2022-06-27    |  10.99 | NULL                 |               |            |               |
|  7 | 2022-06-29    | 119.99 | CE06 || DF02         |               | CE06       | DF02          |
 ---- --------------- -------- ---------------------- --------------- ------------ --------------- 

CodePudding user response:

Brute force...

SELECT 
    t.id, 
    t.purchase_date, 
    t.price, 
    ic.code, 
    purchase_code.val AS purchase_code,
    sales_code.val    AS sales_code,
    shipping_code.val AS shipping_code
FROM 
    @MyTable t
OUTER APPLY 
(
    SELECT 
       CONCAT(' || ', A, ' || ')  as code
    FROM
        OPENJSON(t.item_codes)
            WITH (
                A   varchar(255)
            )
) ic
OUTER APPLY
(
  SELECT 'CE01' WHERE code LIKE '% || CE01 || %'
  UNION ALL
  SELECT 'CE02' WHERE code LIKE '% || CE02 || %'
  UNION ALL
  SELECT 'CE03' WHERE code LIKE '% || CE03 || %'
)
  AS purchase_code(val)
OUTER APPLY
(
  SELECT 'CE04' WHERE code LIKE '% || CE04 || %'
  UNION ALL
  SELECT 'CE05' WHERE code LIKE '% || CE05 || %'
  UNION ALL
  SELECT 'CE06' WHERE code LIKE '% || CE06 || %'
)
  AS sales_code(val)
OUTER APPLY
(
  SELECT 'DF01' WHERE code LIKE '% || DF01 || %'
  UNION ALL
  SELECT 'DF02' WHERE code LIKE '% || DF02 || %'
  UNION ALL
  SELECT 'DF03' WHERE code LIKE '% || DF03 || %'
  UNION ALL
  SELECT 'DF04' WHERE code LIKE '% || DF04 || %'
)
  AS shipping_code(val)

Slightly tidier...

SELECT 
    t.id, 
    t.purchase_date, 
    t.price, 
    ic.code,
    MAX(CASE WHEN code.value IN ('CE01', 'CE02', 'CE03'        ) THEN value END),
    MAX(CASE WHEN code.value IN ('CE04', 'CE05', 'CE06'        ) THEN value END),
    MAX(CASE WHEN code.value IN ('DF01', 'DF02', 'DF03', 'DF04') THEN value END)
FROM 
    @MyTable t
OUTER APPLY 
(
    SELECT 
       REPLACE(A, ' || ', ',')  as code
    FROM
        OPENJSON(t.item_codes)
            WITH (
                A   varchar(255)
            )
) ic
OUTER APPLY
  STRING_SPLIT(ic.code, ',') AS code
GROUP BY
  t.id, 
  t.purchase_date, 
  t.price, 
  ic.code

https://dbfiddle.uk/?rdbms=sqlserver_2019&fiddle=86b851dcc1e40bbe68ac3d507afa11c8

  • Related