Home > Mobile >  oracle regular expression split but workaround brackets
oracle regular expression split but workaround brackets

Time:04-19

I have a string like : "Att1:Val1,[Txt1,Txt2:Txt3]:Val2"
Using Oracle Sql, I would like to achieve a split into rows & columns as below :

lvl substr2 substr3
1 Att1 Val1
2 [Txt1,Txt2:Txt3] Val2

I have tried below code, but can't figure out how not to split by comma and colon the values between the brackets

with WTBL as
(
    select 'Att1:Val1,[Txt1,Txt2:Txt3]:Val2,' as WCLN
    from dual
)
select lvl, substr1, substr2, substr3, WCLN
from WTBL
cross join xmltable('if (contains($PRM,",")) 
        then
            let $list := ora:tokenize($PRM, ","),
                $cnt := count($list)
          for $val at $r in $list 
          where $r < $cnt
          return $val
        else $PRM'
  passing WCLN as PRM
  columns substr1 varchar2(4000) path '.'
    ,substr2 varchar2(4000) path 'if (contains( . , ":")) then
            let $list := ora:tokenize( . ,":"),
                $cnt := count($list)
          for $val at $r in $list
          where $r = $cnt - 1
          return $val
        else . '
    ,substr3 varchar2(4000) path 'if (contains( . , ":")) then
            let $list := ora:tokenize( . ,":"),
                $cnt := count($list)
          for $val at $r in $list
          where $r = $cnt
          return $val
        else . '
     ,lvl FOR ORDINALITY
) xm

Your help is much appreciated!
Vlad

CodePudding user response:

You can use the regular expression (\[.*?\]|.*?):(.*?)(,|$) and a recursive sub-query:

WITH matches (value, lvl, substr1, substr2, epos) AS (
  SELECT value,
         1,
         REGEXP_SUBSTR(value, '(\[.*?\]|.*?):(.*?)(,|$)', 1, 1, NULL, 1),
         REGEXP_SUBSTR(value, '(\[.*?\]|.*?):(.*?)(,|$)', 1, 1, NULL, 2),
         REGEXP_INSTR(value, '(\[.*?\]|.*?):(.*?)(,|$)', 1, 1, 1)
  FROM   table_name
UNION ALL
  SELECT value,
         lvl   1,
         REGEXP_SUBSTR(value, '(\[.*?\]|.*?):(.*?)(,|$)', epos, 1, NULL, 1),
         REGEXP_SUBSTR(value, '(\[.*?\]|.*?):(.*?)(,|$)', epos, 1, NULL, 2),
         REGEXP_INSTR(value, '(\[.*?\]|.*?):(.*?)(,|$)', epos, 1, 1)
  FROM   matches
  WHERE  epos > 0
)
SELECT lvl,
       substr1,
       substr2
FROM   matches
WHERE  epos > 0;

Or, simple (faster) string functions:

WITH matches (value, lvl, spos, mpos, epos) AS (
  SELECT value,
         1,
         1,
         CASE
         WHEN SUBSTR(value, 1, 1) = '['
         THEN INSTR(value, ']:', 1)   1
         ELSE INSTR(value, ':', 1)
         END,
         INSTR(
           value,
           ',',
           CASE
           WHEN SUBSTR(value, 1, 1) = '['
           THEN INSTR(value, ']:', 1)   1
           ELSE INSTR(value, ':', 1)
           END
         )
  FROM   table_name
UNION ALL
  SELECT value,
         lvl   1,
         epos   1,
         CASE
         WHEN SUBSTR(value, epos   1, 1) = '['
         THEN INSTR(value, ']:', epos   1)   1
         ELSE INSTR(value, ':', epos   1)
         END,
         INSTR(
           value,
           ',',
           CASE
           WHEN SUBSTR(value, epos   1, 1) = '['
           THEN INSTR(value, ']:', epos   1)   1
           ELSE INSTR(value, ':', epos   1)
           END
         )
  FROM   matches
  WHERE  epos > 0
)
SELECT lvl,
       SUBSTR(value, spos, mpos - spos) AS substr1,
       CASE epos
       WHEN 0
       THEN SUBSTR(value, mpos   1)
       ELSE SUBSTR(value, mpos   1, epos - mpos - 1)
       END AS substr2
FROM   matches;

Which, for the sample data:

CREATE TABLE table_name (value) AS
SELECT 'Att1:Val1,[Txt1,Txt2:Txt3]:Val2' FROM DUAL;

Both output:

LVL SUBSTR1 SUBSTR2
1 Att1 Val1
2 [Txt1,Txt2:Txt3] Val2

db<>fiddle here

  • Related