I'm trying to use regexp_subst to split a delimited string. I'm running into an issue when delimited fields are null. The regexp_substr
ignores the nulls and moves to the next occurrence of the delimiter. Is there a way to do this with regexp_substr
? If not, what alternative do you use?
--Expecting hello, gets hello
select regexp_substr('hello@@world', '[^@] ', 1, 1)
from dual;
--Expecting null, gets world
select regexp_substr('hello@@world', '[^@] ', 1, 2)
from dual;
--Expecting world, gets null
select regexp_substr('hello@@world', '[^@] ', 1, 3)
from dual;
EDIT: tried this, but it works only with | which isn't an option
CodePudding user response:
Answering based on Matbailie's input in above comment
select regexp_substr('hello@@world', '(.*?)(@|$)', 1, 1,NULL,1)
from dual
union all
--Expecting null, gets null
select regexp_substr('hello@@world', '(.*?)(@|$)', 1, 2,NULL,1)
from dual
union all
--Expecting world, gets world
select regexp_substr('hello@@world', '(.*?)(@|$)', 1, 3,NULL,1)
from dual;
CodePudding user response:
You do not need regular expressions. It can be done with simple (and faster) string functions in a recursive sub-query:
WITH data (value) AS (
SELECT 'hello@@world' FROM DUAL
),
bounds (value, start_pos, end_pos) AS (
SELECT value,
1,
INSTR(value, '@', 1)
FROM data
UNION ALL
SELECT value,
end_pos 1,
INSTR(value, '@', end_pos 1)
FROM bounds
WHERE end_pos > 0
)
SEARCH DEPTH FIRST BY value SET order_id
SELECT CASE end_pos
WHEN 0
THEN SUBSTR(value, start_pos)
ELSE SUBSTR(value, start_pos, end_pos - start_pos)
END AS item
FROM bounds;
Which outputs:
ITEM hello null world
Or, if you want the data in columns (rather than rows):
WITH data (value) AS (
SELECT 'hello@@world' FROM DUAL
),
bounds (value, pos1, pos2) AS (
SELECT value,
INSTR(value, '@', 1, 1),
INSTR(value, '@', 1, 2)
FROM data
)
SELECT SUBSTR(value, 1, pos1 - 1) AS item1,
SUBSTR(value, pos1 1, pos2 - pos1 - 1) AS item2,
SUBSTR(value, pos2 1) AS item3
FROM bounds
Which outputs:
ITEM1 ITEM2 ITEM3 hello null world
If you did want to use (slower) regular expressions then:
WITH data (value) AS (
SELECT 'hello@@world' FROM DUAL
)
SELECT item
FROM data d
CROSS JOIN LATERAL(
SELECT REGEXP_SUBSTR( d.value, '(.*?)(@|$)', 1, LEVEL, NULL, 1) AS item
FROM DUAL
CONNECT BY LEVEL < REGEXP_COUNT( d.value, '(.*?)(@|$)')
)
or, for columns:
WITH data (value) AS (
SELECT 'hello@@world' FROM DUAL
)
SELECT REGEXP_SUBSTR(value, '(.*?)(@|$)', 1, 1, NULL, 1) AS item1,
REGEXP_SUBSTR(value, '(.*?)(@|$)', 1, 2, NULL, 1) AS item2,
REGEXP_SUBSTR(value, '(.*?)(@|$)', 1, 3, NULL, 1) AS item3
FROM data
(Which both have the same output as above)
db<>fiddle here