I have a situation where an application is storing chunks of text in a CLOB.
Each chunk of text is surrounded by a tag [SYSDATE] (see below for test CASE).
I'm looking for a query that can extract the data between the matching tags. For example, how can I get the text between [11-22-2021 14:16:19] tags for all matches. In this case 'ZZZZZZZZZZZZZZZZZZZZ'
Secondly, is there a better way to handle this situation? Perhaps maybe using XML? All input, ideas and working examples would be greatly appreciated.
ALTER SESSION SET NLS_DATE_FORMAT = 'MMDDYYYY HH24:MI:SS';
CREATE table t(
seq_num integer GENERATED BY DEFAULT AS IDENTITY (START WITH 1) NOT NULL,
c CLOB DEFAULT ' ',
create_date DATE DEFAULT SYSDATE
);
/
insert into t (c) values (' ')
/
CREATE OR REPLACE PROCEDURE lob_append(
p_clob IN OUT CLOB,
p_text IN VARCHAR2
)
AS
l_text varchar2(32760);
l_date_string VARCHAR2(50);
BEGIN
select '[' || TO_CHAR (SYSDATE, 'MM-DD-YYYY HH24:MI:SS') || ']'
into l_date_string from dual;
-- newline each time code is appended for clarity.
l_text :=chr(10) || l_date_string || chr(10)
|| p_text || chr(10)
|| l_date_string||chr(10);
dbms_lob.writeappend(p_clob, length(l_text), l_text );
END;
/
DECLARE
l_clob CLOB := empty_clob();
lTime date;
BEGIN
lTime := sysdate;
SELECT c INTO l_clob FROM t WHERE seq_num = 1 FOR UPDATE;
lob_append(l_clob, rpad('Z',20,'Z'));
loop
exit when sysdate = lTime interval '5' second;
end loop;
l_clob := empty_clob();
SELECT c INTO l_clob FROM t WHERE seq_num = 1 FOR UPDATE;
lob_append(l_clob, rpad('Y',10,'Y'));
END;
/
SEQ_NUM C CREATE_DATE
1
[11-22-2021 14:16:19]
ZZZZZZZZZZZZZZZZZZZZ
[11-22-2021 14:16:19]
[11-22-2021 14:16:24]
YYYYYYYYYY
[11-22-2021 14:16:24]
11222021 14:15:54
CodePudding user response:
Your sample CLOB contains newlines; I assumed that is correct, and handled the newlines explicitly. (The solution below assumes each "token" does not contain newlines itself - if it does, then you can modify the regexp_substr
call slightly, using the option to allow the dot metacharacter to match newlines.)
select t.seq_num, l.ord, l.token
from t cross join lateral
(
select level as ord,
regexp_substr(c, '(\[\d{2}-\d{2}-\d{4} \d{2}:\d{2}:\d{2}])'
|| chr(10) || '(.*?)' || chr(10) || '\1'
, 1, level, null, 2)
as token
from dual
connect by level <=
regexp_count(c, '(\[\d{2}-\d{2}-\d{4} \d{2}:\d{2}:\d{2}])'
|| chr(10) || '(.*?)' || chr(10) || '\1')
) l
order by seq_num, ord -- if needed
;
SEQ_NUM ORD TOKEN
---------- ---------- -------------------------
1 1 ZZZZZZZZZZZZZZZZZZZZ
1 2 YYYYYYYYYY