Given a string like
73 % polyester, 20 % modacrylic, 7 % cotton
there should be 3 rows with 2 columns:
Percentage | Component |
---|---|
73 | polyester |
20 | modacrylic |
7 | cotton |
5,5% cotton, 20% modacrylic, 74,5 % polyester, min. 90 % recycled material
Output should have 3 rows:
Percentage | Component |
---|---|
5,5 | cotton |
20 | modacrylic |
74,5 | polyester |
If a string doesn't start with a numeric character then by default the Percentage Column should get value as 100 and component should get all the characters before Comma (',')
e.g.
Polyester fibre , 150 g/sq.m.
Percentage | Component |
---|---|
100 | Polyester fibre |
I have written following logic but it's not working as expected:
SELECT
a.item_no,
a.item_type,
a.code_sup,
a.type_sup,
a.from_dtime,
a.id,
a.material_name,
a.str AS component,
b.str AS percentage
FROM
( SELECT
item_no,
item_type,
code_sup,
type_sup,
from_dtime,
id,
material_name,
level rowseq,
regexp_substr(str,'[^/] ',1,ROWNUM) str
FROM
( SELECT
'23456' item_no,
'PLASTIC' item_type,
'10121' code_sup,
'SUP' type_sup,
'27-Nov-2020' from_dtime,
'1.1' id,
'26,5 % polyester, min. 90% recycled, 67 % cotton' material_name,
level rowseq,
regexp_substr(replace('26,5 % polyester, min. 90% recycled, 67 % cotton','% /','%/'),'[^%] ',1,ROWNUM) str
FROM
dual
CONNECT BY
level <= regexp_count('26,5 % polyester, min. 90% recycled, 67 % cotton','[^%] ')
)
WHERE
rowseq = 1
CONNECT BY
level <= regexp_count(str,'[^/] ')
) a,
(
SELECT
item_no,
item_type,
code_sup,
type_sup,
from_dtime,
id,
material_name,
ROWNUM rowseq,
TRIM(regexp_substr(str,'[^/%] ',1,ROWNUM) ) str
FROM
( SELECT
'23456' item_no,
'PLASTIC' item_type,
'10121' code_sup,
'SUP' type_sup,
'27-Nov-2020' from_dtime,
'1.1' id,
'26,5 % polyester, min. 90% recycled, 67 % cotton' material_name,
level rowseq,
regexp_substr(replace('26,5 % polyester, min. 90% recycled, 67 % cotton','% /','%/'),'[^%] ',1,ROWNUM) str
FROM
dual
CONNECT BY
level <= regexp_count('26,5 % polyester, min. 90% recycled, 67 % cotton','[^%] ')
)
WHERE
rowseq = 2
CONNECT BY
level <= regexp_count(str,'[^/%] ')
) b
WHERE a.rowseq = b.rowseq
AND a.str IS NOT NULL
ORDER BY
a.item_no,
a.item_type,
a.code_sup,
a.type_sup,
a.from_dtime,
a.id ;
Here I am getting only 1 row with Percentage as 26,5 and Component as polyester , min. 90 , although I should get 2 rows
Percentage | Component |
---|---|
26,5 | polyester |
67 | cotton |
Please guide.
CodePudding user response:
If you consider your string as a csv then there is a way to split the elements from csv string and extract percentage and component. Comments inside code:
Select
TRIM(SubStr(COL1, 1, InStr(COL1, '%') - 1)) "PERCENTAGE",
TRIM(SubStr(COL1, InStr(COL1, '%') 1)) "COMPONENT"
FROM
(
SELECT
COL1
FROM
(
SELECT
INDX,
MY_STR1,
COL1_ELEMENTS,
CASE WHEN SubStr(TRIM(COL1), 1, 1) IN('0','1','2','3','4','5','6','7','8','9') THEN COL1 ELSE '100 % ' || COL1 END "COL1"
FROM
(
SELECT
0 "INDX",
COL1 "MY_STR1",
COL1_ELEMENTS "COL1_ELEMENTS",
COL1 "COL1"
FROM
(
SELECT
REPLACE(COL1, DELIMITER || ' ', DELIMITER) "COL1",
Trim(Length(Replace(COL1, DELIMITER || ' ', DELIMITER))) - Trim(Length(Translate(REPLACE(COL1, DELIMITER || ' ', DELIMITER), 'A' || DELIMITER, 'A'))) 1 "COL1_ELEMENTS"
FROM (SELECT
'73 % polyester, 20 % modacrylic, something else, 67 % cotton' "COL1", ',' "DELIMITER" -- here comes your string and delimiter, if delimiter is not comma (,) then you should replace ',' with your delimiter in RULES clause
FROM DUAL)
)
)
MODEL
DIMENSION BY(0 as INDX)
MEASURES(COL1, COL1_ELEMENTS, CAST('a' as VarChar2(4000)) as MY_STR1)
RULES ITERATE (10) --UNTIL (ITERATION_NUMBER <= COL1_ELEMENTS[ITERATION_NUMBER 1]) -- If you don't know the number of elements this should be bigger then you aproximation. Othewrwise it will split given number of elements
(
COL1_ELEMENTS[ITERATION_NUMBER 1] = COL1_ELEMENTS[0],
MY_STR1[0] = COL1[CV()],
MY_STR1[ITERATION_NUMBER 1] = SubStr(MY_STR1[ITERATION_NUMBER], InStr(MY_STR1[ITERATION_NUMBER], ',', 1) Length(',')),
COL1[ITERATION_NUMBER 1] = SubStr(MY_STR1[ITERATION_NUMBER], 1, CASE WHEN InStr(MY_STR1[ITERATION_NUMBER], ',') <> 0 THEN InStr(MY_STR1[ITERATION_NUMBER], ',')-1 ELSE Length(MY_STR1[ITERATION_NUMBER]) END)
)
)
WHERE INDX > 0 And INDX <= COL1_ELEMENTS
)
--
-- Result:
--
-- PERCENTAGE COMPONENT
-- 73 polyester
-- 20 modacrylic
-- 100 something else
-- 67 cotton
CodePudding user response:
You need to do your string transformation step-by-step and thoroughly.
- Remove White space after comma and percent
replace(replace('26,5 % polyester, min. 90% recycled, 67 % cotton','% ','% '),', ',',')
2.Using "," instead of "%" as a string delimiter,
regexp_substr(REGEXP_REPLACE(replace(replace('26,5 % polyester, min. 90% recycled, 67 % cotton','% ','% '),', ',','), '(\d ),(\d )', '\1.\2'),'[^,] ',1,ROWNUM) str
3.Filter for both row 1 and 3 NOT JUST 1
WHERE rowseq IN (1, 3)
- NOT SURE why you need the nested query at line 13 and the join subquery "b" starting at line 46 (IT'S REDUNDUNT)
- Do your component, percent split using the '%'
REGEXP_REPLACE(a.str, '^(\d .*\d*)(%)([^,]*)$','\3') AS component,
REGEXP_REPLACE(a.str, '^(\d .*\d*)(%)([^,]*)$','\1') AS percentage
And you will get..
SELECT
a.item_no,
a.item_type,
a.code_sup,
a.type_sup,
a.from_dtime,
a.id,
a.material_name,
a.rowseq,
REGEXP_REPLACE(a.str, '^(\d .*\d*)(%)([^,]*)$','\3') AS component,
REGEXP_REPLACE(a.str, '^(\d .*\d*)(%)([^,]*)$','\1') AS percentage
FROM
( SELECT
'23456' item_no,
'PLASTIC' item_type,
'10121' code_sup,
'SUP' type_sup,
'27-Nov-2020' from_dtime,
'1.1' id,
'26,5 % polyester, min. 90% recycled, 67 % cotton' material_name,
level rowseq,
regexp_substr(REGEXP_REPLACE(replace(replace('26,5 % polyester, min. 90% recycled, 67 % cotton','% ','% '),', ',','), '(\d ),(\d )', '\1.\2'),'[^,] ',1,ROWNUM) str
FROM
dual
CONNECT BY
level <= regexp_count(REGEXP_REPLACE(replace('26,5 % polyester, min. 90% recycled, 67 % cotton','% /','%/'), '(\d ),(\d )', '\1.\2'),'[^,] ')
) a
WHERE rowseq IN (1, 3)
ORDER BY
a.item_no,
a.item_type,
a.code_sup,
a.type_sup,
a.from_dtime,
a.id ;
For more complicated scenarios and robust code do the following. Of course you need to specify your transformation RULES carefully inside the WITH Clause, I'm Not a big fan of nested statements but the use case demands it.
var the_specification varchar2(500);
exec :the_specification := '5,5% cotton, 20% modacrylic, 74,5 % polyester, min. 90 % recycled material, Polyester fibre , 150 g/sq.m.';
WITH specstr as (
SELECT
REGEXP_REPLACE(
REGEXP_REPLACE(
REGEXP_REPLACE(
REGEXP_REPLACE(
REGEXP_REPLACE(
REGEXP_REPLACE(
REGEXP_REPLACE(:the_specification,'\s*%','% '), -- remove white space tagging the percentage'%' sign
'(\d ),(\d )', '\1.\2'), -- Replace dot'.' as decimal separator
'(,*[^,]*recycl[^,]*,*)', ','), -- remove any additonal comment THE_RULE: consider 'recycl' as comment
'(^[[:alnum:][:space:]] ),|,([[:alnum:][:space:]] ),|,([[:alnum:][:space:]] $)', ',100% \1\2\3,'), -- Add the default Percentage Column '100%'
'\s{2,}', ' '), -- remove double white space
',{2,}|\s,|,\s', ','), -- remove duplicate commas ',' or any trailing and tagging white space from comma
'^,*|,*$', '') refined_str -- remove comma from the start and end
FROM DUAL
)
SELECT REGEXP_REPLACE(mtrl_ratio.str, '^(\d ,*\d*)(%)([^,]*)$','\1') Percentage,
REGEXP_REPLACE(mtrl_ratio.str, '^(\d ,*\d*)(%)([^,]*)$','\3') Component
FROM (
SELECT
level rowseq,
REGEXP_REPLACE(
regexp_substr(
specstr.refined_str
,'[^,] ',1,ROWNUM), -- split string using comma as delimiter
'(\d )\.(\d )', '\1,\2') str -- Replace Back comma',' as decimal separator)
FROM
specstr
CONNECT BY
level <= regexp_count(
specstr.refined_str
,'[^,] ')
)mtrl_ratio
WHERE REGEXP_LIKE(mtrl_ratio.str, '%')