Home > Blockchain >  PL/SQL code to remove all the special characters from a particular column of a table
PL/SQL code to remove all the special characters from a particular column of a table

Time:04-13

I need to remove all the special characters from a particular column of a table by using PL/SQL. Can we solve this issue by using "REGEXP_SUBSTR" or "REGEXP_REPLACE"?

Thanks in advance.

Problem This is the CSV format of my data -

id,name,location,profession,age
3890,john,melbourne,  ,developer,45
2895,david,sydney,ana''lyst,61
5198,kelly,perth,mana@ger,78
7071,tim,canberra,tes/ter,61
8132,mike,brisbane,leader,51

I run the following PL/SQL command to remove all the special characters but it didn't work -

DECLARE
      F UTL_FILE.FILE_TYPE;
      V_LINE VARCHAR2 (1000);
      V_id VARCHAR2(1000);
      V_name VARCHAR2(1000);
      V_location VARCHAR2(1000); 
      V_profession VARCHAR2(1000);
      V_age VARCHAR2(1000);
    BEGIN
      F := UTL_FILE.FOPEN ('DATA_PUMP_DIR', 'person_list.CSV', 'R');
    IF UTL_FILE.IS_OPEN(F) THEN
      LOOP
        BEGIN
          UTL_FILE.GET_LINE(F, V_LINE, 1000);
          IF V_LINE IS NULL THEN
            EXIT;
         END IF;
           V_id:= REGEXP_SUBSTR(V_LINE, '[^,@,/,'' ] ', 1, 1);
           V_name:= REGEXP_SUBSTR(V_LINE, '[^,@,/,'' ] ', 1, 2);
           V_location:= REGEXP_SUBSTR(V_LINE, '[^,@,/,'' ] ', 1, 3);
           V_profession:= REGEXP_SUBSTR(V_LINE, '[^,@,/,'' ] ', 1, 4);
           V_age:= REGEXP_SUBSTR(V_LINE, '[^,@,/,'' ] ', 1, 5);
          INSERT INTO person_list VALUES(V_id,V_name,V_location,V_profession,V_age);
          COMMIT;
           EXCEPTION
            WHEN NO_DATA_FOUND THEN
          EXIT;
        END;
      END LOOP;
    END IF;
    UTL_FILE.FCLOSE(F);
   END;
   /

Output the output should be like this - Output of the oracle database without special characters

CodePudding user response:

You can use below SQL query to get this output:

with raw_Text as (select '3890,john,melbourne,  ,developer,45' r from dual  union all select '2895,david,sydney,ana''lyst,61' from dual union all select '5198,kelly,perth,mana@ger,78' from dual
                union all select '7071,tim,canberra,tes/ter,61' from dual
                union all select '8132,mike,brisbane,leader,51' from dual    ),
cleaned_text as(select regexp_replace(regexp_replace(r,'[^A-Za-z0-9,]',''),',,',',') ct from raw_text)
select substr(ct, 1,instr(ct,',',1,1)-1) id, 
substr(ct,instr(ct,',',1,1) 1,instr(ct,',',1,2)-instr(ct,',',1,1)-1) name, 
substr(ct,instr(ct,',',1,2) 1,instr(ct,',',1,3)-instr(ct,',',1,2)-1) location,
substr(ct,instr(ct,',',1,3) 1,instr(ct,',',1,4)-instr(ct,',',1,3)-1) profession,
substr(ct,instr(ct,',',1,4) 1) age
from cleaned_text```

--Edit to add PL/SQL code:
DECLARE
      F UTL_FILE.FILE_TYPE;
      V_LINE VARCHAR2 (32767);
      V_id VARCHAR2(1000);
      V_name VARCHAR2(1000);
      V_location VARCHAR2(1000); 
      V_profession VARCHAR2(1000);
      V_age VARCHAR2(1000);
      v_ct varchar2(32767);
      
    BEGIN
      F := UTL_FILE.FOPEN ('DATA_PUMP_DIR', 'person_list.CSV', 'R');
    IF UTL_FILE.IS_OPEN(F) THEN
      LOOP
        BEGIN
          UTL_FILE.GET_LINE(F, V_LINE, 1000);
          IF V_LINE IS NULL THEN
            EXIT;
         END IF;

v_ct:= regexp_replace(regexp_replace(V_LINE,'[^A-Za-z0-9,]',''),',,',',') ;
v_Id:=substr(ct, 1,instr(ct,',',1,1)-1) ;
v_name:=substr(ct,instr(ct,',',1,1) 1,instr(ct,',',1,2)-instr(ct,',',1,1)-1) ;
v_location:=substr(ct,instr(ct,',',1,2) 1,instr(ct,',',1,3)-instr(ct,',',1,2)-1) ;
v_profession:=substr(ct,instr(ct,',',1,3) 1,instr(ct,',',1,4)-instr(ct,',',1,3)-1) ;
v_age:=substr(ct,instr(ct,',',1,4) 1);

 INSERT INTO person_list 
 values(v_id,v_name,v_location,v_profession,v_age);
          COMMIT;
           EXCEPTION
            WHEN NO_DATA_FOUND THEN
          EXIT;
        END;
      END LOOP;
    END IF;
    UTL_FILE.FCLOSE(F);
   END;
   /

CodePudding user response:

You can replace the lines containing assignments(v_id,v_name...v_age) and INSERT statement with the following

  SELECT LISTAGG(''''||REGEXP_REPLACE(
                              REGEXP_SUBSTR(
                                            v_line,'[^,] ', 1, level
                                           ), 
                                      '[[:punct:]]'
                                     )||'''',
                ',') WITHIN GROUP (ORDER BY level)
    INTO v_line
    FROM (SELECT v_line FROM dual)
  CONNECT BY level <= REGEXP_COUNT(v_line, ',')   1;
 
  EXECUTE IMMEDIATE 'INSERT INTO person_list VALUES('||v_line||')';

where you can remove all punctuation characters by using REGEXP_REPLACE.

  • Related