I wanted to select / delete all duplicate rows on multiple table. I have searched the internet for clues but all I see are queries that selects duplicate rows based one or more column. Like this:
SELECT col1 count(*) from table_name group by col1 having count(*) > 1
What I want to achieve is to select duplicate rows based on ALL COLUMNS, as long as all their values in each column are the same.
I am dealing with multiple tables so I want it to be generic so it could work on any tables.
CodePudding user response:
If you want to delete the duplicates and leave the first instance of each row then you can use a sub-query within the delete and correlate on the ROWID
pseudo-column and find the rows within each group that have a ROW_NUMBER
greater than 1:
DELETE FROM table_name
WHERE ROWID IN (
SELECT ROWID
FROM (
SELECT ROW_NUMBER() OVER (
PARTITION BY col1, col2, col3, /*...,*/ colN -- List all the columns
ORDER BY ROWID
) AS rn
FROM table_name
)
WHERE rn > 1
);
If you want to delete all copies of all duplicates then:
DELETE FROM table_name
WHERE ROWID IN (
SELECT ROWID
FROM (
SELECT COUNT(*) OVER (
PARTITION BY col1, col2, col3, /*...,*/ colN -- List all the columns
) AS cnt
FROM table_name
)
WHERE cnt > 1
);
CodePudding user response:
I have adopted MTO answer and made some corrections and more generic
declare
l_column_list varchar2(32767);
l_table_name varchar2(4000) := 'AAAA_DATES';
begin
for rec in (select column_name, column_id
from dba_tab_cols
where table_name = l_table_name
order by column_id) loop
if (rec.column_id = 1) then
l_column_list := rec.column_name;
else
l_column_list := l_column_list || ',' || rec.column_name;
end if;
end loop;
execute immediate 'DELETE FROM ' || l_table_name ||
' WHERE ROWID IN (
SELECT ROW_id
FROM (
SELECT rowid row_id, ROW_NUMBER() OVER (
PARTITION BY ' || l_column_list ||
' ORDER BY ROWNUM
) AS rn
FROM ' || l_table_name || '
)
WHERE rn > 1
)';
dbms_output.put_line(sql%rowcount);
commit;
end;