// update column "Count" to have the value of its highest duplicate row ID, delete //duplicate rows based on the "Url" column (leaving only the row with the lowest ID),
| ID | First_Name | Count | Url |
| -- | ---------- | ----- | ---------- |
| 1 | A | 10 | www.A.com |
| 2 | B | 21 | www.B.com |
| 3 | C | 12 | www.C.com |
| 4 | D | 31 | www.D.com |
| 5 | A | 13 | www.A.com |
| 6 | D | 18 | www.D.com |
| 7 | A | 5 | www.A.com |
EXPECTED RESULT
| ID | First_Name | Count | Url |
| -- | ---------- | ----- | --------- |
| 1 | A | 5 | www.A.com |
| 2 | B | 21 | www.B.com |
| 3 | C | 12 | www.C.com |
| 4 | D | 18 | www.D.com |
CodePudding user response:
As you did not include what kind of database this is, I have to take a wild guess. But here is how you can select the latest duplicates in SQL Server Management Studio v17.9.1 Fiddle: https://dbfiddle.uk/?rdbms=sqlserver_2019&fiddle=e266cfbca912dc1a541f5f474a0b018d
This is how you can select the newest values, and with this logic, you can simply update the values where the First_Name matches, and delete the rest etc.
CREATE TABLE #testValues (
ID int NOT NULL
,[First_Name] varchar(255) NOT NULL
,[Count] int NULL
,[Url] varchar(255) NOT NULL
)
INSERT INTO #testvalues (
ID
,[First_Name]
,[Count]
,[Url]
)
VALUES (1,'A',10,'www.A.com'),
(2,'B',21,'www.B.com'),
(3,'C',12,'www.C.com'),
(4,'D',31,'www.D.com'),
(5,'A',13,'www.A.com'),
(6,'D',18,'www.D.com'),
(7,'A',5,'www.A.com');
select min(a.id) as 'ID', a.First_Name
into #lowestId
from #testValues a
group by a.first_name
SELECT a.*
into #newValues
from #testValues a
left join #testValues b
on a.First_Name=b.First_Name and a.Url=b.Url and a.id < b.ID
where b.ID is null
order by a.First_Name
select case when b.ID is null then a.ID else b.ID end as 'ID',
a.count,
a.first_name,
a.url
from #newValues a
left join #lowestId b
on a.first_name=b.first_name and b.ID < a.ID
order by case when b.ID is null then a.ID else b.ID end
drop table #lowestId
drop table #newValues
drop table #testValues
CodePudding user response:
Another one without changing table structure :
UPDATE your_table yt SET yt.count = (SELECT max(yt2.id) FROM your_table yt2 WHERE yt2.Url = yt.Url AND yt.id != yt2.id);
DELETE FROM your_table yt WHERE yt.Url IN (SELECT yt2.Url from your_table WHERE yt2.id < yt.id)
CodePudding user response:
I think you want to delete the doubles values group by url, so you can try this :
DELETE t FROM your_table t
INNER JOIN (
SELECT Url,MIN(count) AS min_count
FROM your_table
GROUP BY Url
HAVING COUNT(ID) > 1
) as t2 on t2.Url = t.Url
WHERE t.count > t2.min_count