update column "Count" to have the value of its highest duplicate row ID, delete duplicate-CodePudding

// update column "Count" to have the value of its highest duplicate row ID, delete //duplicate rows based on the "Url" column (leaving only the row with the lowest ID),

| ID | First_Name | Count | Url |
| -- | ---------- | ----- | ---------- |
| 1  | A          | 10    |  www.A.com |
| 2  | B          | 21    |  www.B.com |
| 3  | C          | 12    |  www.C.com |
| 4  | D          | 31    |  www.D.com |
| 5  | A          | 13    |  www.A.com |
| 6  | D          | 18    |  www.D.com |
| 7  | A          | 5     |  www.A.com |

EXPECTED RESULT

| ID | First_Name | Count | Url |
| -- | ---------- | ----- | --------- |
| 1  | A          | 5     | www.A.com |
| 2  | B          | 21    | www.B.com |
| 3  | C          | 12    | www.C.com |
| 4  | D          | 18    | www.D.com |

CodePudding user response：

As you did not include what kind of database this is, I have to take a wild guess. But here is how you can select the latest duplicates in SQL Server Management Studio v17.9.1 Fiddle: https://dbfiddle.uk/?rdbms=sqlserver_2019&fiddle=e266cfbca912dc1a541f5f474a0b018d

This is how you can select the newest values, and with this logic, you can simply update the values where the First_Name matches, and delete the rest etc.

CREATE TABLE #testValues (
   ID int NOT NULL
   ,[First_Name] varchar(255) NOT NULL
   ,[Count]  int NULL
   ,[Url]  varchar(255) NOT NULL
   ) 

INSERT INTO #testvalues (
   ID
   ,[First_Name]
   ,[Count]
   ,[Url]
   )

VALUES (1,'A',10,'www.A.com'),
(2,'B',21,'www.B.com'),
(3,'C',12,'www.C.com'),
(4,'D',31,'www.D.com'),
(5,'A',13,'www.A.com'),
(6,'D',18,'www.D.com'),
(7,'A',5,'www.A.com');

select min(a.id) as 'ID', a.First_Name
into #lowestId
from #testValues a
group by a.first_name

SELECT a.*
into #newValues
from #testValues a
left join #testValues b
on a.First_Name=b.First_Name and a.Url=b.Url and a.id < b.ID
where b.ID is null
order by a.First_Name

select case when b.ID is null then a.ID else b.ID end as 'ID', 
a.count, 
a.first_name, 
a.url
from #newValues a
left join #lowestId b
on a.first_name=b.first_name and b.ID < a.ID
order by case when b.ID is null then a.ID else b.ID end

drop table #lowestId
drop table #newValues
drop table #testValues

CodePudding user response：

Another one without changing table structure :

UPDATE your_table yt SET yt.count = (SELECT max(yt2.id) FROM your_table yt2 WHERE yt2.Url = yt.Url AND yt.id != yt2.id);

DELETE FROM your_table yt WHERE yt.Url IN (SELECT yt2.Url from your_table WHERE yt2.id < yt.id)

CodePudding user response：

I think you want to delete the doubles values group by url, so you can try this :

DELETE t FROM your_table t 
INNER JOIN (
   SELECT Url,MIN(count) AS min_count
   FROM your_table
   GROUP BY Url 
   HAVING COUNT(ID) > 1
) as t2 on t2.Url = t.Url
WHERE t.count > t2.min_count