Home > Blockchain >  update column "Count" to have the value of its highest duplicate row ID, delete duplicate
update column "Count" to have the value of its highest duplicate row ID, delete duplicate

Time:04-06

// update column "Count" to have the value of its highest duplicate row ID, delete //duplicate rows based on the "Url" column (leaving only the row with the lowest ID),

| ID | First_Name | Count | Url |
| -- | ---------- | ----- | ---------- |
| 1  | A          | 10    |  www.A.com |
| 2  | B          | 21    |  www.B.com |
| 3  | C          | 12    |  www.C.com |
| 4  | D          | 31    |  www.D.com |
| 5  | A          | 13    |  www.A.com |
| 6  | D          | 18    |  www.D.com |
| 7  | A          | 5     |  www.A.com |

EXPECTED RESULT

| ID | First_Name | Count | Url |
| -- | ---------- | ----- | --------- |
| 1  | A          | 5     | www.A.com |
| 2  | B          | 21    | www.B.com |
| 3  | C          | 12    | www.C.com |
| 4  | D          | 18    | www.D.com |

CodePudding user response:

As you did not include what kind of database this is, I have to take a wild guess. But here is how you can select the latest duplicates in SQL Server Management Studio v17.9.1 Fiddle: https://dbfiddle.uk/?rdbms=sqlserver_2019&fiddle=e266cfbca912dc1a541f5f474a0b018d

This is how you can select the newest values, and with this logic, you can simply update the values where the First_Name matches, and delete the rest etc.

CREATE TABLE #testValues (
   ID int NOT NULL
   ,[First_Name] varchar(255) NOT NULL
   ,[Count]  int NULL
   ,[Url]  varchar(255) NOT NULL
   ) 

INSERT INTO #testvalues (
   ID
   ,[First_Name]
   ,[Count]
   ,[Url]
   )

VALUES (1,'A',10,'www.A.com'),
(2,'B',21,'www.B.com'),
(3,'C',12,'www.C.com'),
(4,'D',31,'www.D.com'),
(5,'A',13,'www.A.com'),
(6,'D',18,'www.D.com'),
(7,'A',5,'www.A.com');

select min(a.id) as 'ID', a.First_Name
into #lowestId
from #testValues a
group by a.first_name

SELECT a.*
into #newValues
from #testValues a
left join #testValues b
on a.First_Name=b.First_Name and a.Url=b.Url and a.id < b.ID
where b.ID is null
order by a.First_Name

select case when b.ID is null then a.ID else b.ID end as 'ID', 
a.count, 
a.first_name, 
a.url
from #newValues a
left join #lowestId b
on a.first_name=b.first_name and b.ID < a.ID
order by case when b.ID is null then a.ID else b.ID end

drop table #lowestId
drop table #newValues
drop table #testValues

CodePudding user response:

Another one without changing table structure :

UPDATE your_table yt SET yt.count = (SELECT max(yt2.id) FROM your_table yt2 WHERE yt2.Url = yt.Url AND yt.id != yt2.id);

DELETE FROM your_table yt WHERE yt.Url IN (SELECT yt2.Url from your_table WHERE yt2.id < yt.id)

CodePudding user response:

I think you want to delete the doubles values group by url, so you can try this :

DELETE t FROM your_table t 
INNER JOIN (
   SELECT Url,MIN(count) AS min_count
   FROM your_table
   GROUP BY Url 
   HAVING COUNT(ID) > 1
) as t2 on t2.Url = t.Url
WHERE t.count > t2.min_count
  • Related