I'm trying to provide a SQL query to find which XNumber
s had the most YNumber
s. Here is my data:
-- Create Tables
-- Create X Table
CREATE TABLE X (
XID int identity,
XNumber varchar(100) not null,
XDescription varchar(100) not null,
XCode varchar(100) not null,
VIT varchar(100) not null,
XLocation varchar(100) not null,
CONSTRAINT PK_X PRIMARY KEY (XID),
CONSTRAINT U1_X UNIQUE (XNumber),
CONSTRAINT U2_X UNIQUE (XCode)
)
-- Create Y Table
CREATE TABLE Y (
YID int identity,
YNumber varchar(100) not null,
YDescription varchar(100) not null,
YDate datetime not null default GetDate(),
CONSTRAINT PK_Y PRIMARY KEY(YID),
CONSTRAINT U1_Y UNIQUE(YNumber)
)
-- Create XY Table
CREATE TABLE XY (
XYID int identity,
XID int not null,
YID int not null,
CONSTRAINT PK_XY PRIMARY KEY(XYID),
CONSTRAINT FK1_XY FOREIGN KEY(XID) REFERENCES X(XID),
CONSTRAINT FK2_XY FOREIGN KEY(YID) REFERENCES Y(YID)
)
-- Insert Values in X
insert into X (XNumber, XDescription, XCode, VIT, XLocation) values ('ASDLKVNAEQRIUNBVLAXKD', 'THDSDF', 'Violet', 2, 'FROP');
insert into X (XNumber, XDescription, XCode, VIT, XLocation) values ('GBOGJMBNNUTHDFHJG', 'TFGSDFG', 'Pink', 20, 'DOOO');
insert into X (XNumber, XDescription, XCode, VIT, XLocation) values ('GKHL', 'SDFGSDF', 'Yellow', 21, 'DSUIO');
insert into X (XNumber, XDescription, XCode, VIT, XLocation) values ('DELKMDTKJGBT', 'HNYDGH', 'Green', 20, 'KABBA');
insert into X (XNumber, XDescription, XCode, VIT, XLocation) values ('WRTOUBHNPSDFIVKM', 'OYIU', 'Aquamarine', 19, 'SANFRIO');
insert into X (XNumber, XDescription, XCode, VIT, XLocation) values ('FGBKMOSEKFVKF', 'DCGTH', 'Red', 8, 'BOB');
insert into X (XNumber, XDescription, XCode, VIT, XLocation) values ('GBIMGIK', 'FTGHJM', 'Mauv', 10, 'CHURCH');
insert into X (XNumber, XDescription, XCode, VIT, XLocation) values ('JLJLH', 'FCTGHKII', 'Goldenrod', 5, 'ZAN');
insert into X (XNumber, XDescription, XCode, VIT, XLocation) values ('YITORKRKG', 'ITRTGJJ', 'Maroon', 7, 'ZERR');
insert into X (XNumber, XDescription, XCode, VIT, XLocation) values ('QWYTWEGD', 'PRTHJHJUKKM', 'Khaki', 9, 'ZAB');
-- Insert Values in Y
insert into Y (YNumber, YDescription, YDate) values ('8e3ac22d07ee88efa9e91fc0830cdee0f1263d82', 'POSTY', '12/22/2019');
insert into Y (YNumber, YDescription, YDate) values ('735c653fec21726598b47b8c59f7b4a4a8ee3618', 'PUTH 3500', '1/14/2021');
insert into Y (YNumber, YDescription, YDate) values ('ef3b991a287db1c3bfd9c568972bc35e5c7c6218', 'BTS', '4/4/2021');
insert into Y (YNumber, YDescription, YDate) values ('a45cec89ba00b77439802b342ce16be4c8ddbee4', 'JLO', '12/3/2019');
insert into Y (YNumber, YDescription, YDate) values ('747a92666a65947e9f03fd7cf7ad55fdd803a2ab', 'JT', '5/1/2020');
insert into Y (YNumber, YDescription, YDate) values ('19a5e1d0c75abea249e50997dd1252285c36404c', 'M5', '1/13/2022');
insert into Y (YNumber, YDescription, YDate) values ('1fe285c2249937f9c5c7608f46be736840ef6007', 'Laroi', '12/12/2021');
insert into Y (YNumber, YDescription, YDate) values ('e6d3614571648dceae07a4813824904f70bc4bef', 'Biebs', '5/1/2022');
insert into Y (YNumber, YDescription, YDate) values ('2a7401fd3f68e09f45564ff29fc4b16c1f7e88ba', 'The Weeknd', '2/21/2022');
insert into Y (YNumber, YDescription, YDate) values ('20a451d4be1894e8f585af47807df6cbe55edfe4', 'Taylor', '2/1/2022');
-- Insert Values in XY
insert into XY (XID, YID) values (2, 6);
insert into XY (XID, YID) values (6, 3);
insert into XY (XID, YID) values (3, 3);
insert into XY (XID, YID) values (9, 8);
insert into XY (XID, YID) values (5, 7);
insert into XY (XID, YID) values (4, 1);
insert into XY (XID, YID) values (3, 7);
insert into XY (XID, YID) values (3, 2);
insert into XY (XID, YID) values (5, 9);
insert into XY (XID, YID) values (6, 5);
Here is what I've tried:
SELECT
X.XNumber,
COUNT(XNumber) AS XFreq,
Y.YNumber
--COUNT(YNumber) AS YFreq
FROM XY
JOIN X ON X.XID = XY.XID
JOIN Y ON Y.YID = XY.YID
GROUP BY XNumber, YNumber
The result comes back as a row of just 1
s, which isn't correct.
Desired output would be a SELECT statement that answers the question: Which XNumber
s had the most YNumber
s?
CodePudding user response:
For each "Xnumber", you want to count the "YNumber", so just count "YNumber" and group by "Xnumber".
Since you only need the most "YNumber"s, use RANK()
function.
See db<>fiddle
WITH tmp AS (
SELECT
X.XNumber,
COUNT(Y.YNumber) AS YFreq,
RANK() OVER(ORDER BY COUNT(Y.YNumber) DESC) AS rnk
FROM XY
JOIN X ON X.XID = XY.XID
JOIN Y ON Y.YID = XY.YID
GROUP BY X.XNumber
)
SELECT tmp.*
FROM tmp
WHERE rnk = 1
CodePudding user response:
You're grouping by both columns. All rows are unique, which means each group will have a single row only. If you want to count the Ys, group by X only :
SELECT
X.XNumber,
COUNT(*) AS XFreq
FROM XY
JOIN X ON X.XID = XY.XID
GROUP BY XNumber
ORDER BY Count(*) desc
-------------------------
XNumber XFreq
GKHL 3
WRTOUBHNPSDFIVKM 2
FGBKMOSEKFVKF 2
GBOGJMBNNUTHDFHJG 1
YITORKRKG 1
DELKMDTKJGBT 1
You don't need to JOIN with Y because you aren't using any information from that table.
You can add a ranking with ROW_NUMBER()
as well, to rank rows by count :
SELECT
X.XNumber,
COUNT(*) AS XFreq,
ROW_NUMBER() OVER (ORDER BY COUNT(*) desc) as RN
FROM XY
JOIN X ON X.XID = XY.XID
GROUP BY XNumber
ORDER BY Count(*) desc
-------------------------
XNumber XFreq RN
GKHL 3 1
WRTOUBHNPSDFIVKM 2 2
FGBKMOSEKFVKF 2 3
GBOGJMBNNUTHDFHJG 1 4
YITORKRKG 1 5
DELKMDTKJGBT 1 6
You can't use ranking functions in a WHERE clause though. To filter by the row number column (RN) you need to wrap the original query as a CTE or subquery
;with ranks as (
SELECT
X.XNumber,
COUNT(*) AS XFreq,
ROW_NUMBER() OVER (ORDER BY COUNT(*) desc) as RN
FROM XY
JOIN X ON X.XID = XY.XID
GROUP BY XNumber
)
SELECT *
FROM ranks
WHERE RN=1
-------------------
XNumber XFreq RN
GKHL 3 1
or the equivalent
SELECT *
FROM (
SELECT
X.XNumber,
COUNT(*) AS XFreq,
ROW_NUMBER() OVER (ORDER BY COUNT(*) desc) as RN
FROM XY
JOIN X ON X.XID = XY.XID
GROUP BY XNumber
) ranks
WHERE RN=1
CodePudding user response:
You can simplify this quite a bit. You don't need to join Y
and you don't need to use ROW_NUMBER
. You can use TOP (1)
and ORDER BY
instead.
Just GROUP BY XNumber
then COUNT(*)
, and take the highest count.
SELECT TOP (1)
X.XNumber,
COUNT(*) AS YFreq
FROM XY
JOIN X ON X.XID = XY.XID
GROUP BY X.XNumber
ORDER BY
YFreq DESC;