Home > OS >  Find Total Occurrence of a Variable Based on Another Variable in SQL
Find Total Occurrence of a Variable Based on Another Variable in SQL

Time:06-16

I'm trying to provide a SQL query to find which XNumbers had the most YNumbers. Here is my data:



-- Create Tables
    -- Create X Table
        CREATE TABLE X (
            XID int identity,
            XNumber varchar(100) not null,
            XDescription varchar(100) not null,
            XCode varchar(100) not null,
            VIT varchar(100) not null,
            XLocation varchar(100) not null,
            CONSTRAINT PK_X PRIMARY KEY (XID),
            CONSTRAINT U1_X UNIQUE (XNumber),
            CONSTRAINT U2_X UNIQUE (XCode)
        )
        
    -- Create Y Table
        CREATE TABLE Y (
            YID int identity,
            YNumber varchar(100) not null,
            YDescription varchar(100) not null,
            YDate datetime not null default GetDate(),
            CONSTRAINT PK_Y PRIMARY KEY(YID),
            CONSTRAINT U1_Y UNIQUE(YNumber)
        )
        
    -- Create XY Table
        CREATE TABLE XY (
            XYID int identity,
            XID int not null,
            YID int not null,
            CONSTRAINT PK_XY PRIMARY KEY(XYID),
            CONSTRAINT FK1_XY FOREIGN KEY(XID) REFERENCES X(XID),
            CONSTRAINT FK2_XY FOREIGN KEY(YID) REFERENCES Y(YID)
        )


-- Insert Values in X
        insert into X (XNumber, XDescription, XCode, VIT, XLocation) values ('ASDLKVNAEQRIUNBVLAXKD', 'THDSDF', 'Violet', 2, 'FROP');
        insert into X (XNumber, XDescription, XCode, VIT, XLocation) values ('GBOGJMBNNUTHDFHJG', 'TFGSDFG',    'Pink', 20, 'DOOO');
        insert into X (XNumber, XDescription, XCode, VIT, XLocation) values ('GKHL', 'SDFGSDF', 'Yellow', 21, 'DSUIO');
        insert into X (XNumber, XDescription, XCode, VIT, XLocation) values ('DELKMDTKJGBT', 'HNYDGH', 'Green', 20, 'KABBA');
        insert into X (XNumber, XDescription, XCode, VIT, XLocation) values ('WRTOUBHNPSDFIVKM', 'OYIU', 'Aquamarine', 19, 'SANFRIO');
        insert into X (XNumber, XDescription, XCode, VIT, XLocation) values ('FGBKMOSEKFVKF', 'DCGTH', 'Red', 8, 'BOB');
        insert into X (XNumber, XDescription, XCode, VIT, XLocation) values ('GBIMGIK', 'FTGHJM', 'Mauv', 10, 'CHURCH');
        insert into X (XNumber, XDescription, XCode, VIT, XLocation) values ('JLJLH', 'FCTGHKII', 'Goldenrod', 5, 'ZAN');
        insert into X (XNumber, XDescription, XCode, VIT, XLocation) values ('YITORKRKG', 'ITRTGJJ', 'Maroon', 7, 'ZERR');
        insert into X (XNumber, XDescription, XCode, VIT, XLocation) values ('QWYTWEGD', 'PRTHJHJUKKM', 'Khaki', 9, 'ZAB');

    -- Insert Values in Y

        insert into Y (YNumber, YDescription, YDate) values ('8e3ac22d07ee88efa9e91fc0830cdee0f1263d82', 'POSTY', '12/22/2019');
        insert into Y (YNumber, YDescription, YDate) values ('735c653fec21726598b47b8c59f7b4a4a8ee3618', 'PUTH 3500', '1/14/2021');
        insert into Y (YNumber, YDescription, YDate) values ('ef3b991a287db1c3bfd9c568972bc35e5c7c6218', 'BTS', '4/4/2021');
        insert into Y (YNumber, YDescription, YDate) values ('a45cec89ba00b77439802b342ce16be4c8ddbee4', 'JLO', '12/3/2019');
        insert into Y (YNumber, YDescription, YDate) values ('747a92666a65947e9f03fd7cf7ad55fdd803a2ab', 'JT', '5/1/2020');
        insert into Y (YNumber, YDescription, YDate) values ('19a5e1d0c75abea249e50997dd1252285c36404c', 'M5', '1/13/2022');
        insert into Y (YNumber, YDescription, YDate) values ('1fe285c2249937f9c5c7608f46be736840ef6007', 'Laroi', '12/12/2021');
        insert into Y (YNumber, YDescription, YDate) values ('e6d3614571648dceae07a4813824904f70bc4bef', 'Biebs', '5/1/2022');
        insert into Y (YNumber, YDescription, YDate) values ('2a7401fd3f68e09f45564ff29fc4b16c1f7e88ba', 'The Weeknd', '2/21/2022');
        insert into Y (YNumber, YDescription, YDate) values ('20a451d4be1894e8f585af47807df6cbe55edfe4', 'Taylor', '2/1/2022');

    -- Insert Values in XY

        insert into XY (XID, YID) values (2, 6);
        insert into XY (XID, YID) values (6, 3);
        insert into XY (XID, YID) values (3, 3);
        insert into XY (XID, YID) values (9, 8);
        insert into XY (XID, YID) values (5, 7);
        insert into XY (XID, YID) values (4, 1);
        insert into XY (XID, YID) values (3, 7);
        insert into XY (XID, YID) values (3, 2);
        insert into XY (XID, YID) values (5, 9);
        insert into XY (XID, YID) values (6, 5);

Here is what I've tried:

        SELECT 
            X.XNumber,
            COUNT(XNumber) AS XFreq,
            Y.YNumber
            --COUNT(YNumber) AS YFreq
        FROM XY
        JOIN X ON X.XID = XY.XID
        JOIN Y ON Y.YID = XY.YID
        GROUP BY XNumber, YNumber

The result comes back as a row of just 1s, which isn't correct.

Desired output would be a SELECT statement that answers the question: Which XNumbers had the most YNumbers?

CodePudding user response:

For each "Xnumber", you want to count the "YNumber", so just count "YNumber" and group by "Xnumber".

Since you only need the most "YNumber"s, use RANK() function.

See db<>fiddle

        WITH tmp AS (
        SELECT 
            X.XNumber,
            COUNT(Y.YNumber) AS YFreq,
            RANK() OVER(ORDER BY COUNT(Y.YNumber) DESC) AS rnk
        FROM XY
        JOIN X ON X.XID = XY.XID
        JOIN Y ON Y.YID = XY.YID
        GROUP BY X.XNumber
        )
        
        SELECT tmp.*
        FROM tmp
        WHERE rnk = 1

CodePudding user response:

You're grouping by both columns. All rows are unique, which means each group will have a single row only. If you want to count the Ys, group by X only :

SELECT 
    X.XNumber,
    COUNT(*) AS XFreq
FROM XY
JOIN X ON X.XID = XY.XID
GROUP BY XNumber
ORDER BY Count(*) desc
-------------------------
XNumber             XFreq
GKHL                3
WRTOUBHNPSDFIVKM    2
FGBKMOSEKFVKF       2
GBOGJMBNNUTHDFHJG   1
YITORKRKG           1
DELKMDTKJGBT        1

You don't need to JOIN with Y because you aren't using any information from that table.

You can add a ranking with ROW_NUMBER() as well, to rank rows by count :

SELECT 
    X.XNumber,
    COUNT(*) AS XFreq,
    ROW_NUMBER() OVER (ORDER BY COUNT(*) desc) as RN
FROM XY
JOIN X ON X.XID = XY.XID
GROUP BY XNumber
ORDER BY Count(*) desc
-------------------------
XNumber             XFreq   RN
GKHL                3       1
WRTOUBHNPSDFIVKM    2       2
FGBKMOSEKFVKF       2       3
GBOGJMBNNUTHDFHJG   1       4
YITORKRKG           1       5
DELKMDTKJGBT        1       6

You can't use ranking functions in a WHERE clause though. To filter by the row number column (RN) you need to wrap the original query as a CTE or subquery

;with ranks as (
    SELECT 
        X.XNumber,
        COUNT(*) AS XFreq,
        ROW_NUMBER() OVER (ORDER BY COUNT(*) desc) as RN
    FROM XY
    JOIN X ON X.XID = XY.XID
    GROUP BY XNumber
)
SELECT * 
FROM ranks 
WHERE RN=1
-------------------
XNumber XFreq   RN
GKHL    3       1

or the equivalent

SELECT * 
FROM (
    SELECT 
        X.XNumber,
        COUNT(*) AS XFreq,
        ROW_NUMBER() OVER (ORDER BY COUNT(*) desc) as RN
    FROM XY
    JOIN X ON X.XID = XY.XID
    GROUP BY XNumber
) ranks
WHERE RN=1

CodePudding user response:

You can simplify this quite a bit. You don't need to join Y and you don't need to use ROW_NUMBER. You can use TOP (1) and ORDER BY instead.

Just GROUP BY XNumber then COUNT(*), and take the highest count.

SELECT TOP (1)
  X.XNumber,
  COUNT(*) AS YFreq
FROM XY
JOIN X ON X.XID = XY.XID
GROUP BY X.XNumber
ORDER BY
  YFreq DESC;

db<>fiddle

  • Related