I need to write a MySQL query that returns the # of occurrences of the mode of each column individually without explicitly specifying the column names.
Assume the data is:
| apples | bananas | oranges |
| 4 | 4 | 3 |
| 2 | 2 | 1 |
| 4 | 3 | 5 |
| 3 | 3 | 5 |
| 4 | 1 | 5 |
The result I'm looking for is:
| mode | count |
| 4 | 3 |
| 3 | 2 |
| 5 | 3 |
To get mode for an individual column (apples):
SELECT apples AS mode, COUNT(*) AS Count
FROM tablename
GROUP BY apples
HAVING COUNT(*) >= ALL (SELECT COUNT(*) FROM tablename GROUP BY apples);
To return column names, I can perform the following:
SELECT COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = 'tablename'
Inefficiently and with explicit column names, I can achieve the result with UNION:
SELECT apples AS mode, COUNT(*) AS Count
FROM tablename
GROUP BY apples
HAVING COUNT(*) >= ALL (SELECT COUNT(*) FROM tablename GROUP BY apples)
UNION
SELECT bananas AS mode, COUNT(*) AS Count
FROM tablename
GROUP BY bananas
HAVING COUNT(*) >= ALL (SELECT COUNT(*) FROM tablename GROUP BY bananas)
UNION
SELECT oranges AS mode, COUNT(*) AS Count
FROM tablename
GROUP BY oranges
HAVING COUNT(*) >= ALL (SELECT COUNT(*) FROM tablename GROUP BY oranges)
I've been trying a bunch of different queries incorporating the two with GROUP BY and subqueries, but they are a mess, and I'm not making much headway. I have yet to generate a query to efficiently display the mode of each column with explicitly specifying column names (let alone using column names returned by the second query).
For example (doesn't feel like I'm even on the right track):
SELECT COUNT(*) AS count
FROM tablename
GROUP BY (
SELECT COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = 'tablename')
HAVING COUNT(*) >= ALL (SELECT COUNT(*) FROM tablename
GROUP BY (
SELECT COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = 'tablename'))
Thank you!!
CodePudding user response:
It looks not possible to get the expected result using pure SQL in a single query, but it might build a dynamic one like below:
DROP TABLE IF EXISTS archives.sample_table;
CREATE TABLE archives.sample_table (
apples INT,
bananas INT,
oranges INT
);
INSERT INTO sample_table VALUES
( 4 , 4 , 3 ),
( 2 , 2 , 1 ),
( 4 , 3 , 5 ),
( 3 , 3 , 5 ),
( 4 , 1 , 5 )
;
SET SESSION group_concat_max_len = 9999999;
SET @table_name = 'sample_table';
SET @table_schema = 'archives';
WITH RECURSIVE column_info AS (
SELECT ORDINAL_POSITION column_index, COLUMN_NAME column_name
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = @table_name and TABLE_SCHEMA = @table_schema
),
query_text AS (
SELECT i.column_index, CONCAT('SELECT column_name, column_value, counts FROM ( SELECT ''', i.column_name, ''' column_name, `', i.column_name, '` column_value, COUNT(*) counts, RANK() OVER(ORDER BY COUNT(*) DESC) rk FROM ', @table_name, ' GROUP BY `', i.column_name, '` ) r WHERE rk = 1') single_query
FROM column_info i
WHERE i.column_index = 1
UNION ALL
SELECT i.column_index, CONCAT('SELECT column_name, column_value, counts FROM ( SELECT ''', i.column_name, ''' column_name, `', i.column_name, '` column_value, COUNT(*) counts, RANK() OVER(ORDER BY COUNT(*) DESC) rk FROM ', @table_name, ' GROUP BY `', i.column_name, '` ) r WHERE rk = 1') single_query
FROM query_text prev
JOIN column_info i ON prev.column_index 1 = i.column_index
)
SELECT GROUP_CONCAT(single_query SEPARATOR ' UNION ALL ') INTO @stat_query
FROM query_text
;
PREPARE stmt FROM @stat_query
;
EXECUTE stmt
;
DEALLOCATE PREPARE stmt
;
Sample output
mysql> DROP TABLE IF EXISTS archives.sample_table;
Query OK, 0 rows affected (0.03 sec)
mysql> CREATE TABLE archives.sample_table ( apples INT, bananas INT, oranges INT);
Query OK, 0 rows affected (0.05 sec)
mysql> INSERT INTO sample_table VALUES ( 4 , 4 , 3 ),( 2 , 2 , 1 ),( 4 , 3 , 5 ),( 3 , 3 , 5 ),( 4 , 1 , 5 );
Query OK, 5 rows affected (0.01 sec)
Records: 5 Duplicates: 0 Warnings: 0
mysql> SET SESSION group_concat_max_len = 9999999;
Query OK, 0 rows affected (0.00 sec)
mysql> SET @table_name = 'sample_table';
Query OK, 0 rows affected (0.00 sec)
mysql> SET @table_schema = 'archives';
Query OK, 0 rows affected (0.00 sec)
mysql> WITH RECURSIVE column_info AS ( SELECT ORDINAL_POSITION column_index, COLUMN_NAME column_name FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = @table_name and TABLE_SCHEMA = @table_schema),query_text AS ( SELECT i.column_index, CONCAT('SELECT column_name, column_value, counts FROM ( SELECT ''', i.column_name, ''' column_name, `', i.column_name, '` column_value, COUNT(*) counts, RANK() OVER(ORDER BY COUNT(*) DESC) rk FROM ', @table_name, ' GROUP BY `', i.column_name, '` ) r WHERE rk = 1') single_query FROM column_info i WHERE i.column_index = 1 UNION ALL SELECT i.column_index, CONCAT('SELECT column_name, column_value, counts FROM ( SELECT ''', i.column_name, ''' column_name, `', i.column_name, '` column_value, COUNT(*) counts, RANK() OVER(ORDER BY COUNT(*) DESC) rk FROM ', @table_name, ' GROUP BY `', i.column_name, '` ) r WHERE rk = 1') single_query FROM query_text prev JOIN column_info i ON prev.column_index 1 = i.column_index) SELECT GROUP_CONCAT(single_query SEPARATOR ' UNION ALL ') INTO @stat_query FROM query_text;
Query OK, 1 row affected (0.00 sec)
mysql> PREPARE stmt FROM @stat_query;
Query OK, 0 rows affected (0.00 sec)
Statement prepared
mysql> EXECUTE stmt;
------------- -------------- --------
| column_name | column_value | counts |
------------- -------------- --------
| apples | 4 | 3 |
| bananas | 3 | 2 |
| oranges | 5 | 3 |
------------- -------------- --------
3 rows in set (0.00 sec)
mysql> DEALLOCATE PREPARE stmt;
Query OK, 0 rows affected (0.00 sec)
CodePudding user response:
Per @bill-karwin and @shadow, any attempt to dynamically set the columns in a query are impossible, so I:
- Created a procedure that generates the query text to produce the modes with UNIONs- it grabs the columns from INFORMATION_SCHEMA and then loops through the columns to create the query:
CREATE DEFINER=`root`@`localhost` PROCEDURE `column_modes`(OUT strsql TEXT)
BEGIN
SET @cols = (SELECT COUNT(*)
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = 'tablename');
SET @ct = (SELECT COUNT(*) FROM tablename);
SET @n = 0;
SET @sql = '';
WHILE @n < @cols DO
PREPARE stmt FROM "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 'tablename' LIMIT ?, 1 INTO @col";
EXECUTE stmt USING @n;
DEALLOCATE PREPARE stmt;
SET @sbq = CONCAT('SELECT ',
@col,
' AS mode, COUNT(*)/',
@ct,
' AS count FROM tablename GROUP BY ',
@col,
' HAVING COUNT(*) >= ALL (SELECT COUNT(*) FROM tablename GROUP BY ',
@col,
')');
SET @sql = CONCAT(@sql,
"SELECT '",
@col,
"' as 'column', GROUP_CONCAT(mode SEPARATOR ',') as mode, GROUP_CONCAT(count SEPARATOR ',') as count FROM (",
@sbq,
') temp');
IF @n != (@cols - 1) THEN
SET @sql = CONCAT(@sql, ' UNION ');
END IF;
SET @n = @n 1;
END WHILE;
-- SELECT @sql;
SET strsql = @sql;
END
- Executed the query with PREPARE:
CALL column_modes(@thisvar);
SELECT @thisvar;
PREPARE stmt FROM @thisvar;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
Thanks very much to @ProGu for a second solution as well.