I have a query I am trying to optimize but the results aren't making sense to me.
It takes roughly 7 seconds to complete this:
SELECT users.*,states.name AS state_name, CONCAT_WS(" - ",L1.updated_at,L1.last_location) AS last_activity,
(SELECT COUNT(*) FROM user_courses WHERE user_id = users.id) AS num_products_total,
(SELECT COUNT(*) FROM user_courses LEFT JOIN courses ON (courses.id = user_courses.course_id) WHERE user_courses.user_id = users.id AND courses.type_id = 5) AS num_products_plt,
(SELECT COUNT(*) FROM user_courses LEFT JOIN courses ON (courses.id = user_courses.course_id) WHERE user_courses.user_id = users.id AND courses.type_id = 7) AS num_products_ce,
(SELECT COUNT(*) FROM user_courses LEFT JOIN courses ON (courses.id = user_courses.course_id) WHERE user_courses.user_id = users.id AND courses.type_id = 6) AS num_products_sh,
(SELECT COUNT(*) FROM user_courses LEFT JOIN courses ON (courses.id = user_courses.course_id) WHERE user_courses.user_id = users.id AND courses.type_id = 8) AS num_products_vc,
(SELECT COUNT(*) FROM user_courses LEFT JOIN courses ON (courses.id = user_courses.course_id) WHERE user_courses.user_id = users.id AND courses.type_id = 9) AS num_products_book
FROM users LEFT JOIN states ON (states.id = users.state_id)
LEFT JOIN logs_user_login AS L1 ON L1.id = (SELECT L2.id FROM logs_user_login AS L2 WHERE L2.user_id = users.id ORDER BY L2.updated_at DESC LIMIT 1) ORDER BY users.id desc LIMIT 0,20;
If I modify (remove counts of course types) to this is takes 2.5 seconds:
SELECT users.*,states.name AS state_name, CONCAT_WS(" - ",L1.updated_at,L1.last_location) AS last_activity
FROM users LEFT JOIN states ON (states.id = users.state_id)
LEFT JOIN logs_user_login AS L1 ON L1.id = (SELECT L2.id FROM logs_user_login AS L2 WHERE L2.user_id = users.id ORDER BY L2.updated_at DESC LIMIT 1) ORDER BY users.id desc LIMIT 0,20;
If I modify (remove get last login log) to this is takes 0.005 seconds:
SELECT users.*,states.name AS state_name,
(SELECT COUNT(*) FROM user_courses WHERE user_id = users.id) AS num_products_total,
(SELECT COUNT(*) FROM user_courses LEFT JOIN courses ON (courses.id = user_courses.course_id) WHERE user_courses.user_id = users.id AND courses.type_id = 5) AS num_products_plt,
(SELECT COUNT(*) FROM user_courses LEFT JOIN courses ON (courses.id = user_courses.course_id) WHERE user_courses.user_id = users.id AND courses.type_id = 7) AS num_products_ce,
(SELECT COUNT(*) FROM user_courses LEFT JOIN courses ON (courses.id = user_courses.course_id) WHERE user_courses.user_id = users.id AND courses.type_id = 6) AS num_products_sh,
(SELECT COUNT(*) FROM user_courses LEFT JOIN courses ON (courses.id = user_courses.course_id) WHERE user_courses.user_id = users.id AND courses.type_id = 8) AS num_products_vc,
(SELECT COUNT(*) FROM user_courses LEFT JOIN courses ON (courses.id = user_courses.course_id) WHERE user_courses.user_id = users.id AND courses.type_id = 9) AS num_products_book
FROM users LEFT JOIN states ON (states.id = users.state_id) ORDER BY users.id desc LIMIT 0,20;
UPDATE I narrowed it down further but still doesn't make sense to my why this is happening. If I remove the L1 table references from the SELECT then the query goes 0.005 again even with the original query that took 7 seconds, the only difference is not grabbing the values from L1.
It takes roughly 0.005 seconds to complete this (Only took the L1 returns out of the select):
SELECT users.*,states.name AS state_name,
(SELECT COUNT(*) FROM user_courses WHERE user_id = users.id) AS num_products_total,
(SELECT COUNT(*) FROM user_courses LEFT JOIN courses ON (courses.id = user_courses.course_id) WHERE user_courses.user_id = users.id AND courses.type_id = 5) AS num_products_plt,
(SELECT COUNT(*) FROM user_courses LEFT JOIN courses ON (courses.id = user_courses.course_id) WHERE user_courses.user_id = users.id AND courses.type_id = 7) AS num_products_ce,
(SELECT COUNT(*) FROM user_courses LEFT JOIN courses ON (courses.id = user_courses.course_id) WHERE user_courses.user_id = users.id AND courses.type_id = 6) AS num_products_sh,
(SELECT COUNT(*) FROM user_courses LEFT JOIN courses ON (courses.id = user_courses.course_id) WHERE user_courses.user_id = users.id AND courses.type_id = 8) AS num_products_vc,
(SELECT COUNT(*) FROM user_courses LEFT JOIN courses ON (courses.id = user_courses.course_id) WHERE user_courses.user_id = users.id AND courses.type_id = 9) AS num_products_book
FROM users LEFT JOIN states ON (states.id = users.state_id)
LEFT JOIN logs_user_login AS L1 ON L1.id = (SELECT L2.id FROM logs_user_login AS L2 WHERE L2.user_id = users.id ORDER BY L2.updated_at DESC LIMIT 1) ORDER BY users.id desc LIMIT 0,20;
Why would I have such a HUGE hit just returning the value from the joined table? It's returning a NULL or a small string under 64 characters.
CodePudding user response:
For the log entry, you don't need a join on a correlated sub-query (which accesses the logs twice, once by user and timestamp, once by id), you can just use a scalar sub-query, much like you did for the course counts.
Conversely, the course counts can be calculated in one pass, using conditional aggregation, reducing passes over those tables.
SELECT
users.*,
states.name AS state_name,
user_course_count.*,
(
SELECT
CONCAT_WS(" - ", updated at, last_location)
FROM
logs_user_login
WHERE
user_id = users.id
ORDER BY
updated_at DESC LIMIT 1
)
AS last_activity
FROM
users
LEFT JOIN
states
ON states.id = users.state_id
LEFT JOIN
(
SELECT
user_courses.user_id,
COUNT(*) AS num_products_total,
COUNT(CASE WHEN courses.type_id = 5 THEN 1 END) AS num_products_5,
COUNT(CASE WHEN courses.type_id = 6 THEN 1 END) AS num_products_6,
COUNT(CASE WHEN courses.type_id = 7 THEN 1 END) AS num_products_7,
COUNT(CASE WHEN courses.type_id = 8 THEN 1 END) AS num_products_8,
COUNT(CASE WHEN courses.type_id = 9 THEN 1 END) AS num_products_9
FROM
user_courses
INNER JOIN
courses
ON courses.id = user_courses.course_id
GROUP BY
user_courses.user_id
)
AS user_course_count
ON users.id = user_course_count.user_id
ORDER BY
users.id DESC LIMIT 0,20;