SELECT VERSION();
# => 8.0.28
Lets create dummy table with json column and fill the table with 10 000 of rows with random arrays of 10 values:
create table dummy_table2 (id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY, json_column JSON);
INSERT INTO dummy_table2 (json_column)
WITH RECURSIVE seq AS (SELECT 0 AS value UNION ALL SELECT value 1 FROM seq LIMIT 100)
SELECT JSON_ARRAY(
CAST((FLOOR(RAND() * (100 - 1)) 1) AS UNSIGNED INTEGER),
CAST((FLOOR(RAND() * (100 - 1)) 1) AS UNSIGNED INTEGER),
CAST((FLOOR(RAND() * (100 - 1)) 1) AS UNSIGNED INTEGER),
CAST((FLOOR(RAND() * (100 - 1)) 1) AS UNSIGNED INTEGER),
CAST((FLOOR(RAND() * (100 - 1)) 1) AS UNSIGNED INTEGER),
CAST((FLOOR(RAND() * (100 - 1)) 1) AS UNSIGNED INTEGER),
CAST((FLOOR(RAND() * (100 - 1)) 1) AS UNSIGNED INTEGER),
CAST((FLOOR(RAND() * (100 - 1)) 1) AS UNSIGNED INTEGER),
CAST((FLOOR(RAND() * (100 - 1)) 1) AS UNSIGNED INTEGER),
CAST((FLOOR(RAND() * (100 - 1)) 1) AS UNSIGNED INTEGER)
)
FROM seq as seq1 JOIN seq as seq2 ON 1 = 1;
Check the count of rows including some arbitrary number:
select count(*) from dummy_table2 where JSON_CONTAINS(json_column, CAST(51 AS JSON), '$')
# => 969
select count(*) from dummy_table2 where JSON_CONTAINS(json_column, CAST(51 AS JSON), '$') = 1;
# 969
select count(*) from dummy_table2 where CAST(51 AS JSON) MEMBER OF(json_column)
# => 969
select count(*) from dummy_table2 where CAST(51 AS JSON) MEMBER OF(json_column) = 1
# 969
now let's create index on json column and check the results:
CREATE INDEX idx1 on dummy_table2((CAST(json_column as UNSIGNED ARRAY)));
Both MEMBER OF variations return 969:
select count(*) from dummy_table2 where CAST(51 AS JSON) MEMBER OF(json_column)
# => 969
select count(*) from dummy_table2 where CAST(51 AS JSON) MEMBER OF(json_column) = 1
# => 969
but for some reason both of them does not use index, but table scan
-> Aggregate: count(0) (cost=2019.45 rows=9976) (actual time=17.437..17.437 rows=1 loops=1)
-> Filter: (<cache>(cast(51 as json)) member of (dummy_table2.json_column) = 1) (cost=1021.85 rows=9976) (actual time=0.139..17.253 rows=969 loops=1)
-> Table scan on dummy_table2 (cost=1021.85 rows=9976) (actual time=0.037..7.882 rows=10000 loops=1)
JSON_CONTAINS is even more weird:
select count(*) from dummy_table2 where JSON_CONTAINS(json_column, CAST(51 AS JSON), '$') = 1;
# => 969
and again table scan
-> Aggregate: count(0) (cost=2019.45 rows=9976) (actual time=101.233..101.234 rows=1 loops=1)
-> Filter: (json_contains(cast(json_column as unsigned array),json'[51]','$') = 1) (cost=1021.85 rows=9976) (actual time=0.248..101.074 rows=969 loops=1)
-> Table scan on dummy_table2 (cost=1021.85 rows=9976) (actual time=0.036..71.168 rows=10000 loops=1)
But take a look at this count
select count(*) from dummy_table2 where JSON_CONTAINS(json_column, CAST(51 AS JSON), '$')
# => 130
BUT it uses index!
-> Aggregate: count(0) (cost=71.76 rows=130) (actual time=1.473..1.473 rows=1 loops=1)
-> Filter: json_contains(cast(json_column as unsigned array),json'[51]','$') (cost=58.76 rows=130) (actual time=0.067..1.454 rows=130 loops=1)
-> Index range scan on dummy_table2 using idx1 over (51 MEMBER OF (json_column)) (cost=58.76 rows=130) (actual time=0.060..1.153 rows=130 loops=1)
So can anyone explain me what is going on?
- Why index is not used in most examples and how to force myysql to use it
- Why still all of the queries are fast (less than 50ms) on 10k of rows
- Is there a difference to compare the result of
JSON_CONTAINS/MEMBER OF
withtrue/false
and not 1 or 0? I thought there should not be the difference since 0 is false for MSQL and 1 is true.
CodePudding user response:
This is a bug, see Multi-Valued Indexes only takes into account a portion of a whole data.
You can see that when using your index, the execution plan only says "rows=130".
It is fixed in MySQL 8.0.30:
InnoDB: A query on a table with a JSON column returned only a partial result set after adding a multi-valued index. (Bug #106621, Bug #33917625)