I have the following table:
province_id | owner_id | game_id | resource_production | valid_from | valid_until | static_province_id |
---|---|---|---|---|---|---|
auto incremented Integer | integer | integer | integer | timestamp | timestamp | integer |
I have two indexes:
- test_valid_from(game_id, owner_id, valid_from, valid_until)
- test_valid_until(game_id, owner_id, valid_until, valid_from)
Every time the production of a province changes the valid_until timestamp of the old provinces gets set to the current timestamp. Also a new province is inserted with the current timestamp in the valid_from field. If the province is the newest province of its static_province_id in this game valid_until is null.
My Goal
I want to have a sub 1 second query that can get me the total resource production for every change in resource production of an owner in a specified game.
My first approach
1. Query
SELECT
a.game_id,
a.valid_from,
a.owner_id,
SUM(b.resource_production)
FROM
(select distinct valid_from, owner_id, game_id from province) a
JOIN province b
ON a.game_id = b.game_id
AND a.owner_id = b.owner_id
AND (
(
b.valid_from < a.valid_from
and a.valid_from < b.valid_until
)
OR a.valid_from = b.valid_from
OR (
b.valid_until IS NULL
AND b.valid_from < a.valid_from))
where a.game_id=1 and a.owner_id = 1
GROUP BY a.game_id, a.owner_id, a.valid_from
Explain
{
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "1773449.97"
},
"grouping_operation": {
"using_temporary_table": true,
"using_filesort": false,
"nested_loop": [
{
"table": {
"table_name": "a",
"access_type": "ALL",
"rows_examined_per_scan": 20940,
"rows_produced_per_join": 20940,
"filtered": "100.00",
"cost_info": {
"read_cost": "264.25",
"eval_cost": "2094.00",
"prefix_cost": "2358.25",
"data_read_per_join": "490K"
},
"used_columns": [
"valid_from",
"owner_id",
"game_id"
],
"materialized_from_subquery": {
"using_temporary_table": true,
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 2,
"cost_info": {
"query_cost": "2189.32"
},
"duplicates_removal": {
"using_filesort": false,
"table": {
"table_name": "province",
"access_type": "ref",
"possible_keys": [
"ix_gid_plid_vf_vl",
"test_valid_until",
"test_valid_from"
],
"key": "test_valid_from",
"used_key_parts": [
"game_id",
"owner_id"
],
"key_length": "12",
"ref": [
"const",
"const"
],
"rows_examined_per_scan": 20940,
"rows_produced_per_join": 20940,
"filtered": "100.00",
"using_index": true,
"cost_info": {
"read_cost": "95.32",
"eval_cost": "2094.00",
"prefix_cost": "2189.32",
"data_read_per_join": "1M"
},
"used_columns": [
"province_id",
"owner_id",
"valid_from",
"game_id",
"static_province_id"
]
}
}
}
}
}
},
{
"table": {
"table_name": "b",
"access_type": "ref",
"possible_keys": [
"ix_gid_plid_vf_vl",
"test_valid_until",
"test_valid_from"
],
"key": "test_valid_until",
"used_key_parts": [
"game_id",
"owner_id"
],
"key_length": "12",
"ref": [
"a.game_id",
"a.owner_id"
],
"rows_examined_per_scan": 241,
"rows_produced_per_join": 1146881,
"filtered": "22.66",
"index_condition": "(((`testing`.`b`.`valid_from` < `a`.`valid_from`) and (`a`.`valid_from` < `testing`.`b`.`valid_until`)) or (`testing`.`b`.`valid_from` = `a`.`valid_from`) or ((`testing`.`b`.`valid_until` is null) and (`testing`.`b`.`valid_from` < `a`.`valid_from`)))",
"cost_info": {
"read_cost": "1265065.52",
"eval_cost": "114688.15",
"prefix_cost": "1773449.97",
"data_read_per_join": "70M"
},
"used_columns": [
"province_id",
"owner_id",
"resource_production",
"valid_from",
"valid_until",
"game_id",
"static_province_id"
]
}
}
]
}
}
}
Result
game_id | valid_from | owner_id | SUM(b.resource_production) |
---|---|---|---|
1 | '2022-10-25 23:02:13' | 1 | 7829 |
1 | '2022-10-26 00:22:14' | 1 | 7836 |
... | ... | ... | ... |
The query works, but it is way to slow as it takes about 17 seconds.
My second approach
i have read that the mysql optimizer might not use the index in multiple or conditions as it is the case in my first query with the valid_from and valid_until.
2.Query
select g.game_id, g.owner_id, g.valid_from, sum(g.rp) from (
select a_b.game_id, a_b.owner_id, a.resource_production as rp, a_b.valid_from from(select distinct valid_from, owner_id, game_id from province) a_b
join province a
on a.game_id = a_b.game_id
AND a.owner_id = a_b.owner_id
AND
(
a.valid_from < a_b.valid_from
and a_b.valid_from < a.valid_until
)
Union
select b_b.game_id, b_b.owner_id, b.resource_production as rp, b_b.valid_from from(select distinct valid_from, owner_id, game_id from province) b_b
join province b
on b.game_id = b_b.game_id
AND b.owner_id = b_b.owner_id
AND b.valid_from = b_b.valid_from
Union
select c_b.game_id, c_b.owner_id, c.resource_production as rp, c_b.valid_from from(select distinct valid_from, owner_id, game_id from province) c_b
join province c
on c.game_id = c_b.game_id
AND c.owner_id = c_b.owner_id
AND (
c.valid_until IS NULL
AND c_b.valid_from < c.valid_from )) g
where g.game_id=1 and g.owner_id=1
group by g.game_id, g.owner_id, g.valid_from
Explain
{
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "76805.35"
},
"grouping_operation": {
"using_temporary_table": true,
"using_filesort": false,
"table": {
"table_name": "g",
"access_type": "ALL",
"rows_examined_per_scan": 682692,
"rows_produced_per_join": 682692,
"filtered": "100.00",
"cost_info": {
"read_cost": "8536.15",
"eval_cost": "68269.20",
"prefix_cost": "76805.35",
"data_read_per_join": "20M"
},
"used_columns": [
"game_id",
"owner_id",
"rp",
"valid_from"
],
"materialized_from_subquery": {
"using_temporary_table": true,
"dependent": false,
"cacheable": true,
"query_block": {
"union_result": {
"using_temporary_table": true,
"select_id": 8,
"table_name": "<union2,4,6>",
"access_type": "ALL",
"query_specifications": [
{
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 2,
"cost_info": {
"query_cost": "1773449.97"
},
"nested_loop": [
{
"table": {
"table_name": "a_b",
"access_type": "ALL",
"rows_examined_per_scan": 20940,
"rows_produced_per_join": 20940,
"filtered": "100.00",
"cost_info": {
"read_cost": "264.25",
"eval_cost": "2094.00",
"prefix_cost": "2358.25",
"data_read_per_join": "490K"
},
"used_columns": [
"valid_from",
"owner_id",
"game_id"
],
"materialized_from_subquery": {
"using_temporary_table": true,
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 3,
"cost_info": {
"query_cost": "2189.32"
},
"duplicates_removal": {
"using_filesort": false,
"table": {
"table_name": "province",
"access_type": "ref",
"possible_keys": [
"ix_gid_plid_vf_vl",
"test_valid_until",
"test_valid_from"
],
"key": "test_valid_from",
"used_key_parts": [
"game_id",
"owner_id"
],
"key_length": "12",
"ref": [
"const",
"const"
],
"rows_examined_per_scan": 20940,
"rows_produced_per_join": 20940,
"filtered": "100.00",
"using_index": true,
"cost_info": {
"read_cost": "95.32",
"eval_cost": "2094.00",
"prefix_cost": "2189.32",
"data_read_per_join": "1M"
},
"used_columns": [
"province_id",
"owner_id",
"valid_from",
"game_id",
"static_province_id"
]
}
}
}
}
}
},
{
"table": {
"table_name": "a",
"access_type": "ref",
"possible_keys": [
"ix_gid_plid_vf_vl",
"test_valid_until",
"test_valid_from"
],
"key": "test_valid_until",
"used_key_parts": [
"game_id",
"owner_id"
],
"key_length": "12",
"ref": [
"a_b.game_id",
"a_b.owner_id"
],
"rows_examined_per_scan": 241,
"rows_produced_per_join": 562138,
"filtered": "11.11",
"index_condition": "((`testing`.`a`.`valid_from` < `a_b`.`valid_from`) and (`a_b`.`valid_from` < `testing`.`a`.`valid_until`))",
"cost_info": {
"read_cost": "1265065.52",
"eval_cost": "56213.89",
"prefix_cost": "1773449.97",
"data_read_per_join": "34M"
},
"used_columns": [
"owner_id",
"resource_production",
"valid_from",
"valid_until",
"game_id"
]
}
}
]
}
},
{
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 4,
"cost_info": {
"query_cost": "33802.62"
},
"nested_loop": [
{
"table": {
"table_name": "b_b",
"access_type": "ALL",
"rows_examined_per_scan": 20940,
"rows_produced_per_join": 20940,
"filtered": "100.00",
"cost_info": {
"read_cost": "264.25",
"eval_cost": "2094.00",
"prefix_cost": "2358.25",
"data_read_per_join": "490K"
},
"used_columns": [
"valid_from",
"owner_id",
"game_id"
],
"materialized_from_subquery": {
"using_temporary_table": true,
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 5,
"cost_info": {
"query_cost": "2189.32"
},
"duplicates_removal": {
"using_filesort": false,
"table": {
"table_name": "province",
"access_type": "ref",
"possible_keys": [
"ix_gid_plid_vf_vl",
"test_valid_until",
"test_valid_from"
],
"key": "test_valid_from",
"used_key_parts": [
"game_id",
"owner_id"
],
"key_length": "12",
"ref": [
"const",
"const"
],
"rows_examined_per_scan": 20940,
"rows_produced_per_join": 20940,
"filtered": "100.00",
"using_index": true,
"cost_info": {
"read_cost": "95.32",
"eval_cost": "2094.00",
"prefix_cost": "2189.32",
"data_read_per_join": "1M"
},
"used_columns": [
"province_id",
"owner_id",
"valid_from",
"game_id",
"static_province_id"
]
}
}
}
}
}
},
{
"table": {
"table_name": "b",
"access_type": "ref",
"possible_keys": [
"ix_gid_plid_vf_vl",
"test_valid_until",
"test_valid_from"
],
"key": "test_valid_from",
"used_key_parts": [
"game_id",
"owner_id",
"valid_from"
],
"key_length": "16",
"ref": [
"b_b.game_id",
"b_b.owner_id",
"b_b.valid_from"
],
"rows_examined_per_scan": 4,
"rows_produced_per_join": 89841,
"filtered": "100.00",
"cost_info": {
"read_cost": "22460.27",
"eval_cost": "8984.11",
"prefix_cost": "33802.62",
"data_read_per_join": "5M"
},
"used_columns": [
"owner_id",
"resource_production",
"valid_from",
"game_id"
]
}
}
]
}
},
{
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 6,
"cost_info": {
"query_cost": "34611.03"
},
"nested_loop": [
{
"table": {
"table_name": "c_b",
"access_type": "ALL",
"rows_examined_per_scan": 20940,
"rows_produced_per_join": 20940,
"filtered": "100.00",
"cost_info": {
"read_cost": "264.25",
"eval_cost": "2094.00",
"prefix_cost": "2358.25",
"data_read_per_join": "490K"
},
"used_columns": [
"valid_from",
"owner_id",
"game_id"
],
"materialized_from_subquery": {
"using_temporary_table": true,
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 7,
"cost_info": {
"query_cost": "2189.32"
},
"duplicates_removal": {
"using_filesort": false,
"table": {
"table_name": "province",
"access_type": "ref",
"possible_keys": [
"ix_gid_plid_vf_vl",
"test_valid_until",
"test_valid_from"
],
"key": "test_valid_from",
"used_key_parts": [
"game_id",
"owner_id"
],
"key_length": "12",
"ref": [
"const",
"const"
],
"rows_examined_per_scan": 20940,
"rows_produced_per_join": 20940,
"filtered": "100.00",
"using_index": true,
"cost_info": {
"read_cost": "95.32",
"eval_cost": "2094.00",
"prefix_cost": "2189.32",
"data_read_per_join": "1M"
},
"used_columns": [
"province_id",
"owner_id",
"valid_from",
"game_id",
"static_province_id"
]
}
}
}
}
}
},
{
"table": {
"table_name": "c",
"access_type": "ref",
"possible_keys": [
"ix_gid_plid_vf_vl",
"test_valid_until",
"test_valid_from"
],
"key": "test_valid_until",
"used_key_parts": [
"game_id",
"owner_id",
"valid_until"
],
"key_length": "17",
"ref": [
"c_b.game_id",
"c_b.owner_id",
"const"
],
"rows_examined_per_scan": 4,
"rows_produced_per_join": 30713,
"filtered": "33.33",
"index_condition": "((`testing`.`c`.`valid_until` is null) and (`c_b`.`valid_from` < `testing`.`c`.`valid_from`))",
"cost_info": {
"read_cost": "23037.70",
"eval_cost": "3071.39",
"prefix_cost": "34611.03",
"data_read_per_join": "1M"
},
"used_columns": [
"owner_id",
"resource_production",
"valid_from",
"valid_until",
"game_id"
]
}
}
]
}
}
]
}
}
}
}
}
}
}
Result
game_id | owner_id | valid_from | sum(g.rp) |
---|---|---|---|
1 | 1 | '2022-10-25 23:02:13' | 28222 |
1 | 1 | '2022-10-26 00:42:13' | 28229 |
... | ... | ... | ... |
The 2. query neither works nor is it fast enough at about 14 seconds. It doesn't work because it returns the wrong total production for every timestamp. I hope that you can help me because I am out of ideas.
CodePudding user response:
Deleting the two indexes, and adding this one seems to improve performance (When I was able to reproduce a correct set of test-data.... )
The suggested index is:
CREATE INDEX test_key on province(game_id, owner_id, valid_from, valid_until, resource_production);
You can test it by running this DBFIDDLE, and look at the times produce by SHOW PROFILES
.
After running it, comment the CREATE INDEX
that I am suggesting to use, and uncomment the currently commented indexes (the 2 index you have currently), and re-run the fiddle again, to see the changed times in SHOW PROFILES
.
This suggested index is covering, because all used fields are contained in the index.
It would be nice to know if, and how much, this improves ...
CodePudding user response:
Start with moving these into the first 'derived' table:
where a.game_id = 1 and a.owner_id = 1
Have this index (as Luuk suggests):
INDEX(game_id, owner_id, valid_from, valid_until, resource_production)
Then see my questions in the Comments above.
After that, I may have more advice.