I'm trying to get the top 5 teams based on win rate (matches won / all matches) having the match result documents in the index (see below).
The fields I need to get in the bucket:
- apiId
- name
- winrate (won/total)
I guess this would require complex aggregation calculation witch is far beyond my current elasticsearch skills.
Elasticsearch version: 7.15.2
Could anyone help me with such elasticsearch query?
Thanks a lot in advance!
{
"lastModified": "2022-01-14T09:33:48.232Z",
"uuid": "01234567",
"started": "2022-01-14T09:31:27.651Z",
"editing": false,
"approved": true,
"statistics": {
"teams": [
{
"name": "Team1",
"score": 0,
"winner": false,
"apiId": "1"
},
{
"name": "Team2",
"score": 2,
"winner": true,
"apiId": "2"
}
]
}
}
and the mapping:
{
"mappings": {
"properties": {
"_class": {
"type": "keyword",
"index": false,
"doc_values": false
},
"approved": {
"type": "boolean"
},
"editing": {
"type": "boolean"
},
"ended": {
"type": "date",
"format": "date_optional_time||epoch_millis"
},
"game": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"lastModified": {
"type": "date",
"format": "date_optional_time||epoch_millis"
},
"started": {
"type": "date",
"format": "date_optional_time||epoch_millis"
},
"statistics": {
"type": "nested",
"include_in_parent": true,
"properties": {
"_class": {
"type": "keyword",
"index": false,
"doc_values": false
},
"teams": {
"type": "nested",
"include_in_parent": true,
"properties": {
"_class": {
"type": "keyword",
"index": false,
"doc_values": false
},
"apiId": {
"type": "long"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"score": {
"type": "long"
},
"winner": {
"type": "boolean"
}
}
}
}
},
"uuid": {
"type": "keyword"
}
}
}
}
Edit: Based on @ilvar answer, I constructed the query:
{
"query": {
"bool": {
"must": [
{
"term": {
"approved": true
}
}
]
}
},
"size": 0,
"aggs": {
"top_teams": {
"nested": {
"path": "statistics.teams"
},
"aggs": {
"the_all": {
"multi_terms": {
"terms": [
{
"field": "statistics.teams.name.keyword"
},
{
"field": "statistics.teams.apiId"
}
]
}
},
"the_won": {
"filter": {
"terms": {
"statistics.teams.winner": [
true
]
}
},
"aggs": {
"teams": {
"multi_terms": {
"terms": [
{
"field": "statistics.teams.name.keyword"
},
{
"field": "statistics.teams.apiId"
}
]
}
}
}
}
}
}
}
}
Which gives me:
{
"took": 20,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 5,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"aggregations": {
"top_teams": {
"doc_count": 10,
"the_all": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": [
"Team1",
2
],
"key_as_string": "Team1|2",
"doc_count": 2
},
{
"key": [
"Team2",
3
],
"key_as_string": "Team2|3",
"doc_count": 2
},
{
"key": [
"Team3",
5
],
"key_as_string": "Team3|5",
"doc_count": 2
},
{
"key": [
"Team4",
1
],
"key_as_string": "Team4|1",
"doc_count": 1
},
{
"key": [
"Team5",
4
],
"key_as_string": "Team5|4",
"doc_count": 1
},
{
"key": [
"Team6",
7
],
"key_as_string": "Team6|7",
"doc_count": 1
},
{
"key": [
"Team7",
6
],
"key_as_string": "Team7|6",
"doc_count": 1
}
]
},
"the_won": {
"doc_count": 4,
"teams": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": [
"Team2",
5
],
"key_as_string": "Team2|5",
"doc_count": 2
},
{
"key": [
"Team4",
2
],
"key_as_string": "Team4|2",
"doc_count": 1
},
{
"key": [
"Team7",
3
],
"key_as_string": "Team7|3",
"doc_count": 1
}
]
}
}
}
}
}
But I still cannot get the winrate from two siblings, where one sibling might have missing teams that have not won any match. Should I use some pipeline aggregation?
CodePudding user response:
This is very similar to the example docs have for nested aggregation. The only difference would be that you'll have a terms
aggregation on the top level instead of filter
so you get back all of the teams.
CodePudding user response:
The final solution I came out with is bellow. the sort pipeline aggregation did the final part of the job
{
"query": {
"bool": {
"must": [
{
"term": {
"approved": true
}
}
]
}
},
"size": 0,
"aggs": {
"top_teams": {
"nested": {
"path": "statistics.teams"
},
"aggs": {
"the_all": {
"terms": {
"field": "statistics.teams.apiId"
},
"aggs": {
"the_won_by_team": {
"filter": {
"terms": {
"statistics.teams.winner": [
true
]
}
}
},
"the_lost_by_team": {
"filter": {
"terms": {
"statistics.teams.winner": [
false
]
}
}
},
"the_all_by_team": {
"filter": {
"terms": {
"statistics.teams.winner": [
true,
false
]
}
}
},
"the_winrate": {
"bucket_script": {
"buckets_path": {
"the_won_count": "the_won_by_team._count",
"the_all_count": "the_all_by_team._count"
},
"script": "params.the_won_count / params.the_all_count"
}
},
"the_sort": {
"bucket_sort": {
"sort": [
{
"the_winrate": "desc"
},
{
"the_all_by_team._count": "desc"
}
],
"size": 5
}
}
}
}
}
}
}
}