Home > Software engineering >  Elasticsearch complex aggregation on match results
Elasticsearch complex aggregation on match results

Time:02-16

I'm trying to get the top 5 teams based on win rate (matches won / all matches) having the match result documents in the index (see below).

The fields I need to get in the bucket:

  • apiId
  • name
  • winrate (won/total)

I guess this would require complex aggregation calculation witch is far beyond my current elasticsearch skills.

Elasticsearch version: 7.15.2

Could anyone help me with such elasticsearch query?

Thanks a lot in advance!

{
  "lastModified": "2022-01-14T09:33:48.232Z",
  "uuid": "01234567",
  "started": "2022-01-14T09:31:27.651Z",
  "editing": false,
  "approved": true,
  "statistics": {
    "teams": [
      {
        "name": "Team1",
        "score": 0,
        "winner": false,
        "apiId": "1"
      },
      {
        "name": "Team2",
        "score": 2,
        "winner": true,
        "apiId": "2"
      }
    ]
  }
}

and the mapping:

{
    "mappings": {
        "properties": {
            "_class": {
                "type": "keyword",
                "index": false,
                "doc_values": false
            },
            "approved": {
                "type": "boolean"
            },
            "editing": {
                "type": "boolean"
            },
            "ended": {
                "type": "date",
                "format": "date_optional_time||epoch_millis"
            },
            "game": {
                "type": "text",
                "fields": {
                    "keyword": {
                        "type": "keyword",
                        "ignore_above": 256
                    }
                }
            },
            "lastModified": {
                "type": "date",
                "format": "date_optional_time||epoch_millis"
            },
            "started": {
                "type": "date",
                "format": "date_optional_time||epoch_millis"
            },
            "statistics": {
                "type": "nested",
                "include_in_parent": true,
                "properties": {
                    "_class": {
                        "type": "keyword",
                        "index": false,
                        "doc_values": false
                    },
                    "teams": {
                        "type": "nested",
                        "include_in_parent": true,
                        "properties": {
                            "_class": {
                                "type": "keyword",
                                "index": false,
                                "doc_values": false
                            },
                            "apiId": {
                                "type": "long"
                            },
                            "name": {
                                "type": "text",
                                "fields": {
                                    "keyword": {
                                        "type": "keyword",
                                        "ignore_above": 256
                                    }
                                }
                            },
                            "score": {
                                "type": "long"
                            },
                            "winner": {
                                "type": "boolean"
                            }
                        }
                    }
                }
            },
            "uuid": {
                "type": "keyword"
            }
        }
    }
}

Edit: Based on @ilvar answer, I constructed the query:

{
    "query": {
        "bool": {
            "must": [
                {
                    "term": {
                        "approved": true
                    }
                }
            ]
        }
    },
    "size": 0,
    "aggs": {
        "top_teams": {
            "nested": {
                "path": "statistics.teams"
            },
            "aggs": {
                "the_all": {
                    "multi_terms": {
                        "terms": [
                            {
                                "field": "statistics.teams.name.keyword"
                            },
                            {
                                "field": "statistics.teams.apiId"
                            }
                        ]
                    }
                },
                "the_won": {
                    "filter": {
                        "terms": {
                            "statistics.teams.winner": [
                                true
                            ]
                        }
                    },
                    "aggs": {
                        "teams": {
                            "multi_terms": {
                                "terms": [
                                    {
                                        "field": "statistics.teams.name.keyword"
                                    },
                                    {
                                        "field": "statistics.teams.apiId"
                                    }
                                ]
                            }
                        }
                    }
                }
            }
        }
    }
}

Which gives me:

{
    "took": 20,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 5,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    },
    "aggregations": {
        "top_teams": {
            "doc_count": 10,
            "the_all": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": [
                    {
                        "key": [
                            "Team1",
                            2
                        ],
                        "key_as_string": "Team1|2",
                        "doc_count": 2
                    },
                    {
                        "key": [
                            "Team2",
                            3
                        ],
                        "key_as_string": "Team2|3",
                        "doc_count": 2
                    },
                    {
                        "key": [
                            "Team3",
                            5
                        ],
                        "key_as_string": "Team3|5",
                        "doc_count": 2
                    },
                    {
                        "key": [
                            "Team4",
                            1
                        ],
                        "key_as_string": "Team4|1",
                        "doc_count": 1
                    },
                    {
                        "key": [
                            "Team5",
                            4
                        ],
                        "key_as_string": "Team5|4",
                        "doc_count": 1
                    },
                    {
                        "key": [
                            "Team6",
                            7
                        ],
                        "key_as_string": "Team6|7",
                        "doc_count": 1
                    },
                    {
                        "key": [
                            "Team7",
                            6
                        ],
                        "key_as_string": "Team7|6",
                        "doc_count": 1
                    }
                ]
            },
            "the_won": {
                "doc_count": 4,
                "teams": {
                    "doc_count_error_upper_bound": 0,
                    "sum_other_doc_count": 0,
                    "buckets": [
                        {
                            "key": [
                                "Team2",
                                5
                            ],
                            "key_as_string": "Team2|5",
                            "doc_count": 2
                        },
                        {
                            "key": [
                                "Team4",
                                2
                            ],
                            "key_as_string": "Team4|2",
                            "doc_count": 1
                        },
                        {
                            "key": [
                                "Team7",
                                3
                            ],
                            "key_as_string": "Team7|3",
                            "doc_count": 1
                        }
                    ]
                }
            }
        }
    }
}

But I still cannot get the winrate from two siblings, where one sibling might have missing teams that have not won any match. Should I use some pipeline aggregation?

CodePudding user response:

This is very similar to the example docs have for nested aggregation. The only difference would be that you'll have a terms aggregation on the top level instead of filter so you get back all of the teams.

CodePudding user response:

The final solution I came out with is bellow. the sort pipeline aggregation did the final part of the job

{
    "query": {
        "bool": {
            "must": [
                {
                    "term": {
                        "approved": true
                    }
                }
            ]
        }
    },
    "size": 0,
    "aggs": {
        "top_teams": {
            "nested": {
                "path": "statistics.teams"
            },
            "aggs": {
                "the_all": {
                    "terms": {
                        "field": "statistics.teams.apiId"
                    },
                    "aggs": {
                        "the_won_by_team": {
                            "filter": {
                                "terms": {
                                    "statistics.teams.winner": [
                                        true
                                    ]
                                }
                            }
                        },
                        "the_lost_by_team": {
                            "filter": {
                                "terms": {
                                    "statistics.teams.winner": [
                                        false
                                    ]
                                }
                            }
                        },
                        "the_all_by_team": {
                            "filter": {
                                "terms": {
                                    "statistics.teams.winner": [
                                        true,
                                        false
                                    ]
                                }
                            }
                        },
                        "the_winrate": {
                            "bucket_script": {
                                "buckets_path": {
                                    "the_won_count": "the_won_by_team._count",
                                    "the_all_count": "the_all_by_team._count"
                                },
                                "script": "params.the_won_count / params.the_all_count"
                            }
                        },
                        "the_sort": {
                            "bucket_sort": {
                                "sort": [
                                    {
                                        "the_winrate": "desc"
                                    },
                                    {
                                        "the_all_by_team._count": "desc"
                                    }
                                ],
                                "size": 5
                            }
                        }
                    }
                }
            }
        }
    }
}
  • Related