I am using Elasticsearch v7.1.0
and a composite aggregation to paginate my results.
The data in the index I am querying and aggregating looks like this.
{
"sequence": "SEQ-A123",
"timestamp": "2022-05-11T12:26:54Z",
"owner": "b96e1abb08d44a6a9871f567aa392167",
"serialNo": "A5645",
"value": 45,
"ctags": [
{
"name": "project",
"value": "cd8041f817634e7784b8c0cb5b069d4b"
}
]
},
{
"sequence": "SEQ-B123",
"timestamp": "2022-05-11T12:26:54Z",
"owner": "b96e1abb08d44a6a9871f567aa392165",
"serialNo": "A8456",
"value": 87,
"ctags": [
{
"name": "project",
"value": "cd8041f817634e7784b8c0cb5b069d4b"
}
]
},
{
"sequence": "SEQ-C123",
"timestamp": "2022-05-11T12:26:54Z",
"owner": "b96e1abb08d44a6a9871f567aa392165",
"serialNo": "A59",
"value": 87,
"ctags": [
{
"name": "project",
"value": "cd8041f817634e7784b8c0cb5b069d4b"
}
]
}, ...
The query I am executing on Elasticsearch is this:
{
"query": {
"bool": {
"must": [
{
"range": {
"timestamp": {
"gte": "2022-05-01T00:00:00.000Z",
"lte": "2022-05-30T23:59:59.999Z"
}
}
},
{
"terms": {
"sequence.keyword": [
"SEQ-A123",
"SEQ-B123"
]
}
}
],
"must_not": [
{
"term": {
"serialNo.keyword": "test"
}
}
]
}
},
"size": 0,
"aggs": {
"sequence": {
"composite": {
"sources": [
{
"bkt_sequence": {
"terms": {
"field": "sequence.keyword"
}
}
}
],
"after": {
"bkt_sequence": ""
},
"size": 2
},
"aggs": {
"serialNo": {
"terms": {
"field": "serialNo.keyword"
},
"aggs": {
"usageStats": {
"stats": {
"field": "value"
}
},
"ctags": {
"top_hits": {
"size": 1,
"_source": {
"include": [
"owner",
"ctags"
]
}
}
}
}
}
}
}
}
}
The result I am getting against this query looks like this.
{
"took": 6,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 94,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"aggregations": {
"sequence": {
"after_key": {
"bkt_sequence": "SEQ-B123"
},
"buckets": [
{
"key": {
"bkt_sequence": "SEQ-A123"
},
"doc_count": 47,
"serialNo": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 37,
"buckets": [
"0": {
"key": "A5645",
"doc_count": 1,
"ctags": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 2,
"hits": [
"0": {
"_index": "seq-serial",
"_type": "_doc",
"_id": "1",
"_score": 2,
"_source": {
"owner": "b96e1abb08d44a6a9871f567aa392167",
"ctags": [
"0": {
"name": "project",
"value": "cd8041f817634e7784b8c0cb5b069d4b"
}
]
}
}
]
}
},
"usageStats": {
"count": 1,
"min": 45,
"max": 45,
"avg": 45,
"sum": 45
}
},
"1": {
"key": "A5646",
"doc_count": 1,
"ctags": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 2,
"hits": [
"0": {
"_index": "seq-serial",
"_type": "_doc",
"_id": "27",
"_score": 2,
"_source": {
"owner": "b96e1abb08d44a6a9871f567aa392169",
"ctags": [
"0": {
"name": "project",
"value": "cd8041f817634e7784b8c0cb5b069d4b"
}
]
}
}
]
}
},
"usageStats": {
"count": 1,
"min": 85,
"max": 85,
"avg": 85,
"sum": 85
}
},
"2": {
...
},
"3": {
...
},
"4": {
...
},
"5": {
...
},
"6": {
...
},
"7": {
...
},
"8": {
...
},
"9": {
...
}
]
}
},
{
"key": {
"bkt_sequence": "SEQ-B123"
},
"doc_count": 47,
"serialNo": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 37,
"buckets": [
"0": {
"key": "A8456",
"doc_count": 1,
"ctags": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 2,
"hits": [
"0": {
"_index": "seq-serial",
"_type": "_doc",
"_id": "48",
"_score": 2,
"_source": {
"owner": "b96e1abb08d44a6a9871f567aa392167",
"ctags": [
"0": {
"name": "project",
"value": "cd8041f817634e7784b8c0cb5b069d4b"
}
]
}
}
]
}
},
"usageStats": {
"count": 1,
"min": 45,
"max": 45,
"avg": 45,
"sum": 45
}
},
"1": {
"key": "A7590",
"doc_count": 1,
"ctags": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 2,
"hits": [
"0": {
"_index": "seq-serial",
"_type": "_doc",
"_id": "74",
"_score": 2,
"_source": {
"owner": "b96e1abb08d44a6a9871f567aa392169",
"ctags": [
"0": {
"name": "project",
"value": "cd8041f817634e7784b8c0cb5b069d4b"
}
]
}
}
]
}
},
"usageStats": {
"count": 1,
"min": 85,
"max": 85,
"avg": 85,
"sum": 85
}
},
"2": {
...
},
"3": {
...
},
"4": {
...
},
"5": {
...
},
"6": {
...
},
"7": {
...
},
"8": {
...
},
"9": {
...
}
]
}
}
]
}
}
}
As you can see, a total of 94 documents match this query: 47 belong to the SEQ-A123 bucket and the other 47 belong to the SEQ-B123 bucket. However, for each bucket only 10 of the 47 documents are returned in the response.
How can I get all 47 documents in the result and still use pagination at the sequence
field level?
CodePudding user response:
By default, a terms aggregation returns only the top 10 buckets.
You just need to set the `size` parameter
on the `serialNo` terms aggregation.
Below is what your query would look like:
POST test_index/_search
{
"query": {
"bool": {
"must": [
{
"range": {
"timestamp": {
"gte": "2022-05-01T00:00:00.000Z",
"lte": "2022-05-30T23:59:59.999Z"
}
}
},
{
"terms": {
"sequence.keyword": [
"SEQ-A123",
"SEQ-B123"
]
}
}
],
"must_not": [
{
"term": {
"serialNo.keyword": "test"
}
}
]
}
},
"size": 0,
"aggs": {
"sequence": {
"composite": {
"sources": [
{
"bkt_sequence": {
"terms": {
"field": "sequence.keyword"
}
}
}
],
"after": {
"bkt_sequence": ""
},
"size": 2
},
"aggs": {
"serialNo": {
"terms": {
"field": "serialNo.keyword",
"size": 100 <----------- Note this here
},
"aggs": {
"usageStats": {
"stats": {
"field": "value"
}
},
"ctags": {
"top_hits": {
"size": 1,
"_source": {
"include": [
"owner",
"ctags"
]
}
}
}
}
}
}
}
}
}