This is my query, which groups the requests per user and per date. I need to extract the following information from it:
1. How many requests to the endpoint the user submitted. I already have this information: it results in 6 in the example below.
2. For the date range, on how many days did the user submit at least 1 request to the endpoint? That is, if the user requested the endpoint 50 times on one day, that would still count as only 1 day.
I need to achieve this by augmenting the query to return the bucket count for each user.
{
  "query": {
    "bool": {
      "filter": [
        {
          "range": {
            "json.@timestamp": {
              "gt": "2021-08-22T00:00:00.000Z",
              "lt": "2022-10-22T13:41:09.000Z"
            }
          }
        },
        {
          "term": {
            "json.path": "/api/v1/discover"
          }
        },
        {
          "wildcard": {
            "container.image.name": {
              "value": "*prod*"
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "group_by_userId": {
      "terms": {
        "field": "json.userId"
      },
      "aggs": {
        "group_by_timestamp": {
          "date_histogram": {
            "field": "@timestamp",
            "interval": "1d"
          }
        }
      }
    }
  }
}
This results in:
{
  "aggregations": {
    "group_by_userId": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "1283",
          "doc_count": 6,
          "group_by_timestamp": {
            "buckets": [
              {
                "key_as_string": "2022-10-07T00:00:00.000Z",
                "key": 1665100800000,
                "doc_count": 4
              },
              {
                "key_as_string": "2022-10-08T00:00:00.000Z",
                "key": 1665187200000,
                "doc_count": 0
              },
              {
                "key_as_string": "2022-10-09T00:00:00.000Z",
                "key": 1665273600000,
                "doc_count": 0
              },
              {
                "key_as_string": "2022-10-10T00:00:00.000Z",
                "key": 1665360000000,
                "doc_count": 2
              }
            ]
          }
        }
      ]
    }
  }
}
How can I augment it to return the bucket count for each user as well?
For the given example, the bucket count should be 2 (the number of buckets with doc_count greater than 0).
And this is a sample document for reproduction purposes:
{
  "_index": "filebeat-7.16.3-2022.10.10",
  "_type": "_doc",
  "_id": "jsWEwoMBBB8VHDQ_esJw",
  "_version": 1,
  "_score": 1,
  "_source": {
    "@timestamp": "2022-10-10T15:30:01.000Z",
    "json": {
      "userId": 4479,
      "@timestamp": "2022-10-10T15:30:01Z",
      "bodySize": 118,
      "caller": "middlewares/logger.go:65",
      "error": "Error #01: user addresses are required.\n",
      "transaction.id": "76312bca3aa68f1b",
      "rawQuery": "",
      "latency": "64.561µs",
      "trace.id": "76312bca3aa68f1b8ec1cdeb141ad6fd",
      "log.level": "warning",
      "path": "/api/v1/discover",
      "method": "GET",
      "message": "",
      "clientIP": "172.31.20.20",
      "status": 400,
      "referrer": ""
    },
    "container": {
      "id": "34965221589",
      "runtime": "docker",
      "image": {
        "name": "amazonaws.com/app:prod-97149bd4-1999999999"
      }
    }
  }
}
CodePudding user response:
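I used "min_doc_count": 1 in the date histogram to drop the buckets with a zero count, and a stats_bucket pipeline aggregation to count the remaining buckets. In the response, the "count" field of "count_buckets" in each user bucket is the number of days on which that user made at least one request: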
{
  "query": {
    "bool": {
      "filter": [
        {
          "range": {
            "json.@timestamp": {
              "gt": "2021-08-22T00:00:00.000Z",
              "lt": "2022-10-22T13:41:09.000Z"
            }
          }
        },
        {
          "term": {
            "json.path.keyword": "/api/v1/discover"
          }
        },
        {
          "wildcard": {
            "container.image.name": {
              "value": "*prod*"
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "group_by_userId": {
      "terms": {
        "field": "json.userId"
      },
      "aggs": {
        "group_by_timestamp": {
          "date_histogram": {
            "field": "@timestamp",
            "interval": "1d",
            "min_doc_count": 1
          }
        },
        "count_buckets": {
          "stats_bucket": {
            "buckets_path": "group_by_timestamp._count"
          }
        }
      }
    }
  }
}