Elasticsearch Bucket count-CodePudding

This is my query, which groups the results by user and then by date. I need to extract the following information from it:

How many requests the user submitted to the endpoint. I already have this information: it is 6 in the example below.
For the date range, on how many days did the user submit at least 1 request to the endpoint? That is, if the user requested the endpoint 50 times on one day, that would still only count as 1 day. I need to achieve this by augmenting the query to return, for each user, the number of date buckets that contain at least one request.

{
  "query": {
    "bool": {
      "filter": [
        {
          "range": {
            "json.@timestamp": {
              "gt": "2021-08-22T00:00:00.000Z",
              "lt": "2022-10-22T13:41:09.000Z"
            }
          }
        },
        {
          "term": {
            "json.path": "/api/v1/discover"
          }
        },
        {
          "wildcard": {
            "container.image.name": {
              "value": "*prod*"
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "group_by_userId": {
      "terms": {
        "field": "json.userId"
      },
      "aggs": {
        "group_by_timestamp": {
          "date_histogram": {
            "field": "@timestamp",
            "calendar_interval": "1d"
          }
        }
      }
    }
  }
}

This results in:

    {
      "aggregations": {
        "group_by_userId": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": [
            {
              "key": "1283",
              "doc_count": 6,
              "group_by_timestamp": {
                "buckets": [
                  {
                    "key_as_string": "2022-10-07T00:00:00.000Z",
                    "key": 1665100800000,
                    "doc_count": 4
                  },
                  {
                    "key_as_string": "2022-10-08T00:00:00.000Z",
                    "key": 1665187200000,
                    "doc_count": 0
                  },
                  {
                    "key_as_string": "2022-10-09T00:00:00.000Z",
                    "key": 1665273600000,
                    "doc_count": 0
                  },
                  {
                    "key_as_string": "2022-10-10T00:00:00.000Z",
                    "key": 1665360000000,
                    "doc_count": 2
                  }
                ]
              }
            }
          ]
        }
      }
    }

How can I augment it to also return, for each user, the number of non-empty date buckets?

For the given example, the bucket count should be 2 (only buckets with a doc_count greater than 0 are counted).

And this is a sample document for reproduction purposes:

{
  "_index": "filebeat-7.16.3-2022.10.10",
  "_type": "_doc",
  "_id": "jsWEwoMBBB8VHDQ_esJw",
  "_version": 1,
  "_score": 1,
  "_source": {
    "@timestamp": "2022-10-10T15:30:01.000Z",
    "json": {
      "userId": 4479,
      "@timestamp": "2022-10-10T15:30:01Z",
      "bodySize": 118,
      "caller": "middlewares/logger.go:65",
      "error": "Error #01: user addresses are required.\n",
      "transaction.id": "76312bca3aa68f1b",
      "rawQuery": "",
      "latency": "64.561µs",
      "trace.id": "76312bca3aa68f1b8ec1cdeb141ad6fd",
      "log.level": "warning",
      "path": "/api/v1/discover",
      "method": "GET",
      "message": "",
      "clientIP": "172.31.20.20",
      "status": 400,
      "referrer": ""
    },
    "container": {
      "id": "34965221589",
      "runtime": "docker",
      "image": {
        "name": "amazonaws.com/app:prod-97149bd4-1999999999"
      }
    }
  }
}

CodePudding user response:

I used "min_doc_count": 1 in the date histogram to remove the buckets with a zero count, and a stats_bucket pipeline aggregation to get the bucket count (the "count" field of its result):

  "query": {
    "bool": {
      "filter": [
        {
          "range": {
            "json.@timestamp": {
              "gt": "2021-08-22T00:00:00.000Z",
              "lt": "2022-10-22T13:41:09.000Z"
            }
          }
        },
        {
          "term": {
            "json.path.keyword": "/api/v1/discover"
          }
        },
        {
          "wildcard": {
            "container.image.name": {
              "value": "*prod*"
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "group_by_userId": {
      "terms": {
        "field": "json.userId"
      },
      "aggs": {
        "group_by_timestamp": {
          "date_histogram": {
            "field": "@timestamp",
            "interval": "1d",
            "min_doc_count": 1
          }
        },
        "count_buckets": {
          "stats_bucket": {
            "buckets_path": "group_by_timestamp._count"
          }
        }
      }
    }
  }
}