Home > database >  Elasticsearch exclude key from composite aggregation
Elasticsearch exclude key from composite aggregation

Time:09-17

I need to exclude some keys from a composite aggregation. Here is one document from my index as an example:

{
    "end_date": 1230314400000,
    "parameter_codes": [28, 35, 30],
    "platform_code": "41012",
    "start_date": 1230314400000,
    "station_id": 7833246
}

I perform a search request that returns one bucket for each platform_code/parameter_codes pair, along with the corresponding station_id values, and that supports paging over the buckets.

Here is the request:

{
    "size": 0,
    "query": {
        "match_all": {
            "boost": 1.0
        }
    },
    "_source": false,
    "aggregations": {
        "compositeAgg": {
            "composite": {
                "size": 10,
                "sources": [{
                        "platform_code": {
                            "terms": {
                                "field": "platform_code",
                                "missing_bucket": false,
                                "order": "asc"
                            }
                        }
                    }, {
                        "parameter_codes": {
                            "terms": {
                                "field": "parameter_codes",
                                "missing_bucket": false,
                                "order": "asc"
                            }
                        }
                    }]
            },
            "aggregations": {
                "aggstation_id": {
                    "terms": {
                        "field": "station_id",
                        "size": 2147483647,
                        "min_doc_count": 1,
                        "shard_min_doc_count": 0,
                        "show_term_doc_count_error": false,
                        "order": {
                            "_key": "asc"
                        }
                    }
                },
                "pipe": {
                    "bucket_sort": {
                        "sort": [{
                                "_key": {
                                    "order": "asc"
                                }
                            }],
                        "from": 0,
                        "size": 10,
                        "gap_policy": "SKIP"
                    }
                }
            }
        }
    }
}

This request gives me the following results:

{
    "took": 3,
    "timed_out": false,
    "_shards": {
        "total": 8,
        "successful": 8,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 3,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    },
    "aggregations": {
        "composite#compositeAgg": {
            "after_key": {
                "platform_code": "41012",
                "parameter_codes": 60
            },
            "buckets": [{
                    "key": {
                        "platform_code": "41012",
                        "parameter_codes": 28
                    },
                    "doc_count": 1,
                    "lterms#aggstation_id": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [{
                                "key": 7833246,
                                "doc_count": 1
                            }]
                    }
                }, {
                    "key": {
                        "platform_code": "41012",
                        "parameter_codes": 30
                    },
                    "doc_count": 2,
                    "lterms#aggstation_id": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [{
                                "key": 7833246,
                                "doc_count": 1
                            }, {
                                "key": 12787501,
                                "doc_count": 1
                            }]
                    }
                }, {
                    "key": {
                        "platform_code": "41012",
                        "parameter_codes": 35
                    },
                    "doc_count": 2,
                    "lterms#aggstation_id": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [{
                                "key": 7833246,
                                "doc_count": 1
                            }, {
                                "key": 12787501,
                                "doc_count": 1
                            }]
                    }
                }]
        }
    }
}

This works very well, but I need to exclude one or more parameter_codes values. For example, when excluding '35', I want only the keys:

{
   "platform_code": "41012",
   "parameter_codes": 28
}

and

{
   "platform_code": "41012",
   "parameter_codes": 30
}

I tried many options but could not get this to work. Does anybody know how I can do that?

CodePudding user response:

A script can be used in the composite source to return only specific values of the array.

{
  "size": 0,
  "query": {
    "match_all": {
      "boost": 1
    }
  },
  "_source": false,
  "aggregations": {
    "compositeAgg": {
      "composite": {
        "size": 10,
        "sources": [
          {
            "platform_code": {
              "terms": {
                "field": "platform_code.keyword",
                "missing_bucket": false,
                "order": "asc"
              }
            }
          },
          {
            "parameter_codes": {
              "terms": {
                "script": {
                  "source": """
                   def arr=[];
                   for (item in doc['parameter_codes']) {
                       if(item !=35)
                       {
                          arr.add(item);
                       }
                    }
                  return arr"""
                }
              }
            }
          }
        ]
      },
      "aggregations": {
        "aggstation_id": {
          "terms": {
            "field": "station_id",
            "size": 2147483647,
            "min_doc_count": 1,
            "shard_min_doc_count": 0,
            "show_term_doc_count_error": false,
            "order": {
              "_key": "asc"
            }
          }
        },
        "pipe": {
          "bucket_sort": {
            "sort": [
              {
                "_key": {
                  "order": "asc"
                }
              }
            ],
            "from": 0,
            "size": 10,
            "gap_policy": "SKIP"
          }
        }
      }
    }
  }
}

CodePudding user response:

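You can try excluding documents that match "parameter_codes=35" in the query. Note that this removes every document whose parameter_codes array contains 35, so the other codes in those documents (for example 28 and 30) are dropped from the aggregation as well.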

{
      "query": {
        "bool": {
          "must_not": [
            {
              "term": {
                "parameter_codes": {
                  "value": "35"
                }
              }
            }
          ]
        }
      }
    }
  • Related