Home > OS >  How to disable auto-generation of default fields in ElasticSearch Query with TermsAggregationBuilder
How to disable auto-generation of default fields in ElasticSearch Query with TermsAggregationBuilder

Time:06-16

I am migrating from Elasticsearch v1.0.0 to v7.13.1. I know that support for type specification has been removed in Elasticsearch versions beyond 7.0.0. Elasticsearch has also reworked some of its classes; for example, TermsAggregationBuilder replaces TermsBuilder.

But when I prepare queries using QueryBuilders and AggregationBuilders, I see some extra fields generated that I don't want.

Is there any way to avoid them programmatically?

Before

private TermsBuilder createAggreationsUriDetails() {
        return AggregationBuilders
                .terms(xxxxxxxx)...

After

private TermsAggregationBuilder createAggreationsUriDetails() {
        return AggregationBuilders
                .terms(ElasticConstants.URI)...

I am also using matchQuery() to prepare the match query with the upgraded ES version, and I still see some extra fields. The same goes for the order as well.

Query comparison with old and new elasticsearch version

Before

{
  "size": 0,
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "uri.raw": {
              "query": "sample_uri",
              "type": "boolean"
            }
          }
        },
        {
          "range": {
            "@timestamp": {
              "from": 1655145000000,
              "to": 1655231400000,
              "include_lower": "true",
              "include_upper": "false",
              "format": "epoch_millis"
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "uri": {
      "terms": {
        "field": "uri.raw",
        "size": 1,
        "order": {
          "_count": "desc"
        }
      },
      "aggregations": {
        "client_id": {
          "terms": {
            "field": "client_id",
            "size": 10000,
            "order": {
              "_count": "desc"
            }
          },
          "aggregations": {
            "response_code": {
              "terms": {
                "field": "response_code.raw",
                "size": 8,
                "order": {
                  "_count": "desc"
                }
              },
              "aggregations": {
                "datetime": {
                  "date_histogram": {
                    "field": "@timestamp",
                    "interval": "1m",
                    "min_doc_count": 1
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

Query developed with new ES version QueryBuilder

{
  "size": 0,
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "uri.raw": {
              "query": "sample_url",
              "operator": "OR",
              "prefix_length": 0,
              "max_expansions": 50,
              "fuzzy_transpositions": "true",
              "lenient": "false",
              "zero_terms_query": "NONE",
              "auto_generate_synonyms_phrase_query": "true",
              "boost": 1
            }
          }
        },
        {
          "range": {
            "@timestamp": {
              "from": 1655145000000,
              "to": 1655231400000,
              "include_lower": "true",
              "include_upper": "false",
              "format": "epoch_millis",
              "boost": 1
            }
          }
        }
      ],
      "adjust_pure_negative": "true",
      "boost": 1
    }
  },
  "aggs": {
    "uri": {
      "terms": {
        "field": "uri.raw",
        "size": 1,
        "min_doc_count": 1,
        "shard_min_doc_count": 0,
        "show_term_doc_count_error": "false",
        "order": [
          {
            "_count": "desc"
          },
          {
            "_key": "asc"
          }
        ]
      },
      "aggregations": {
        "client_id": {
          "terms": {
            "field": "client_id",
            "size": 10000,
            "min_doc_count": 1,
            "shard_min_doc_count": 0,
            "show_term_doc_count_error": "false",
            "order": [
              {
                "_count": "desc"
              },
              {
                "_key": "asc"
              }
            ]
          },
          "aggregations": {
            "response_code": {
              "terms": {
                "field": "response_code.raw",
                "size": 8,
                "min_doc_count": 1,
                "shard_min_doc_count": 0,
                "show_term_doc_count_error": "false",
                "order": [
                  {
                    "_count": "desc"
                  },
                  {
                    "_key": "asc"
                  }
                ]
              },
              "aggregations": {
                "datetime": {
                  "date_histogram": {
                    "field": "@timestamp",
                    "interval": "60000ms",
                    "offset": 0,
                    "order": {
                      "_key": "asc"
                    },
                    "keyed": "false",
                    "min_doc_count": 1
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

CodePudding user response:

What you see as extra fields are actually the parameters of the query. For example, the match query in your 7.x output looks like this:

"match": {
            "uri.raw": {
              "query": "sample_url",
              "operator": "OR",
              "prefix_length": 0,
              "max_expansions": 50,
              "fuzzy_transpositions": "true",
              "lenient": "false",
              "zero_terms_query": "NONE",
              "auto_generate_synonyms_phrase_query": "true",
              "boost": 1
            }
          }

operator, prefix_length, lenient and the rest are all parameters of the match query. Even if you don't provide them, they are added with their default values; Elasticsearch applies the same defaults on its side when you send the query as JSON without these parameters, so you don't need to worry about them. If you want, you can change some of these parameter values to see the corresponding impact on your query results; for example, change operator to AND and the number of search results for multi-term queries will be smaller.

Note: You can also check the code of MatchQueryBuilder in the Elasticsearch code base to see that it uses the builder design pattern and how it passes the default values of the parameters.

Hope this helps.

CodePudding user response:

You cannot disable this, because these are default values generated by Elasticsearch.

Also, you only see them because you are printing the query to the console; this is expected behaviour.

Below is a simple match query with only the required params:

GET /_search
{
  "query": {
    "match": {
      "message": {
        "query": "this is a test"
      }
    }
  }
}

But when you create the above query using the Java client and print it to the console, it will look like the one below, because it also prints the other default parameter values, which are passed to Elasticsearch when the query is executed.

GET /_search
{
  "query": {
    "match": {
      "message": {
              "query": "this is a test",
              "operator": "OR",
              "prefix_length": 0,
              "max_expansions": 50,
              "fuzzy_transpositions": "true",
              "lenient": "false",
              "zero_terms_query": "NONE",
              "auto_generate_synonyms_phrase_query": "true",
              "boost": 1
      }
    }
  }
}
  • Related