Home > Mobile >  elasticsearch results have wrong scoring
elasticsearch results have wrong scoring

Time:08-28

I have a problem with Elasticsearch scoring in my index. First I will show the code, then I will explain the problem.

My index settings and mappings:

{
  "crucial": {
    "aliases": {},
    "mappings": {
      "properties": {
        "brand_id": {
          "type": "long"
        },
        "brand_name": {
          "type": "text",
          "analyzer": "autocomplete",
          "search_analyzer": "autocomplete_search"
        },
        "category_name": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "id": {
          "type": "long"
        },
        "name": {
          "type": "text",
          "analyzer": "autocomplete",
          "search_analyzer": "autocomplete_search"
        }
      }
    },
    "settings": {
      "index": {
        "max_ngram_diff": "10",
        "routing": {
          "allocation": {
            "include": {
              "_tier_preference": "data_content"
            }
          }
        },
        "number_of_shards": "1",
        "provided_name": "crucial",
        "creation_date": "1659346772973",
        "analysis": {
          "analyzer": {
            "autocomplete": {
              "filter": [
                "lowercase"
              ],
              "tokenizer": "autocomplete"
            },
            "autocomplete_search": {
              "filter": [
                "lowercase"
              ],
              "tokenizer": "standard"
            }
          },
          "tokenizer": {
            "autocomplete": {
              "type": "ngram",
              "min_gram": "2",
              "max_gram": "12"
            }
          }
        },
        "number_of_replicas": "1",
        "uuid": "wo245x3nQaCw3s9e0LSrlA",
        "version": {
          "created": "8030299"
        }
      }
    }
  }
}

search query:

GET crucial/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "brand_name": "asus"
          }
        },
        {
          "match": {
            "name": "asus x452l"
          }
        }
      ]
    }
  },
  "aggs": {
    "auto_complete": {
      "terms": {
        "field": "brand_id",
        "order": {
          "_count": "desc"
        }
      }
    }
  }, "size": 20
}

output:

{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 319,
      "relation": "eq"
    },
    "max_score": 13.513464,
    "hits": [
      {
        "_index": "crucial",
        "_id": "63400",
        "_score": 13.513464,
        "_source": {
          "id": 63400,
          "name": "asus x409",
          "brand_name": "asus",
          "brand_id": 12,
          "category_name": "x400 series"
        }
      },{
        "_index": "crucial",
        "_id": "63412",
        "_score": 13.279591,
        "_source": {
          "id": 63412,
          "name": "asus x452e",
          "brand_name": "asus",
          "brand_id": 12,
          "category_name": "x400 series"
        }
      }
    ]
  },
  "aggregations": {
    "auto_complete": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": 12,
          "doc_count": 297
        },
        {
          "key": 40,
          "doc_count": 22
        }
      ]
    }
  }
}

As you can see, the search query is "asus x452l" and I expected "asus x452e" to have the highest score, but it does not! What is wrong with the scoring, and how can I fix it?

CodePudding user response:

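The problem was the difference between the tokenizer of the autocomplete_search analyzer (used at search time) and the tokenizer of the autocomplete analyzer (used at index time). The fields are indexed as n-grams of 2 to 12 characters, but the standard tokenizer splits the query into the whole words "asus" and "x452l". No n-gram of "asus x452e" equals "x452l", so only "asus" contributes to the score and the closer match is not ranked first.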

"analyzer": {
        "autocomplete": {
          "filter": [
            "lowercase"
          ],
          "tokenizer": "autocomplete"
        },
        "autocomplete_search": {
          "filter": [
            "lowercase"
          ],
          "tokenizer": "standard"
        }
      }

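Changing "search_analyzer": "autocomplete_search" to "search_analyzer": "autocomplete" in the mappings of both the brand_name and name fields fixed my problem. The original mappings, before the change, were: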

  "brand_name": {
      "type": "text",
      "analyzer": "autocomplete",
      "search_analyzer": "autocomplete_search"
  },"name": {
      "type": "text",
      "analyzer": "autocomplete",
      "search_analyzer": "autocomplete_search"
    }
  • Related