I have a problem with Elasticsearch scoring in my index. First I will show the code, then I will explain my problem.
My index settings:
{
"crucial": {
"aliases": {},
"mappings": {
"properties": {
"brand_id": {
"type": "long"
},
"brand_name": {
"type": "text",
"analyzer": "autocomplete",
"search_analyzer": "autocomplete_search"
},
"category_name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "long"
},
"name": {
"type": "text",
"analyzer": "autocomplete",
"search_analyzer": "autocomplete_search"
}
}
},
"settings": {
"index": {
"max_ngram_diff": "10",
"routing": {
"allocation": {
"include": {
"_tier_preference": "data_content"
}
}
},
"number_of_shards": "1",
"provided_name": "crucial",
"creation_date": "1659346772973",
"analysis": {
"analyzer": {
"autocomplete": {
"filter": [
"lowercase"
],
"tokenizer": "autocomplete"
},
"autocomplete_search": {
"filter": [
"lowercase"
],
"tokenizer": "standard"
}
},
"tokenizer": {
"autocomplete": {
"type": "ngram",
"min_gram": "2",
"max_gram": "12"
}
}
},
"number_of_replicas": "1",
"uuid": "wo245x3nQaCw3s9e0LSrlA",
"version": {
"created": "8030299"
}
}
}
}
}
search query:
GET crucial/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"brand_name": "asus"
}
},
{
"match": {
"name": "asus x452l"
}
}
]
}
},
"aggs": {
"auto_complete": {
"terms": {
"field": "brand_id",
"order": {
"_count": "desc"
}
}
}
}, "size": 20
}
output:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 319,
"relation": "eq"
},
"max_score": 13.513464,
"hits": [
{
"_index": "crucial",
"_id": "63400",
"_score": 13.513464,
"_source": {
"id": 63400,
"name": "asus x409",
"brand_name": "asus",
"brand_id": 12,
"category_name": "x400 series"
}
},{
"_index": "crucial",
"_id": "63412",
"_score": 13.279591,
"_source": {
"id": 63412,
"name": "asus x452e",
"brand_name": "asus",
"brand_id": 12,
"category_name": "x400 series"
}
}
]
},
"aggregations": {
"auto_complete": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 12,
"doc_count": 297
},
{
"key": 40,
"doc_count": 22
}
]
}
}
}
As you can see, the search query is "asus x452l", and I expected "asus x452e" to have the highest score, but it does not. What is the problem with the scoring, and how can I fix it?
CodePudding user response:
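The problem was the difference between the tokenizer used by the autocomplete_search analyzer (standard) and the tokenizer used by the autocomplete analyzer (the ngram tokenizer named autocomplete):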
"analyzer": {
"autocomplete": {
"filter": [
"lowercase"
],
"tokenizer": "autocomplete"
},
"autocomplete_search": {
"filter": [
"lowercase"
],
"tokenizer": "standard"
}
}
Changing "search_analyzer": "autocomplete_search" to "search_analyzer": "autocomplete" in both field mappings fixed my problem. The original mappings, before the change, were:
"brand_name": {
"type": "text",
"analyzer": "autocomplete",
"search_analyzer": "autocomplete_search"
},"name": {
"type": "text",
"analyzer": "autocomplete",
"search_analyzer": "autocomplete_search"
}