I have a question about Elasticsearch searching. For example, I performed a search with the query "EPS 100" and ES returned 3 documents, sorted by 1) _score and 2) discount:
[
{
"_index": "index",
"_type": "default",
"_id": "23611",
"_score": 4027.2395,
"_source": {
"name": "ETNA EPS 100 - 20 x 600 x 1200"
},
"sort": [
4027.2395, <--- _score
0 <---- Discount
]
},
{
"_index": "index",
"_type": "default",
"_id": "23610",
"_score": 3950.8713,
"_source": {
"name": "ETNA EPS 80 - 100 x 600 x 1200 mm"
},
"sort": [
3950.8713, <--- _score
0 <--- Discount
]
},
{
"_index": "index",
"_type": "default",
"_id": "23602",
"_score": 3872.0818,
"_source": {
"name": "ETNA EPS 50 - 100 x 600 x 1200 mm"
},
"sort": [
3872.0818, <--- _score
3.72 <--- Discount
]
}
]
The question is how I should perform my search so that:
- First, documents that exactly match my search query appear at the top
- Second, for documents that are not an exact match, the nearest results with a discount appear next
For the given JSON, I should see the results in the following order:
- ETNA EPS 100 - 20 x 600 x 1200 (exact match for "EPS 100")
- ETNA EPS 50 - 100 x 600 x 1200 mm (because a discount is present)
- ETNA EPS 80 - 100 x 600 x 1200 mm
EDIT 1: My query:
{
"query": {
"function_score": {
"query": {
"boosting": {
"positive": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"term": {
"code": {
"value": "eps 100",
"boost": 200
}
}
},
{
"multi_match": {
"fields": [
"name",
"name.folded"
],
"query": "eps 100",
"type": "cross_fields",
"operator": "and",
"boost": 2
}
},
{
"multi_match": {
"fields": [
"name.search_folded"
],
"query": "eps 100",
"type": "cross_fields",
"operator": "and",
"boost": 500
}
},
{
"match_phrase_prefix": {
"name.search_folded": {
"query": "eps 100"
}
}
},
{
"match_phrase_prefix": {
"name.folded": {
"query": "eps 100"
}
}
}
],
"filter": [
{
"term": {
"enabled": {
"value": true,
"boost": 1
}
}
}
]
}
}
}
}
}
},
"sort": {
"_score": {
"order": "desc"
},
"discount": {
"order": "desc",
"missing": "_last",
"unmapped_type": "double"
}
}
}
Mapping:
{
"state": "open",
"settings": {
"index": {
"mapping": {
"total_fields": {
"limit": "2000"
}
},
"number_of_shards": "5",
"provided_name": "name",
"creation_date": "1664261140456",
"analysis": {
"filter": {
"lithuanian_stop": {
"type": "stop",
"stopwords": "_lithuanian_"
},
"lithuanian_stemmer": {
"type": "stemmer",
"language": "lithuanian"
}
},
"char_filter": {
"lithuanian_char_filter": {
"type": "mapping",
"mappings": [
"a => 10",
"A => 10",
"ą => 11",
"Ą => 11",
"b => 12",
"B => 12",
"c => 13",
"C => 13",
"č => 14",
"Č => 14",
"d => 15",
"D => 15",
"e => 16",
"E => 16",
"ę => 17",
"Ę => 17",
"ė => 18",
"Ė => 18",
"f => 18",
"F => 18",
"g => 19",
"G => 19",
"h => 20",
"H => 20",
"i => 21",
"I => 21",
"į => 22",
"Į => 22",
"y => 23",
"Y => 23",
"j => 24",
"J => 24",
"k => 25",
"K => 25",
"l => 26",
"L => 26",
"m => 27",
"M => 27",
"n => 28",
"N => 28",
"o => 29",
"O => 29",
"p => 30",
"P => 30",
"r => 31",
"R => 31",
"s => 32",
"S => 32",
"š => 33",
"Š => 33",
"t => 34",
"T => 34",
"u => 35",
"U => 35",
"ų => 36",
"Ų => 36",
"ū => 37",
"Ū => 37",
"v => 38",
"V => 38",
"z => 39",
"Z => 39",
"ž => 40",
"Ž => 40"
]
}
},
"normalizer": {
"lithuanian_sort_normalizer": {
"char_filter": [
"lithuanian_char_filter"
]
}
},
"analyzer": {
"search_asciifolding": {
"filter": [
"lowercase",
"asciifolding"
],
"tokenizer": "standard"
},
"lithianian_asciifolding": {
"filter": [
"lowercase",
"asciifolding",
"lithuanian_stop",
"lithuanian_stemmer"
],
"tokenizer": "standard"
}
}
},
"number_of_replicas": "1",
"uuid": "uuid",
"version": {
"created": "6040399"
}
}
},
"mappings": {
"default": {
"dynamic_templates": [
{
"name": {
"mapping": {
"analyzer": "lithuanian",
"fields": {
"search_folded": {
"analyzer": "search_asciifolding",
"type": "text"
},
"keyword": {
"type": "keyword"
},
"folded": {
"analyzer": "lithianian_asciifolding",
"type": "text"
}
},
"type": "text"
},
"match": "name"
}
},
{
"discount_attribute": {
"mapping": {
"type": "double"
},
"match": "discount"
}
}
]
}
}
}
CodePudding user response:
The query I propose may not be a silver bullet, but it can help you get the result you want. I used a field_value_factor function to boost the docs according to their "discount" field. Note that there are two match clauses. The first one uses the "and" operator, so it only matches docs that contain all of the query terms; I gave it a high boost so that these docs stay at the top even if their discount is zero. The second one uses the default "or" operator and just retrieves the remaining documents that match any of the terms.
{
  "query": {
    "function_score": {
      "query": {
        "bool": {
          "should": [
            {
              "match": {
                "name": {
                  "query": "EPS 100",
                  "boost": 100,
                  "operator": "and"
                }
              }
            },
            {
              "match": {
                "name": {
                  "query": "EPS 100"
                }
              }
            }
          ]
        }
      },
      "functions": [
        {
          "field_value_factor": {
            "field": "discount",
            "factor": 1.2,
            "modifier": "none",
            "missing": 0
          }
        }
      ],
      "score_mode": "multiply",
      "boost_mode": "sum"
    }
  }
}