Home > OS >  Elasticsearch boosting results based on property
Elasticsearch boosting results based on property

Time:09-28

I have a question about Elasticsearch searching. For example, I ran a search with the query "EPS 100" and ES returned 3 documents, sorted by 1) _score and 2) discount:

[
  {
    "_index": "index",
    "_type": "default",
    "_id": "23611",
    "_score": 4027.2395,
    "_source": {
      "name": "ETNA EPS 100 - 20 x 600 x 1200"
    },
    "sort": [
      4027.2395, <--- _score
      0 <---- Discount
    ]
  },
  {
    "_index": "index",
    "_type": "default",
    "_id": "23610",
    "_score": 3950.8713,
    "_source": {
      "name": "ETNA EPS 80 - 100 x 600 x 1200 mm"
    },
    "sort": [
      3950.8713, <--- _score
      0 <--- Discount 
    ]
  },
  {
    "_index": "index",
    "_type": "default",
    "_id": "23602",
    "_score": 3872.0818,
    "_source": {
      "name": "ETNA EPS 50 - 100 x 600 x 1200 mm"
    },
    "sort": [
      3872.0818, <--- _score
      3.72 <--- Discount 
    ]
  }
]

The question is: how should I perform my search so that:

  • Firstly, exact matches for my search query appear at the top
  • Secondly, among the results that are not an exact match, the nearest results with a discount appear next

For the JSON above, I should see the results in the following order:

  • ETNA EPS 100 - 20 x 600 x 1200 (Exact match by "EPS 100")
  • ETNA EPS 50 - 100 x 600 x 1200 mm (because discount present)
  • ETNA EPS 80 - 100 x 600 x 1200 mm

EDIT 1: My query:

{
  "query": {
    "function_score": {
      "query": {
        "boosting": {
          "positive": {
            "bool": {
              "minimum_should_match": 1,
              "should": [
                {
                  "term": {
                    "code": {
                      "value": "eps 100",
                      "boost": 200
                    }
                  }
                },
                {
                  "multi_match": {
                    "fields": [
                      "name",
                      "name.folded"
                    ],
                    "query": "eps 100",
                    "type": "cross_fields",
                    "operator": "and",
                    "boost": 2
                  }
                },
                {
                  "multi_match": {
                    "fields": [
                      "name.search_folded"
                    ],
                    "query": "eps 100",
                    "type": "cross_fields",
                    "operator": "and",
                    "boost": 500
                  }
                },
                {
                  "match_phrase_prefix": {
                    "name.search_folded": {
                      "query": "eps 100"
                    }
                  }
                },
                {
                  "match_phrase_prefix": {
                    "name.folded": {
                      "query": "eps 100"
                    }
                  }
                }
              ],
              "filter": [
                {
                  "term": {
                    "enabled": {
                      "value": true,
                      "boost": 1
                    }
                  }
                }
              ]
            }
          }
        }
      }
    }
  },
  "sort": {
    "_score": {
      "order": "desc"
    },
    "discount": {
      "order": "desc",
      "missing": "_last",
      "unmapped_type": "double"
    }
  }
}

Mapping:

{
  "state": "open",
  "settings": {
    "index": {
      "mapping": {
        "total_fields": {
          "limit": "2000"
        }
      },
      "number_of_shards": "5",
      "provided_name": "name",
      "creation_date": "1664261140456",
      "analysis": {
        "filter": {
          "lithuanian_stop": {
            "type": "stop",
            "stopwords": "_lithuanian_"
          },
          "lithuanian_stemmer": {
            "type": "stemmer",
            "language": "lithuanian"
          }
        },
        "char_filter": {
          "lithuanian_char_filter": {
            "type": "mapping",
            "mappings": [
              "a => 10",
              "A => 10",
              "ą => 11",
              "Ą => 11",
              "b => 12",
              "B => 12",
              "c => 13",
              "C => 13",
              "č => 14",
              "Č => 14",
              "d => 15",
              "D => 15",
              "e => 16",
              "E => 16",
              "ę => 17",
              "Ę => 17",
              "ė => 18",
              "Ė => 18",
              "f => 18",
              "F => 18",
              "g => 19",
              "G => 19",
              "h => 20",
              "H => 20",
              "i => 21",
              "I => 21",
              "į => 22",
              "Į => 22",
              "y => 23",
              "Y => 23",
              "j => 24",
              "J => 24",
              "k => 25",
              "K => 25",
              "l => 26",
              "L => 26",
              "m => 27",
              "M => 27",
              "n => 28",
              "N => 28",
              "o => 29",
              "O => 29",
              "p => 30",
              "P => 30",
              "r => 31",
              "R => 31",
              "s => 32",
              "S => 32",
              "š => 33",
              "Š => 33",
              "t => 34",
              "T => 34",
              "u => 35",
              "U => 35",
              "ų => 36",
              "Ų => 36",
              "ū => 37",
              "Ū => 37",
              "v => 38",
              "V => 38",
              "z => 39",
              "Z => 39",
              "ž => 40",
              "Ž => 40"
            ]
          }
        },
        "normalizer": {
          "lithuanian_sort_normalizer": {
            "char_filter": [
              "lithuanian_char_filter"
            ]
          }
        },
        "analyzer": {
          "search_asciifolding": {
            "filter": [
              "lowercase",
              "asciifolding"
            ],
            "tokenizer": "standard"
          },
          "lithianian_asciifolding": {
            "filter": [
              "lowercase",
              "asciifolding",
              "lithuanian_stop",
              "lithuanian_stemmer"
            ],
            "tokenizer": "standard"
          }
        }
      },
      "number_of_replicas": "1",
      "uuid": "uuid",
      "version": {
        "created": "6040399"
      }
    }
  },
  "mappings": {
    "default": {
      "dynamic_templates": [
        {
          "name": {
            "mapping": {
              "analyzer": "lithuanian",
              "fields": {
                "search_folded": {
                  "analyzer": "search_asciifolding",
                  "type": "text"
                },
                "keyword": {
                  "type": "keyword"
                },
                "folded": {
                  "analyzer": "lithianian_asciifolding",
                  "type": "text"
                }
              },
              "type": "text"
            },
            "match": "name"
          }
        },
        {
          "discount_attribute": {
            "mapping": {
              "type": "double"
            },
            "match": "discount"
          }
        }
      ]
    }
  }
}

CodePudding user response:

The query I propose may not be a silver bullet, but it can help you get the result you want. I used a field_value_factor function to boost the docs that have a "discount" value. Note that there are two match clauses: the first one requires every query term to match (operator "and"), which stands in for the exact match, and I gave it a high boost so that those docs stay at the top even if their discount is zero. The second one just retrieves the remaining documents.

{
  "query": {
    "function_score": {
      "query": {
        "bool": {
          "should": [
            {
              "match": {
                "name": {
                  "query": "EPS 100", >> boost for exact match
                  "boost": 100,
                  "operator": "and"
                }
              }
            },
            {
              "match": {
                "name": {
                  "query": "EPS 100" >> return all docs matching any of the terms (eps or 100)
                }
              }
            }
          ]
        }
      },
      "functions": [
        {
          "field_value_factor": {
            "field": "discount", >> boost for docs with a discount
            "factor": 1.2,
            "modifier": "none"
          }
        }
      ],
      "score_mode": "multiply",
      "boost_mode": "sum"
    }
  }
}
  • Related