Context
I have been teaching myself how to build a search feature on top of Azure Cognitive Search. The idea is basically a search engine that allows for searching questions against data in an existing document store. One enhancement that I have been looking at is to have a suggestion dialog pop up as the user types.
The Solution
At first I thought this would be easy: just point the search_client.suggest()
handler at the original searchable document store. However, that store contains the objects that can be matched to a question, rather than the actual questions themselves.
What I actually wanted was suggestions drawn from the questions users have been searching for. For example, I want to return the suggestion 'What is a chicken' as the user types 'What is a ...'. I therefore introduced a second index and document store called searches-idx.
This intercepts what users are searching for and writes it to a standalone store. I then pointed the search_client.suggest()
handler at that.
The problem
When the user types a question, the client returns suggestions as they type (woo!). If the user happens to see the question they want to ask appear in the dialog, they would naturally click the question and expect the search to take place.
This happens as expected; however, the actual search query param would be q=What is a chicken
and results will be returned for every document that happens to contain any of the terms what, a, chicken, etc.
My search is too broad and I need to narrow it, but I am unsure how best to approach this.
- I have looked at attributes like minimum_coverage, for matching based on a float.
- I have dabbled in swapping out the analyser, but I couldn't work out which one is best to use.
- I have looked at reducing the SearchableField fields on the index.
The Code
Index Builder
from typing import List
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (CorsOptions, SearchField,
SearchIndex,
SearchSuggester)
from settings import SEARCH_SERVICE_KEY, SEARCH_SERVICE_ENDPOINT, MAX_CORS_AGE
def build_cors(allowed_origins: List[str], max_age: int = 60) -> CorsOptions:
    """
    Build the CORS options for an index.
    :param allowed_origins: Cors Allowed Origins
    :param max_age: Value passed on as ``max_age_in_seconds`` (defaults to 60)
    """
    cors = CorsOptions(
        allowed_origins=allowed_origins,
        max_age_in_seconds=max_age,
    )
    return cors
def build_scoring_profile() -> List:
    """
    Return the scoring profiles for the index; currently always an empty list.
    Scoring profiles are meant for cases where the default ranking behavior
    does not go far enough in meeting your business objectives.
    INFO: https://learn.microsoft.com/en-us/azure/search/index-add-scoring-profiles
    """
    # TODO: no scoring profiles are defined yet.
    profiles: List = []
    return profiles
def build_suggesters(suggestion_fields: List[str]) -> SearchSuggester:
    """
    Build the index suggester (one per index), named "sg".
    A suggester backs autocompleted queries and suggested search results: it is a
    data structure that stores prefixes for matching on partial queries.
    It consists of a name and the suggester-aware fields that provide its content.
    searchMode is required, and always set to analyzingInfixMatching, which
    specifies that matching will occur on any term in the query string.
    :param suggestion_fields: Names of the fields the suggester draws from
    """
    suggester_kwargs = {
        "name": "sg",
        "search_mode": "analyzingInfixMatching",
        "source_fields": suggestion_fields,
    }
    return SearchSuggester(**suggester_kwargs)
def build_search_index(
    index_name: str,
    fields: List[SearchField],
    suggesters: SearchSuggester,
    scoring_profiles: List,
    cors_options: CorsOptions
) -> SearchIndex:
    """
    Assemble a SearchIndex definition from its parts.
    :param index_name: The name of the index
    :param fields: The actual fields of the index
    :param suggesters: The single suggester for the index (wrapped in a list here)
    :param scoring_profiles: Scoring profiles to attach to the index
    :param cors_options: CORS options to attach to the index
    """
    # SearchIndex takes a list of suggesters; this builder accepts exactly one.
    suggester_list = [suggesters]
    index_definition = SearchIndex(
        name=index_name,
        fields=fields,
        suggesters=suggester_list,
        scoring_profiles=scoring_profiles,
        cors_options=cors_options,
    )
    return index_definition
def create(
    allowed_origins: List[str],
    suggestion_fields: List[str],
    index_name: str,
    fields: List[SearchField]
) -> None:
    """
    Build an index definition and create it on the search service.
    :param allowed_origins: Cors Allowed Origins
    :param suggestion_fields: Fields that will be queried as part of suggestions
    :param index_name: The name of the index
    :param fields: The actual fields of the index
    """
    # Gather the individual pieces of the index definition.
    profiles: List = build_scoring_profile()
    cors: CorsOptions = build_cors(allowed_origins=allowed_origins, max_age=MAX_CORS_AGE)
    suggester: SearchSuggester = build_suggesters(suggestion_fields=suggestion_fields)

    new_index = build_search_index(
        index_name=index_name,
        fields=fields,
        suggesters=suggester,
        scoring_profiles=profiles,
        cors_options=cors,
    )

    # Send the definition to the service configured in settings.
    index_client = SearchIndexClient(
        endpoint=SEARCH_SERVICE_ENDPOINT,
        credential=AzureKeyCredential(key=SEARCH_SERVICE_KEY),
    )
    index_client.create_index(new_index)
Uploader
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from loader import Loader
from settings import (SEARCH_SERVICE_ENDPOINT, SEARCH_SERVICE_KEY,
SEARCH_SOCIAL_INDEX_NAME)
def upload(reshaped_data, index_name: str = SEARCH_SOCIAL_INDEX_NAME):
    """
    Hand ``reshaped_data`` to a Loader bound to the given index and run it.
    :param reshaped_data: Data passed straight through to ``Loader``
    :param index_name: Index to write to (defaults to SEARCH_SOCIAL_INDEX_NAME)
    """
    credential = AzureKeyCredential(key=SEARCH_SERVICE_KEY)
    search_client = SearchClient(
        endpoint=SEARCH_SERVICE_ENDPOINT,
        index_name=index_name,
        credential=credential,
    )
    # The context manager closes the client once the load has finished.
    with search_client as client:
        Loader(client, reshaped_data).to_index()
Search Handlers
import uuid
from datetime import datetime, timezone
from typing import Dict, List
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from flask import Blueprint, Response, jsonify, request
from werkzeug.exceptions import NotFound
from search import SearchableItem
from http_raiser import raise_http
from settings import (SEARCH_SERVICE_ENDPOINT, SEARCH_SERVICE_KEY,
SEARCH_SOCIAL_INDEX_NAME)
from utils import upload
# Flask blueprint that groups the search routes under the "/search" URL prefix.
search_blueprint = Blueprint("search", __name__, url_prefix="/search")
@search_blueprint.route("", methods=("GET",))
def search() -> Response:
    """
    Search the main index for ``q`` and record the query in "searches-idx".

    Query-string parameters:
    - q: the search text (required); a missing value is reported via raise_http.
    - top: maximum number of results to return (integer, default 10).
    - skip: number of results to skip (integer, default 0).
    - search_mode: "any" (default) or "all". With "all", every term of the query
      must match for a document to be returned, which narrows the results for
      multi-word questions. Unrecognised values fall back to "any".

    Returns the matching items as a JSON list.
    """
    q = request.args.get('q')
    if q is None:
        raise_http(NotFound, "Please provide a query (?q=) parameter")

    # type=int makes the lookup fall back to the default when the supplied value
    # is not a valid integer, instead of forwarding an arbitrary string.
    top = request.args.get('top', 10, type=int)
    skip = request.args.get('skip', 0, type=int)

    # Opt-in narrowing; "any" is the service default, so existing callers see
    # no change in behaviour.
    search_mode = request.args.get('search_mode', 'any')
    if search_mode not in ('any', 'all'):
        search_mode = 'any'

    # Add the searched query to our Search Index
    upload(
        index_name="searches-idx",
        reshaped_data={
            "search_id": str(uuid.uuid4()),
            "search_query": q,
            "searched_on": datetime.now(tz=timezone.utc).isoformat()
        }
    )

    # Use the client as a context manager (as upload() does) so it is closed.
    with SearchClient(
        endpoint=SEARCH_SERVICE_ENDPOINT,
        index_name=SEARCH_SOCIAL_INDEX_NAME,
        credential=AzureKeyCredential(key=SEARCH_SERVICE_KEY)
    ) as client:
        search_results = client.search(
            search_text=q,
            top=top,
            skip=skip,
            search_mode=search_mode,
        )
        results = SearchableItem.from_result_as_dict(search_results)
        # Materialise inside the block: results may be fetched lazily and the
        # client must still be open while they are consumed.
        payload = list(results)
    return jsonify(payload)
@search_blueprint.route("/suggest", methods=("GET", ))
def suggest() -> Response:
    """
    Return search-as-you-type suggestions for ``q`` from the "searches-idx" index.

    A Suggestions request is a search-as-you-type query that looks for matching values in suggester-aware
    fields and returns documents that contain a match.
    For example, if you enable suggestions on a city field, typing "sea" produces documents
    containing "Seattle", "Sea Tac", and "Seaside" (all actual city names) for that field.

    Query-string parameters:
    - q: the partial search text (required); a missing value is reported via raise_http.
    - top: maximum number of suggestions to return (integer, default 10).
    - minimum_coverage: value forwarded as ``minimum_coverage`` (float, default 80).

    Returns the suggestions as a JSON list.
    """
    q = request.args.get('q')
    if q is None:
        raise_http(NotFound, "Please provide a query (q=) parameter")

    # type= makes the lookup fall back to the default when the supplied value
    # cannot be converted, rather than raising ValueError on bad input.
    top = request.args.get('top', 10, type=int)
    minimum_coverage = request.args.get('minimum_coverage', 80.0, type=float)

    # Use the client as a context manager (as upload() does) so it is closed.
    with SearchClient(
        endpoint=SEARCH_SERVICE_ENDPOINT,
        index_name="searches-idx",
        credential=AzureKeyCredential(key=SEARCH_SERVICE_KEY)
    ) as client:
        suggestions: List[Dict] = client.suggest(
            top=top,
            search_text=q,
            minimum_coverage=minimum_coverage,
            select=["*"],
            suggester_name="sg",
            use_fuzzy_matching=True,
        )
        payload = list(suggestions)
    return jsonify(payload)
CodePudding user response:
What you're looking for is semantic search, which will try to properly match the provided query params with the content stored inside your indexes.
https://learn.microsoft.com/en-us/azure/search/semantic-search-overview