Home > Software design >  Return a value if field is missing in Elastic search document
Return a value if field is missing in Elastic search document

Time:02-02

I have an index user. I'm filtering user on LOCATION_ID and _source consists of PHONE_NUMBER & USER_ID, some of the documents does not have PHONE_NUMBER data. So,it returns a reponse with just USER_ID. Is there any way i will get some default or predefined value(passing in query, like we do in count for missing field) for field PHONE_NUMBER in cases where its missing in document.

Mapping:

{
  "PHONE_NUMBER": {
    "type": "long",
    "store": true
  },
  "USER_ID": {
    "type": "long",
    "store": true
  },
  "LOCATION_ID": {
    "type": "long",
    "store": true
  }
}

Query:

{
  "_source":[
     "PHONE_NUMBER",
     "USER_ID"
  ],
  "query":{
     "bool":{
        "must":[
           {
              "terms":{
                 "LOCATION_ID":[
                    "5001"
                 ]
              }
           }
        ],
        "must_not":[
           
        ]
     }
  },
  "from":0,
  "size":2000
}

Response:

{
  "took":0,
  "timed_out":false,
  "_shards":{
     "total":1,
     "successful":1,
     "skipped":0,
     "failed":0
  },
  "hits":{
     "total":{
        "value":4,
        "relation":"eq"
     },
     "max_score":2.0,
     "hits":[
        {
           "_index":"user",
           "_id":"39788",
           "_score":2.0,
           "_source":{
              "USER_ID":39788
           }
        },
        {
           "_index":"user",
           "_id":"30784",
           "_score":2.0,
           "_source":{
              "USER_ID":30784,
              "PHONE_NUMBER":1234567890
           }
        },
        {
           "_index":"user",
           "_id":"36373",
           "_score":2.0,
           "_source":{
              "USER_ID":36373,
              "PHONE_NUMBER":1234567893
           }
        },
        {
           "_index":"user",
           "_id":"36327",
           "_score":2.0,
           "_source":{
              "USER_PROJECT_USER_ID":36327
           }
        }
     ]
  }
}

In above reponse PHONE_NUMBER is missing in first last document. I want some default or predifined value(set in query, like we do in count for missing field) to return if field is missing.

Expected Response:

{
  "took":0,
  "timed_out":false,
  "_shards":{
     "total":1,
     "successful":1,
     "skipped":0,
     "failed":0
  },
  "hits":{
     "total":{
        "value":4,
        "relation":"eq"
     },
     "max_score":2.0,
     "hits":[
        {
           "_index":"user",
           "_id":"39788",
           "_score":2.0,
           "_source":{
              "USER_ID":39788,
              "PHONE_NUMBER":9876543210.     <- Default or Predifined value (set in query, like we do in count for missing field)
           }
        },
        {
           "_index":"user",
           "_id":"30784",
           "_score":2.0,
           "_source":{
              "USER_ID":30784,
              "PHONE_NUMBER":1234567890
           }
        },
        {
           "_index":"user",
           "_id":"36373",
           "_score":2.0,
           "_source":{
              "USER_ID":36373,
              "PHONE_NUMBER":1234567893
           }
        },
        {
           "_index":"user",
           "_id":"36327",
           "_score":2.0,
           "_source":{
              "USER_PROJECT_USER_ID":36327,
              "PHONE_NUMBER":9876543210      <- Default or Predifined value (set in query, like we do in count for missing field)
           }
        }
     ]
  }
}

any help would be greatly appreciated.

CodePudding user response:

Tldr

This exact result can not be achieved a query time. You will have to do it during the ingestion.

But there is a solution that is close enough at query time, using the runtime fields.

Solutions

1. At ingest time

You can set your mapping to be:

{
  "PHONE_NUMBER": {
    "type": "long",
    "store": true,
    "null_value": "9876543210" <- the specific / default number
  },
  "USER_ID": {
    "type": "long",
    "store": true
  },
  "LOCATION_ID": {
    "type": "long",
    "store": true
  }
}

Your document with no phone number will now have a default value. The down side is, it is not dynamic. You can not update this value at query time.

2. At query time

set up:

POST /_bulk
{"index":{"_index":"75278567"}}
{"USER_ID":123456,"PHONE_NUMBER":12345}
{"index":{"_index":"75278567"}}
{"USER_ID":234567,"PHONE_NUMBER":234567}
{"index":{"_index":"75278567"}}
{"USER_ID":345678}
{"index":{"_index":"75278567"}}
{"USER_ID":456789}

Using the runtime fields you could create the following query:

GET /75278567/_search
{
  "runtime_mappings": {
    "NUMBER": {
      "type": "keyword",
      "script": {
        "source": """
        if (doc["PHONE_NUMBER"].size() == 0){
          emit("000000")
        } else
        {
          emit(doc["PHONE_NUMBER"].value.toString())
        }
        """
      }
    }
  },
  "fields": [
    "NUMBER"
  ]
}

This is going to give you the following results:

{
  "took": 5,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 4,
      "relation": "eq"
    },
    "max_score": 1,
    "hits": [
      {
        "_index": "75278567",
        "_id": "3njWAYYBArbKoMpIcXFp",
        "_score": 1,
        "_source": {
          "USER_ID": 123456,
          "PHONE_NUMBER": 12345
        },
        "fields": {
          "NUMBER": [
            "12345"
          ]
        }
      },
      {
        "_index": "75278567",
        "_id": "33jWAYYBArbKoMpIcXFp",
        "_score": 1,
        "_source": {
          "USER_ID": 234567,
          "PHONE_NUMBER": 234567
        },
        "fields": {
          "NUMBER": [
            "234567"
          ]
        }
      },
      {
        "_index": "75278567",
        "_id": "4HjWAYYBArbKoMpIcXFp",
        "_score": 1,
        "_source": {
          "USER_ID": 345678
        },
        "fields": {
          "NUMBER": [
            "000000"
          ]
        }
      },
      {
        "_index": "75278567",
        "_id": "4XjWAYYBArbKoMpIcXFp",
        "_score": 1,
        "_source": {
          "USER_ID": 456789
        },
        "fields": {
          "NUMBER": [
            "000000"
          ]
        }
      }
    ]
  }
}

This not not in the _source but you can access the default values in fields.

  • Related