Home > Mobile >  Concatenating fields in OpenSearch / ElasticSearch aggregate
Concatenating fields in OpenSearch / ElasticSearch aggregate

Time:06-29

I have an OpenSearch index with the following mapping (simplified):

PUT /house
{
  "mappings": {
    "properties": {
      "house": { "type": "keyword" },
      "people": {
        "type": "nested",
        "properties": {
          "forename": { "type": "keyword" },
          "surname": { "type": "keyword" }
        }
      }
    }
  }
}

I'd like to retrieve an aggregate where the bucket key is "[forename] [surname]".

Toy data:

PUT /house/_doc/1
{
  "house": "house1",
  "people": [
    { "forename": "Dave", "surname": "Daveson" },
    { "forename": "Jeff", "surname": "Jeffson" }
  ]
}

PUT /house/_doc/2
{
  "house": "house1",
  "people": [
    { "forename": "Dave", "surname": "Daveson" },
    { "forename": "Jeffs", "surname": "Jeffsons" }
  ]
}

The following doesn't return what I'd expect, and I can't figure out what object paths to put in the script to get it to work:

GET house/_search
{
  "aggs": {
    "people": {
      "nested": {
        "path": "people"
      },
      "aggs": {
        "people.name": {
          "terms": {
            "script": "[params._source['forename'], params._source['surname']].join(' ')"
          }
        }
      }
    }
  },
  "size": 0
}

Returns:

{
  "took" : 5,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 2,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "people" : {
      "doc_count" : 4,
      "people.name" : {
        "doc_count_error_upper_bound" : 0,
        "sum_other_doc_count" : 0,
        "buckets" : [
          {
            "key" : "null null",
            "doc_count" : 4
          }
        ]
      }
    }
  }
}

Without a script I can aggregate correctly on forename, surname, or both, but when using both I can't reliably "join" the results, since the buckets can only be sorted by doc_count or key:

GET house/_search
{
  "aggs": {
    "people": {
      "nested": {
        "path": "people"
      },
      "aggs": {
        "people.forename": {
          "terms": { "field": "people.forename" }
        },
        "people.surname": {
          "terms": { "field": "people.surname" }
        }
      }
    }
  },
  "size": 0
}

Returns:

{
  "took" : 4,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 2,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "people" : {
      "doc_count" : 4,
      "people.surname" : {
        "doc_count_error_upper_bound" : 0,
        "sum_other_doc_count" : 0,
        "buckets" : [
          {
            "key" : "Daveson",
            "doc_count" : 2
          },
          {
            "key" : "Jeffson",
            "doc_count" : 1
          },
          {
            "key" : "Jeffsons",
            "doc_count" : 1
          }
        ]
      },
      "people.forename" : {
        "doc_count_error_upper_bound" : 0,
        "sum_other_doc_count" : 0,
        "buckets" : [
          {
            "key" : "Dave",
            "doc_count" : 2
          },
          {
            "key" : "Jeff",
            "doc_count" : 1
          },
          {
            "key" : "Jeffs",
            "doc_count" : 1
          }
        ]
      }
    }
  }
}

CodePudding user response:

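To get the result you want, read the fields through doc values, using their full nested path, instead of `params._source`: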

GET house/_search
{
  "aggs": {
    "people": {
      "nested": {
        "path": "people"
      },
      "aggs": {
        "people.name": {
          "terms": {
            "script": "doc['people.forename'].value + ' ' + doc['people.surname'].value"
          }
        }
      }
    }
  },
  "size": 0
}

Results:

"aggregations" : {
    "people" : {
      "doc_count" : 4,
      "people.name" : {
        "doc_count_error_upper_bound" : 0,
        "sum_other_doc_count" : 0,
        "buckets" : [
          {
            "key" : "Dave Daveson",
            "doc_count" : 2
          },
          {
            "key" : "Jeff Jeffson",
            "doc_count" : 1
          },
          {
            "key" : "Jeffs Jeffsons",
            "doc_count" : 1
          }
        ]
      }
    }
  }
  • Related