Home > Mobile >  How to get only 1 of each item in elastic search?
How to get only 1 of each item in elastic search?

Time:07-01

I run a search in Elasticsearch and get all the items, sorted by the "prGreater" field, for example. But the same item can appear multiple times in the query results.

Is it possible to search for all items but get only one of each item in the results?

This is my mapping:

{
  "mappings": {
    "properties": {
      "name": {
        "type": "text"
      },
      "nmId": {
        "type": "text"
      },
      "pcDay": {
        "type": "scaled_float",
        "scaling_factor": 1000000
      },
      "vlPaperNeg": {
        "type": "scaled_float",
        "scaling_factor": 1000000
      },
      "dhDay": {
        "type": "date",
        "format": "strict_date_time"
      },
      "prBegin": {
        "type": "scaled_float",
        "scaling_factor": 1000000
      },
      "prGreater": {
        "type": "scaled_float",
        "scaling_factor": 1000000
      },
      "prLowest": {
        "type": "scaled_float",
        "scaling_factor": 1000000
      },
      "prUNegoci": {
        "type": "scaled_float",
        "scaling_factor": 1000000
      },
      "prAvg": {
        "type": "scaled_float",
        "scaling_factor": 1000000
      },
      "prClose": {
        "type": "scaled_float",
        "scaling_factor": 1000000
      },
      "qtdCamp": {
        "type": "scaled_float",
        "scaling_factor": 1000000
      },
      "qtdSell": {
        "type": "scaled_float",
        "scaling_factor": 1000000
      },
      "estdPaper": {
        "type": "text"
      }
    }
  }
}

Query:

{
    "size": 100,
    "query": {
        "bool": {
            "filter": [
                {
                    "range": {
                        "dhDay": {
                            "from": "2022-06-23T00:00:00.000+0000",
                            "to": "2022-06-24T13:33:04.432+0000",
                            "include_lower": true,
                            "include_upper": false,
                            "boost": 1.0
                        }
                    }
                }
            ],
            "adjust_pure_negative": true,
            "boost": 1.0
        }
    },
    "sort": [
        {
            "prGreater": {
                "order": "desc"
            }
        }
    ]
}

CodePudding user response:

You basically want to group the items, say by their "name" and then for each group, get the latest item, sorted in descending order by dhDay.

You need a terms aggregation with a top_hits sub-aggregation:

{
  "size": 0,
  "aggs": {
    "name": {
      "terms": {
        "field": "name",
        "size": 100
      },
      "aggs": {
        "dhday_max": {
          "top_hits": {
            "size": 1,
            "sort": [
              {
                "dhDay": {
                  "order": "desc"
                }
              }
            ]
          }
        }
      }
    }
  }
}

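The sub-aggregation will contain the latest item for a given name, sorted on dhDay. You can add more fields to the sort, or use "prGreater" or anything else that you may need. Note that a terms aggregation requires an aggregatable field: "name" is mapped as text above, so either enable fielddata on it or add a keyword sub-field (for example "name.keyword") to the mapping and aggregate on that instead.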

  • Related