I'm trying to develop a scroll routine in order to process some elements stored in Elasticsearch using Node.js — specifically `@elastic/elasticsearch` version 8.2.1.
This is what I have so far:
/**
 * Lazily iterates over every hit that matches `query` in
 * "my.elasticsearch.index", paging through the results with the scroll API.
 *
 * The server-side scroll context is released when iteration ends, whether the
 * results were exhausted, the consumer stopped early (`break`/`return`), or a
 * request failed part-way through.
 *
 * @param {string} query - JSON text. It is parsed and forwarded unchanged as
 *   the `query` option of `client.search`.
 * @yields {object} One element of `response.hits.hits` at a time.
 * @throws {SyntaxError} If `query` is not valid JSON. Errors raised by
 *   `client.search` / `client.scroll` propagate to the consumer.
 */
async function* scrollSearch(query) {
  // Use the same keep-alive for the initial search and every follow-up page so
  // the context does not expire while the consumer is still working on a page.
  const keepAlive = "1m";
  // Most recent scroll ID seen; needed in `finally` to release the context.
  let scrollId;

  try {
    let response = await client.search({
      index: "my.elasticsearch.index",
      query: JSON.parse(query),
      scroll: keepAlive,
      size: 100,
    });

    while (true) {
      // Always remember the latest ID: it is the one to continue from and the
      // one to clear once we are done.
      if (response._scroll_id) {
        scrollId = response._scroll_id;
      }

      const sourceHits = response.hits.hits;
      if (sourceHits.length === 0) {
        break;
      }

      for (const hit of sourceHits) {
        yield hit;
      }

      if (!response._scroll_id) {
        break;
      }

      response = await client.scroll({
        scroll_id: response._scroll_id,
        scroll: keepAlive,
      });
    }
  } finally {
    if (scrollId) {
      try {
        await client.clearScroll({ scroll_id: scrollId });
      } catch {
        // Best-effort cleanup: the context expires by itself after the
        // keep-alive, and a cleanup failure must not replace an error that is
        // already propagating out of the `try` block.
      }
    }
  }
}
For the time being I'm just debugging this, so I'm invoking the previous function directly:
// Search payload written as JSON text. Its single top-level key is "query",
// whose value is a bool/must clause: one `match` on "status" plus six `exists`
// checks on nested fields.
const query = `
{
"query": {
"bool": {
"must": [
{
"match": {
"status": "THE_STATUS"
}
},
{
"exists": {
"field": "object1.field1"
}
},
{
"exists": {
"field": "object2.field1"
}
},
{
"exists": {
"field": "object2.field2"
}
},
{
"exists": {
"field": "object3.field1"
}
},
{
"exists": {
"field": "object3.field2"
}
},
{
"exists": {
"field": "object3.field3"
}
}
]
}
}
}
`;
// Drain the async generator and print the `_source` of every hit it yields.
// scrollSearch parses the text above and passes the resulting object as the
// `query` option of `client.search`.
for await (const hit of scrollSearch(query)) {
console.log(hit._source);
}
I'm always getting the following error with that query:
ResponseError: parsing_exception: [parsing_exception] Reason: unknown query [query]
at SniffingTransport.request (node_modules/@elastic/transport/lib/Transport.js:476:27)
at processTicksAndRejections (node:internal/process/task_queues:96:5)
at async Client.SearchApi [as search] (node_modules/@elastic/elasticsearch/lib/api/api/search.js:65:12)
at async scrollSearch (file:///Users/x80486/Workshop/debug-tool/src/elasticsearch.service.js:15:18)
at async Context.<anonymous> (file:///Users/x80486/Workshop/debug-tool/test/my.test.js:132:22)
On the other hand, if I use the exact same query in the Dev Tools
interface, I'm getting the expected results back:
POST my.elasticsearch.index/_search?scroll=1m
{
"query": {
"bool": {
"must": [
{
"match": {
"status": "THE_STATUS"
}
},
{
"exists": {
"field": "object1.field1"
}
},
{
"exists": {
"field": "object2.field1"
}
},
{
"exists": {
"field": "object2.field2"
}
},
{
"exists": {
"field": "object3.field1"
}
},
{
"exists": {
"field": "object3.field2"
}
},
{
"exists": {
"field": "object3.field3"
}
}
]
}
}
}
{
"_scroll_id" : "the scroll id value",
"took" : 266,
"timed_out" : false,
"_shards" : {
"total" : 12,
"successful" : 12,
"skipped" : 0,
"failed" : 0
},
...
}
Is there a way to find out which part of the query the Node.js client does not understand or, more generally, to validate it?
CodePudding user response:
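Well, well... I just needed to remove the outer `query` object/segment. The client's `query` option already maps to the `query` key of the request body, so wrapping the clause again sends `{"query": {"query": ...}}`, which Elasticsearch rejects with `unknown query [query]`. This is how it must be: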
{
"bool": {
"must": [
{
"match": {
"status": "THE_STATUS"
}
},
{
"exists": {
"field": "object1.field1"
}
},
{
"exists": {
"field": "object2.field1"
}
},
{
"exists": {
"field": "object2.field2"
}
},
{
"exists": {
"field": "object3.field1"
}
},
{
"exists": {
"field": "object3.field2"
}
},
{
"exists": {
"field": "object3.field3"
}
}
]
}
}