I have JSON data like this:
{"index":{"_index":"companydatabase"}}
{"FirstName":"ELVA","LastName":"RECHKEMMER","Designation":"CEO","Salary":"154000","DateOfJoining":"1993-01-11","Address":"8417 Blue Spring St. Port Orange, FL 32127","Gender":"Female","Age":62,"MaritalStatus":"Unmarried","Interests":["Body Building","Illusion","Protesting","Taxidermy","TV watching","Cartooning","Skateboarding"]}
{"index":{"_index":"companydatabase"}}
{"FirstName":"JENNEFER","LastName":"WENIG","Designation":"President","Salary":"110000","DateOfJoining":"2013-02-07","Address":"16 Manor Station Court Huntsville, AL 35803","Gender":"Female","Age":45,"MaritalStatus":"Unmarried","Interests":["String Figures","Working on cars","Button Collecting","Surf Fishing"]}
{"index":{"_index":"companydatabase"}}
I want to count the most common interests among these people.
I tried something like:
# Request body: a "terms" aggregation named "interests" on the
# "Interests.keyword" field, limited to 10 buckets, with an explicit order.
request_body = {
"size": 0,
"aggs": {
"interests": {
"terms": {
"field": "Interests.keyword",
"size": 10,
# NOTE(review): Elasticsearch's terms aggregation documents "_count" and
# "_key" as the built-in order keys; the bare "count" used here is the likely
# reason this request fails -- confirm against the error the server returns.
"order": {
"count": "desc"
}
}
}
}
}
# Send the body to the "companydatabase" index and wrap the response in
# JSON(...). Both `es` and `JSON` are defined outside this snippet.
JSON(es.search(index="companydatabase", body=request_body))
But it does not work.
Thanks for helping me.
CodePudding user response:
I tried it on your dataset. The two example documents have no interests in common, so every bucket has a count of 1. Use the documents below instead, where some interests are shared:
{
"FirstName": "JENNEFER",
"LastName": "WENIG",
"Designation": "President",
"Salary": "110000",
"DateOfJoining": "2013-02-07",
"Address": "16 Manor Station Court Huntsville, AL 35803",
"Gender": "Female",
"Age": 45,
"MaritalStatus": "Unmarried",
"Interests": [
"String Figures",
"Working on cars",
"Button Collecting",
"Surf Fishing",
"Body Building",
"Button Collecting",
"Cartooning"
]
}
And
{
"FirstName": "ELVA",
"LastName": "RECHKEMMER",
"Designation": "CEO",
"Salary": "154000",
"DateOfJoining": "1993-01-11",
"Address": "8417 Blue Spring St. Port Orange, FL 32127",
"Gender": "Female",
"Age": 62,
"MaritalStatus": "Unmarried",
"Interests": [
"Body Building",
"Illusion",
"Protesting",
"Taxidermy",
"TV watching",
"Cartooning",
"Skateboarding"
]
}
The terms aggregation sorts its buckets by doc_count in descending order by default, so you don't need an explicit order in your query. (If you do specify one, the order key is "_count", not "count".)
{
"size": 0,
"aggs": {
"interests": {
"terms": {
"field": "Interests.keyword",
"size": 10
}
}
}
}
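This gives the following buckets (doc_count is the number of documents containing the term, so "Button Collecting", which is listed twice in one document, still counts once):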
"buckets": [
{
"key": "Body Building",
"doc_count": 2 --> note
},
{
"key": "Cartooning",
"doc_count": 2 --> note
},
{
"key": "Button Collecting",
"doc_count": 1
},
{
"key": "Illusion",
"doc_count": 1
},
{
"key": "Protesting",
"doc_count": 1
},
{
"key": "Skateboarding",
"doc_count": 1
},
{
"key": "String Figures",
"doc_count": 1
},
{
"key": "Surf Fishing",
"doc_count": 1
},
{
"key": "TV watching",
"doc_count": 1
},
{
"key": "Taxidermy",
"doc_count": 1
}
]