please, help I need to do $lookup/join for field pid in 2 level nested array where coll2.string(pid)==coll1.a.p.pid , collection coll1 is pretty big and there is index on "a.p.pid" but not sure how to do lookup on nested array elements and by chance to avoid $unwind and use the index "a.p.pid" ...
coll1:
[
{
a:[
{
p:[
{pid:"1" ,date:"2022-01-22"},
{pid:"4", date:"2022-01-25"}
]
}
,{
p:[
{pid:"3",date:"2022-01-27"}
]
}
]
}
]
coll2:
[
{
pid:1
}
,
{
pid:3
}
]
Expected result:
[
{pid:1, date:"2022-01-22"}
{pid:3, date:"2022-01-27"}
]
collections sizes:
db.col2.count()
51171548
db.col1.count()
81039496
mongodb version 4.0
CodePudding user response:
Not sure why do you have to avoid using $unwind
db.col2.aggregate([
{
$set: { pid: { $toString: "$pid" } }
},
{
$lookup: {
from: "col1",
localField: "pid",
foreignField: "a.p.pid",
as: "date",
let: { pid: "$pid" },
pipeline: [
{
$unwind: "$a"
},
{
$unwind: "$a.p"
},
{
$match: { $expr: { $eq: [ "$a.p.pid", "$$pid" ] } }
}
]
}
},
{
$set: { date: { $first: "$date.a.p.date" }, pid: { $toInt: "$pid" } }
}
])
db.col2.aggregate([
{
$set: { pid: { $toString: "$pid" } }
},
{
$lookup: {
from: "col1",
localField: "pid",
foreignField: "a.p.pid",
as: "date"
}
},
{
$set: {
date: {
$filter: {
input: "$date",
as: "d1",
cond: {
$gt: [
{
$size: {
$filter: {
input: "$$d1.a",
as: "d2",
cond: {
$gt: [
{
$size: {
$filter: {
input: "$$d2.p",
as: "d3",
cond: { $eq: [ "$$d3.pid", "$pid" ] }
}
}
},
0
]
}
}
}
},
0
]
}
}
}
}
},
{
$set: { date: { $first: "$date.a" } }
},
{
$set: {
"date": {
$filter: {
input: "$date",
as: "d2",
cond: {
$gt: [
{
$size: {
$filter: {
input: "$$d2.p",
as: "d3",
cond: { $eq: [ "$$d3.pid", "$pid" ] }
}
}
},
0
]
}
}
}
}
},
{
$set: { date: { $first: "$date.p" } }
},
{
$set: {
date: {
$filter: {
input: "$date",
as: "d3",
cond: { $eq: [ "$$d3.pid", "$pid" ] }
}
}
}
},
{
$set: {
date: { $first: "$date.date" },
pid: { $toInt: "$pid" }
}
}
])