I'd like to 'join' two MongoDB collections on a key, without creating a nested structure.
Code:
# Init client
client_mongo = pymongo.MongoClient(
host="mongo", port=27017, username="user", password="pass"
)
# Use db gen
db = client_mongo.gen
# Create collections
db.create_collection("wikipedia")
db.create_collection("twitter")
# Add sample record to collection twitter
db.twitter.insert_one({'_id': "1527275059001274368",
'name': 'Nike',
'username_twitter': 'nikestore',
'user_id_twitter': 17351972,
'text_twitter': '@sillspb Hey, Paul. What error message are you seeing on your end?'})
# Add sample record to collection wikipedia
db.wikipedia.insert_one({'_id': '6286417074ef92666893148a',
'name': 'Nike',
'page_wikipedia': 'Nike,_Inc.',
'text_wikipedia': 'Nike, Inc. ( or ) is an American multinational corporation that is engaged in the design, development, manufacturing, and worldwide marketing and sales of footwear, apparel, equipment, accessories, and services.'})
I'd like to (do the equivalent of) join on key "name" and create a new collection named "twitter_wikipedia", without a nested structure.
Current solution with nested structure:
db.twitter.aggregate(
[
{
"$lookup": {
"from": "wikipedia", # other table name
"localField": "name", # key field in collection 2
"foreignField": "name", # key field in collection 1
"as": "linked_collections", # alias for resulting table
}
},
{
'$out': 'twitter_wikipedia'
}
])
Current output
db.twitter_wikipedia.find_one({})
{'_id': '1527275059001274368',
'name': 'Nike',
'username_twitter': 'nikestore',
'user_id_twitter': 17351972,
'text_twitter': '@sillspb Hey, Paul. What error message are you seeing on your end?',
'linked_collections': [{'_id': '6286417074ef92666893148a',
'name': 'Nike',
'page_wikipedia': 'Nike,_Inc.',
'text_wikipedia': 'Nike, Inc. ( or ) is an American multinational corporation that is engaged in the design, development, manufacturing, and worldwide marketing and sales of footwear, apparel, equipment, accessories, and services.'}]}
# Desired output
{'name': 'Nike',
'username_twitter': 'nikestore',
'user_id_twitter': 17351972,
'text_twitter': '@sillspb Hey, Paul. What error message are you seeing on your end?',
'page_wikipedia': 'Nike,_Inc.',
'text_wikipedia': 'Nike, Inc. ( or ) is an American multinational corporation that is engaged in the design, development, manufacturing, and worldwide marketing and sales of footwear, apparel, equipment, accessories, and services.'}
CodePudding user response:
Just perform simple $unwind
and $project
after you do the $lookup
db.twitter.aggregate([
{
"$lookup": {
"from": "wikipedia",
"localField": "name",
"foreignField": "name",
"as": "linked_collections"
}
},
{
"$unwind": "$linked_collections"
},
{
"$project": {
"name": 1,
"username_twitter": 1,
"user_id_twitter": 1,
"text_twitter": 1,
"page_wikipedia": "$linked_collections.page_wikipedia",
"text_wikipedia": "$linked_collections.text_wikipedia"
}
},
{
$out: "twitter_wikipedia"
}
])
Here is the Mongo Playground for your reference.