Home > Enterprise >  How to save deletion in a deeply nested MongoDB document
How to save deletion in a deeply nested MongoDB document

Time:03-11

I am new to MongoDB and I am using MongoDB shell to perform the operations. I am working to remove the array named Process from all the Items, but it seems that I do not grasp the remove concept correctly. The documents we use are deeply nested - we do not know how many items there are, or how deep the level of nesting.
What I tried so far is to use recursion to iterate through the items:



    /**
     * Recursively strips the `Process` field from every item of a nested
     * `Items` hierarchy, at any depth.
     *
     * The removal is done on the in-memory objects only; the caller still has
     * to write the modified document back to the collection to persist it.
     * (`$unset` is an update operator and only has a meaning inside an update
     * command - written as a bare statement it is just a label and does nothing.)
     *
     * Items whose `Process` is missing or null are left untouched.
     *
     * @param {Array} docItems - The `Items` array of a document or of a parent
     *     item. Anything that is not an array is ignored, which ends the recursion.
     * @returns {boolean} true if at least one `Process` field was removed.
     */
    function removeAllProcessFields(docItems)
    {
        var removedAny = false;
        if(!Array.isArray(docItems))
        {
            return removedAny;
        }
        docItems.forEach(function(item)
        {
            // Null or primitive entries can carry neither Process nor Items.
            if(item === null || typeof item !== "object")
            {
                return;
            }
            print("idItem: " + item._id);
            if(item.Process == null)
            {
                print("Process null");
            }
            else
            {
                delete item.Process;
                removedAny = true;
            }
            // Descend into the children; remember if anything changed below.
            if(removeAllProcessFields(item.Items))
            {
                removedAny = true;
            }
        });
        return removedAny;
    }
    
    // Walk every document of the collection, let removeAllProcessFields work
    // on its nested Items, then write the Items array back so that any change
    // made to the in-memory document is actually persisted.
    var docs = db.getCollection('MyCollection').find({})
    docs.forEach(function(doc)
    {
        print("idDoc: " + doc._id);
        removeAllProcessFields(doc.Items);
        // The cursor hands out plain in-memory copies, so modifying them does
        // not touch the collection; $set stores the current Items array.
        // Documents without an Items array are left alone.
        if(Array.isArray(doc.Items))
        {
            db.getCollection('MyCollection').updateOne(
                { _id: doc._id },
                { $set: { Items: doc.Items } }
            );
        }
    })

But I am having difficulty using $unset properly to save the result of the operation.
An example document would be:



    {
        "_id": "622226d319517e83e8ed6151",
        "Name": "test1",
        "Description": "",
        "Items": [{
            "_id": "622226d319517e83e8ed614e",
            "Name": "test-item",
            "Description": "",
            "Process": [{
                "Name": "Step1"
            }, {
                "Name": "Step2"
            }],
            "Items": [{
                    "_id": "622226d319517e83e8ed614f",
                    "Name": "test-subItem1",
                    "Description": "",
                    "Process": [{
                        "Name": "StepSub1"
                    }, {
                        "Name": "StepSub2"
                    }, {
                        "Name": "StepSub3"
                    }],
                    "Items": []
                },
                {
                    "_id": "622226d319517e83e8ed6150",
                    "Name": "test-subItem2",
                    "Description": "",
                    "Process": [{
                        "Name": "StepSub4"
                    }, {
                        "Name": "StepSub5"
                    }, {
                        "Name": "StepSub6"
                    }],
                    "Items": []
                }
    
            ]
        }]
    }

What I hope to achieve would be:



    {
        "_id": "622226d319517e83e8ed6151",
        "Name": "test1",
        "Description": "",
        "Items": [{
            "_id": "622226d319517e83e8ed614e",
            "Name": "test-item",
            "Description": "",
            "Items": [{
                    "_id": "622226d319517e83e8ed614f",
                    "Name": "test-subItem1",
                    "Description": "",
                    "Items": []
                },
                {
                    "_id": "622226d319517e83e8ed6150",
                    "Name": "test-subItem2",
                    "Description": "",
                    "Items": []
                }
    
            ]
        }]
    }

CodePudding user response:

Assuming you are on v4.4 or later, you can use the "merge onto self" feature of $merge together with a recursive function that sweeps through the collection and surgically removes one field, or a list of fields, at any level of the hierarchy. The same sort of need arises when processing json-schema data, which is also arbitrarily hierarchical.

The solution below has extra logic to "mark" documents that had any modifications so the others can be removed from the update set passed to $merge. It also can be further refined to reduce some variables; it was edited down from a more general solution that had to examine keys and values.

// Removes the target field(s) from every document of `foo`, at any nesting
// depth, and writes the cleaned documents back onto the same collection.
db.foo.aggregate([
    // Stage 1: rebuild each document with the target field(s) stripped and a
    // temporary '__didSomething' marker telling whether anything was removed.
    {$replaceRoot: {newRoot: {$function: {
        // obj:    the whole document ($$ROOT)
        // target: a field name, or an array of field names, to remove
        body: function(obj, target) {
            var didSomething = false;

            // Descend into a value: arrays element by element, objects via walkObj.
            var process = function(value) {
                // test arrays FIRST since [] instanceof Object is true!
                if(Array.isArray(value)) {
                    for(var jj = 0; jj < value.length; jj++) {
                        process(value[jj]);
                    }
                } else if(value instanceof Object) {
                    walkObj(value);
                }
            };

            // Delete matching keys of one object and recurse into the others.
            // Object.keys() returns a snapshot, so deleting while looping is safe.
            var walkObj = function(obj) {
                Object.keys(obj).forEach(function(k) {
                    if(target.indexOf(k) > -1) {
                        delete obj[k];
                        didSomething = true;
                    } else {
                        process(obj[k]);
                    }
                });
            }

            // ENTRY POINT:
            if(!Array.isArray(target)) {
                target = [ target ]; // if not array, make it an array
            }
            walkObj(obj);
            obj['__didSomething'] = didSomething;

            return obj;
        },

        // Invoke!
        // You can delete multiple fields with an array, e.g.:
        //   ..., ['Process','Description']
        args: [ "$$ROOT", 'Process' ],

        lang: "js"
        }}
    }}

    // Stage 2: keep only the documents that were actually modified ...
    ,{$match: {'__didSomething':true}}
    ,{$project: {'__didSomething':false}} // ... and take the marker out

    // Stage 3: write the cleaned documents back onto the source collection.
    // "replace" is required here: "merge" keeps every existing field that is
    // absent from the pipeline output, so a target field sitting at the top
    // level of a document would never be removed.
    ,{$merge: {
        into: "foo",
        on: [ "_id" ],
        whenMatched: "replace",
        whenNotMatched: "fail"
    }}

]);

CodePudding user response:

Something like this, perhaps, using the all positional operator $[]:

// Unset Process on every first-level and second-level item of ALL documents.
// updateMany is used because update() without { multi: true } stops after
// the first matching document.
db.collection.updateMany({},
{
 $unset: {
  "Items.$[].Items.$[].Process": 1,
  "Items.$[].Process": 1
}
})

You just need to construct it in the recursion ...

playground

JavaScript recursive function example:

 mongos> db.rec.find()
{ "_id" : ObjectId("622a6c46ae295edb276df8e2"), "Items" : [ { "a" : 1 }, { "Items" : [ { "Items" : [ { "Items" : [ ], "Process" : [ 1, 2, 3 ] } ], "Process" : [ 4, 5, 6 ] } ], "Process" : [ ] } ] }

 // For every document, print one $unset update per nesting level of Items
 // at which a Process field occurs. Nothing is modified; the commands are
 // only printed.
 db.rec.find().forEach(function(obj) {
     var id = obj._id;
     var ar = [];   // distinct "<path>.Process" strings, in discovery order

     // Recursively walks nested Items. `prefix` is the path that leads to
     // `node` ("" for the document itself, otherwise ending with a dot), so
     // the depth is tied to the recursion and not to how many siblings
     // happen to carry a Process field.
     function x(node, prefix) {
         if (!Array.isArray(node.Items)) {
             return;
         }
         var z = prefix + "Items.$[]";
         node.Items.forEach(function(k) {
             if (k === null || typeof k !== "object") {
                 return;
             }
             // Siblings share the same path, so record each path only once.
             if (typeof k.Process != "undefined" && ar.indexOf(z + ".Process") === -1) {
                 ar.push(z + ".Process");
             }
             x(k, z + ".");
         });
     }

     x(obj, "");
     ar.forEach(function(del) {
         print("db.collection.update({_id:ObjectId('" + id + "')},{$unset:{'" + del + "':1}})");
     });
 })

 db.collection.update({_id:ObjectId('622a6c46ae295edb276df8e2')},{$unset:{'Items.$[].Process':1}})
 db.collection.update({_id:ObjectId('622a6c46ae295edb276df8e2')},{$unset:{'Items.$[].Items.$[].Process':1}})
 db.collection.update({_id:ObjectId('622a6c46ae295edb276df8e2')},{$unset:{'Items.$[].Items.$[].Items.$[].Process':1}})
 mongos> 

Explained:

  1. Loop over all documents in collection with forEach
  2. Define a recursive function x that walks any number of nested Items levels, checks whether a Process field is present, and pushes the corresponding path onto the array ar
  3. Finally, loop over the array ar and construct the $unset update queries. In the example they are only printed, for safety, but you can improve this by generating a single query per document and executing it ...
  • Related