Home > database >  Merge subdocument on key value in MongoDb
Merge subdocument on key value in MongoDb

Time:01-21

I have this model

[
  {
    "companyId": 1,
    "dataFields": {
            "ghg_emissions_scope1": {
                "dataField": "ghg_emissions_scope1",
                "iterations": [
                    {"year": "2019", "scope1": "12459"},
                    {"year": "2020", "scope1": "15483"},
                    {"year": "2021", "scope1": "90051"}
                ]
            },
            "ghg_emissions_scope2": {
                "dataField": "ghg_emissions_scope2",
                "iterations": [
                    {"year": "2017", "scope2_location_based": "1234", "scope2_market_based": "1234"},
                    {"year": "2018", "scope2_location_based": "13423", "scope2_market_based": "543654"},
                    {"year": "2019", "scope2_location_based": "235434", "scope2_market_based": "42343"},
                    {"year": "2020", "scope2_location_based": "12432334", "scope2_market_based": "234423"}
                ]
            },
            "ghg_emissions_scope3": {
                "dataField": "ghg_emissions_scope3",
                "iterations": [
                    {"year": "2018", "ghg_emission_associated": "354", "ghg_emission_other": "423423", "ghg_emissions_products": "967"},
                    {"year": "2019", "ghg_emission_associated": "12459", "ghg_emission_other": "6545476", "ghg_emissions_products": "445663"},
                    {"year": "2020", "ghg_emission_associated": "6454", "ghg_emission_other": "423432", "ghg_emissions_products": "756657"},
                    {"year": "2021", "ghg_emission_associated": "234", "ghg_emission_other": "1235", "ghg_emissions_products": "564"},
                    {"year": "2022", "ghg_emission_associated": "4523", "ghg_emission_other": "435", "ghg_emissions_products": "5345"}
                ]
            }
        }
}
]

And I want to extract my data by year. This is the model that I'd like to obtain:

{
    "companyId": 1
    "year": {
        "2017": {
            "year": "2017",
            "scope1": null,
            "scope2_location_based": "1234",
            "scope2_market_based": "1234",
            "ghg_emission_associated": null,
            "ghg_emission_other": null,
            "ghg_emissions_products": null
        },
        "2018": {
            "year": "2018",
            "scope1": null,
            "scope2_location_based": "13423",
            "scope2_market_based": "543654",
            "ghg_emission_associated": "354",
            "ghg_emission_other": "423423",
            "ghg_emissions_products": "967"
        },
        "2019": {
            "year": "2019",
            "scope1": "12459",
            "scope2_location_based": "235434",
            "scope2_market_based": "42343",
            "ghg_emission_associated": "12459",
            "ghg_emission_other": "6545476",
            "ghg_emissions_products": "445663"
        },
        "2020": {...},
        "2021": {...},
        "2022": {...}
    }
}

I wrote this aggregation query obtaining three object with year as key

db.report_data_fields.aggregate(
    {
        $project: {
            _id: 0,
            ghg_emissions_scope1year: {
                $arrayToObject: {
                    $map: {
                        input: '$dataFields.ghg_emissions_scope1.iterations',
                        as: 'iter',
                        in: {
                            "k": "$$iter.year",
                            "v": "$$iter"
                        }
                    }
                }
            },
            ghg_emissions_scope2year: {
                $arrayToObject: {
                    $map: {
                        input: '$dataFields.ghg_emissions_scope2.iterations',
                        as: 'iter',
                        in: {
                            "k": "$$iter.year",
                            "v": "$$iter"
                        }
                    }
                }
            },
            ghg_emissions_scope3year: {
                $arrayToObject: {
                    $map: {
                        input: '$dataFields.ghg_emissions_scope3.iterations',
                        as: 'iter',
                        in: {
                            "k": "$$iter.year",
                            "v": "$$iter"
                        }
                    }
                }
            }
        }
    }
);

This is te object obtained

{
  "ghg_emissions_scope1year": {
    "2017": {
      "year": "2017",
      "scope1": "1793"
    }
  },
  "ghg_emissions_scope2year": {
    "2018": {
      "scope2_market_based": "0",
      "year": "2018",
      "scope2_location_based": "10352"
    }
  },
  "ghg_emissions_scope3year": {
    "2021": {
      "ghg_emission_associated": "0",
      "ghg_emissions_transportation": "124371",
      "ghg_emission_other": "36599",
      "year": "2021",
      "ghg_emissions_products": "0"
    }
  }
}

I've tried to add a step with $addFields and $mergeObject

{$addFields: {'years': {$mergeObjects: ["$ghg_emissions_scope1year","$ghg_emissions_scope2year","$ghg_emissions_scope3year"]}}}

But year fields only contains ghg_emissions_scope3year values. How can I merge the three object obtained in my projection?

CodePudding user response:

One option is to create an array of years and $map base on it:

  1. create iterations array
  2. Find the index of each year on each array
  3. $merge an "empty" item with the data from each array according to the year's index
  4. Build the object using $arrayToObject
db.collection.aggregate([
  {$addFields: {iterations: {$setUnion: [
          "$dataFields.ghg_emissions_scope1.iterations.year",
          "$dataFields.ghg_emissions_scope2.iterations.year",
          "$dataFields.ghg_emissions_scope3.iterations.year"
  ]}}},
  {$set: {iterations: {$map: {
          input: "$iterations",
          in: {
            year: "$$this",
            indexA: {$indexOfArray: [
                "$dataFields.ghg_emissions_scope1.iterations.year",
                "$$this"
              ]},
            indexB: {$indexOfArray: [
                "$dataFields.ghg_emissions_scope2.iterations.year",
                "$$this"
            ]},
            indexC: {$indexOfArray: [
                "$dataFields.ghg_emissions_scope3.iterations.year",
                "$$this"
            ]}            
          }
  }}}},
  {$project: {
      companyId: 1,
      data: {$map: {
          input: "$iterations",
          in: {$mergeObjects: [
              {
                "ghg_emission_associated": null,
                "ghg_emission_other": null,
                "ghg_emissions_products": null,
                "scope1": null,
                "scope2_location_based": null,
                "scope2_market_based": null,
                "year": null
              },
              {$cond: [
                  {$gte: ["$$this.indexA", 0]},
                  {$arrayElemAt: [
                      "$dataFields.ghg_emissions_scope1.iterations",
                      "$$this.indexA"
                  ]},
                  {}
              ]},
              {$cond: [
                  {$gte: ["$$this.indexB", 0]},
                  {$arrayElemAt: [
                      "$dataFields.ghg_emissions_scope2.iterations",
                      "$$this.indexB"
                  ]},
                  {}
              ]},
              {$cond: [
                  {$gte: ["$$this.indexC", 0]},
                  {$arrayElemAt: [
                      "$dataFields.ghg_emissions_scope3.iterations",
                      "$$this.indexC"
                  ]},
                  {}
              ]}
          ]}
      }}
  }},
  {$project: {
      companyId: 1,
      year: {$arrayToObject: {$map: {
            input: "$data",
            in: {k: "$$this.year", v: "$$this"}
      }}}
  }}
])

See how it works on the playground example

  • Another option is to unwind all iterations and group again by year
  • Related