Home > Enterprise >  How to get all keys from deeply nested json using python?
How to get all keys from deeply nested json using python?

Time:10-22

Problem statement: Remove or rename special characters (#, $, backslash, &, etc.) in JSON keys and write the cleaned keys back to the main JSON file.

Approach :

  1. First, collect all the keys of the deeply nested JSON.
  2. Check each key for special characters, rename/replace them, and write the result back to the JSON file.

Issue :

  1. The JSON I have is very deeply nested, so the logic I have written works for simple JSON but not for deeply nested JSON.

Code :

import json
import base64

def getKeys(object, prev_key=None, keys=None):
    """Collect the dotted key paths of every leaf in a decoded JSON value.

    Dictionaries are walked recursively and each nested key is joined to its
    parent path with a ``.``. Lists are walked as well, so keys of objects
    stored inside arrays are reported under the path of the array itself
    (no index is added to the path).

    Args:
        object: The decoded JSON value (dict, list or scalar) to inspect.
        prev_key: Path of the parent key, or ``None`` at the top level.
        keys: Optional list that receives ``prev_key`` when ``object`` is a
            leaf. A fresh list is created on every call when omitted.

    Returns:
        A list of key paths. For a leaf value this is ``keys`` with
        ``prev_key`` appended; for a dict or a list holding nested
        containers it is a new list built from the children.
    """
    # A fresh list per call: a shared default list would leak keys collected
    # by earlier calls into later results.
    if keys is None:
        keys = []

    if isinstance(object, dict):
        new_keys = []
        for k, v in object.items():
            new_key = k if prev_key is None else "{}.{}".format(prev_key, k)
            new_keys.extend(getKeys(v, new_key))
        return new_keys

    if isinstance(object, list):
        nested = []
        for item in object:
            # Only containers can contribute further keys; scalar elements
            # are covered by the path of the list itself.
            if isinstance(item, (dict, list)):
                nested.extend(getKeys(item, prev_key))
        if nested:
            # Elements of an array usually share a schema, so report each
            # path once while keeping first-seen order.
            return list(dict.fromkeys(nested))

    # Scalar value, or a list without nested keys: the path itself is a leaf.
    keys.append(prev_key)
    return keys

The above code works for the JSON below; it prints all of the JSON keys:

json_string= '{"Relate:0/name": "securityhub-ec2-instance-managed-by-ssm-dc0c9f18","RelatedAWSResources:0/type": "AWS::Config::ConfigRule","aws/securityhub/ProductName": "Security Hub","aws/securityhub/CompanyName": "AWS"}'

Output :

['Relate:0/name', 'RelatedAWSResources:0/type', 'aws/securityhub/ProductName', 'aws/securityhub/CompanyName']

But it does not work for the JSON below:

{
  "version": "0",
  "id": "ffd8a756-9fe6-fa54-af4e-cf85fa3d2896",
  "detail-type": "Security Hub Findings - Imported",
  "source": "aws.securityhub",
  "account": "220307202362",
  "time": "2021-10-17T14:26:25Z",
  "region": "us-west-2",
  "resources": [
    "arn:aws:securityhub:us-west-2::product/aws/securityhub/arn:aws:securityhub:us-west-2:220307202362:subscription/pci-dss/v/3.2.1/PCI.CW.1/finding/b5a325b7-eab1-439f-b14d-1dc52c3a423f"
  ],
  "detail": {
    "findings": [
      {
        "ProductArn": "arn:aws:securityhub:us-west-2::product/aws/securityhub",
        "Types": [
          "Software and Configuration Checks/Industry and Regulatory Standards/PCI-DSS"
        ],
        "Description": "This control checks for the CloudWatch metric filters using the following pattern { $.userIdentity.type = \"Root\" && $.userIdentity.invokedBy NOT EXISTS && $.eventType != \"AwsServiceEvent\" } It checks that the log group name is configured for use with active multi-region CloudTrail, that there is at least one Event Selector for a Trail with IncludeManagementEvents set to true and ReadWriteType set to All, and that there is at least one active subscriber to an SNS topic associated with the alarm.",
        "Compliance": {
          "Status": "FAILED",
          "StatusReasons": [
            {
              "Description": "Multi region CloudTrail with the required configuration does not exist in the account",
              "ReasonCode": "CLOUDTRAIL_MULTI_REGION_NOT_PRESENT"
            }
          ],
          "RelatedRequirements": [
            "PCI DSS 7.2.1"
          ]
        },
        "ProductName": "Security Hub",
        "FirstObservedAt": "2021-10-17T14:26:18.383Z",
        "CreatedAt": "2021-10-17T14:26:18.383Z",
        "LastObservedAt": "2021-10-17T14:26:21.346Z",
        "CompanyName": "AWS",
        "FindingProviderFields": {
          "Types": [
            "Software and Configuration Checks/Industry and Regulatory Standards/PCI-DSS"
          ],
          "Severity": {
            "Normalized": 40,
            "Label": "MEDIUM",
            "Product": 40,
            "Original": "MEDIUM"
          }
        },
        "ProductFields": {
          "StandardsArn": "arn:aws:securityhub:::standards/pci-dss/v/3.2.1",
          "StandardsSubscriptionArn": "arn:aws:securityhub:us-west-2:220307202362:subscription/pci-dss/v/3.2.1",
          "ControlId": "PCI.CW.1",
          "RecommendationUrl": "https://docs.aws.amazon.com/console/securityhub/PCI.CW.1/remediation",
          "StandardsControlArn": "arn:aws:securityhub:us-west-2:220307202362:control/pci-dss/v/3.2.1/PCI.CW.1",
          "aws/securityhub/ProductName": "Security Hub",
          "aws/securityhub/CompanyName": "AWS",
          "aws/securityhub/annotation": "Multi region CloudTrail with the required configuration does not exist in the account",
          "Resources:0/Id": "arn:aws:iam::220307202362:root",
          "aws/securityhub/FindingId": "arn:aws:securityhub:us-west-2::product/aws/securityhub/arn:aws:securityhub:us-west-2:220307202362:subscription/pci-dss/v/3.2.1/PCI.CW.1/finding/b5a325b7-eab1-439f-b14d-1dc52c3a423f"
        },
        "Remediation": {
          "Recommendation": {
            "Text": "For directions on how to fix this issue, consult the AWS Security Hub PCI DSS documentation.",
            "Url": "https://docs.aws.amazon.com/console/securityhub/PCI.CW.1/remediation"
          }
        },
        "SchemaVersion": "2018-10-08",
        "GeneratorId": "pci-dss/v/3.2.1/PCI.CW.1",
        "RecordState": "ACTIVE",
        "Title": "PCI.CW.1 A log metric filter and alarm should exist for usage of the \"root\" user",
        "Workflow": {
          "Status": "NEW"
        },
        "Severity": {
          "Normalized": 40,
          "Label": "MEDIUM",
          "Product": 40,
          "Original": "MEDIUM"
        },
        "UpdatedAt": "2021-10-17T14:26:18.383Z",
        "WorkflowState": "NEW",
        "AwsAccountId": "220307202362",
        "Region": "us-west-2",
        "Id": "arn:aws:securityhub:us-west-2:220307202362:subscription/pci-dss/v/3.2.1/PCI.CW.1/finding/b5a325b7-eab1-439f-b14d-1dc52c3a423f",
        "Resources": [
          {
            "Partition": "aws",
            "Type": "AwsAccount",
            "Region": "us-west-2",
            "Id": "AWS::::Account:220307202362"
          }
        ]
      }
    ]
  }
} 

Strip punctuation function :

import string
from typing import Optional, Iterable, Union


# Every ASCII punctuation character mapped to the empty string.
delete_dict = dict.fromkeys(string.punctuation, '')

# Translation table (code point -> replacement) built once at import time.
PUNCT_TABLE = str.maketrans(delete_dict)


def strip_punctuation(s: str,
                      exclude_chars: Optional[Union[str, Iterable]] = None) -> str:
    """
    Return `s` with the characters of `string.punctuation` removed.

    Whitespace and every other character are left untouched.

    If `exclude_chars` is given, each single character it yields is kept
    in the result instead of being removed.

    """
    if not exclude_chars:
        return s.translate(PUNCT_TABLE)

    # Build a reduced table that no longer deletes the excluded characters.
    preserved = {ord(char) for char in exclude_chars}
    reduced_table = {
        code_point: replacement
        for code_point, replacement in PUNCT_TABLE.items()
        if code_point not in preserved
    }
    return s.translate(reduced_table)

Usage:

cleaned_keys = {json data}
for key, expected_key in cleaned_keys.items():
    actual_key = strip_punctuation(key)

CodePudding user response:

Problem Statement : Remove/Rename Special characters (#,$, Back slash, & etc.) from json keys and replace in the main json file.

If I'm understanding you correctly, you don't need to create your own function (for example a recursive function) that iterates over the JSON data.

The good news is that it's possible to achieve this while loading the JSON string into a Python object, through the object_pairs_hook parameter of json.loads. The callable you supply for this parameter is called with a list of tuples, where each tuple is a key-value pair from one JSON object. So you only need to replace the keys in the pairs that you receive.

Here is a somewhat contrived example that wraps every JSON key (nested or otherwise) in exclamation marks (!!):

import json


json_string = r"""
{
  "version": "0",
  "id": "ffd8a756-9fe6-fa54-af4e-cf85fa3d2896",
  "detail-type": "Security Hub Findings - Imported",
  "source": "aws.securityhub",
  "account": "220307202362",
  "time": "2021-10-17T14:26:25Z",
  "region": "us-west-2",
  "resources": [
    "arn:aws:securityhub:us-west-2::product/aws/securityhub/arn:aws:securityhub:us-west-2:220307202362:subscription/pci-dss/v/3.2.1/PCI.CW.1/finding/b5a325b7-eab1-439f-b14d-1dc52c3a423f"
  ],
  "detail": {
    "findings": [
      {
        "ProductArn": "arn:aws:securityhub:us-west-2::product/aws/securityhub",
        "Types": [
          "Software and Configuration Checks/Industry and Regulatory Standards/PCI-DSS"
        ],
        "Description": "This control checks for the CloudWatch metric filters using the following pattern { $.userIdentity.type = \"Root\" && $.userIdentity.invokedBy NOT EXISTS && $.eventType != \"AwsServiceEvent\" } It checks that the log group name is configured for use with active multi-region CloudTrail, that there is at least one Event Selector for a Trail with IncludeManagementEvents set to true and ReadWriteType set to All, and that there is at least one active subscriber to an SNS topic associated with the alarm.",
        "Compliance": {
          "Status": "FAILED",
          "StatusReasons": [
            {
              "Description": "Multi region CloudTrail with the required configuration does not exist in the account",
              "ReasonCode": "CLOUDTRAIL_MULTI_REGION_NOT_PRESENT"
            }
          ],
          "RelatedRequirements": [
            "PCI DSS 7.2.1"
          ]
        },
        "ProductName": "Security Hub",
        "FirstObservedAt": "2021-10-17T14:26:18.383Z",
        "CreatedAt": "2021-10-17T14:26:18.383Z",
        "LastObservedAt": "2021-10-17T14:26:21.346Z",
        "CompanyName": "AWS",
        "FindingProviderFields": {
          "Types": [
            "Software and Configuration Checks/Industry and Regulatory Standards/PCI-DSS"
          ],
          "Severity": {
            "Normalized": 40,
            "Label": "MEDIUM",
            "Product": 40,
            "Original": "MEDIUM"
          }
        },
        "ProductFields": {
          "StandardsArn": "arn:aws:securityhub:::standards/pci-dss/v/3.2.1",
          "StandardsSubscriptionArn": "arn:aws:securityhub:us-west-2:220307202362:subscription/pci-dss/v/3.2.1",
          "ControlId": "PCI.CW.1",
          "RecommendationUrl": "https://docs.aws.amazon.com/console/securityhub/PCI.CW.1/remediation",
          "StandardsControlArn": "arn:aws:securityhub:us-west-2:220307202362:control/pci-dss/v/3.2.1/PCI.CW.1",
          "aws/securityhub/ProductName": "Security Hub",
          "aws/securityhub/CompanyName": "AWS",
          "aws/securityhub/annotation": "Multi region CloudTrail with the required configuration does not exist in the account",
          "Resources:0/Id": "arn:aws:iam::220307202362:root",
          "aws/securityhub/FindingId": "arn:aws:securityhub:us-west-2::product/aws/securityhub/arn:aws:securityhub:us-west-2:220307202362:subscription/pci-dss/v/3.2.1/PCI.CW.1/finding/b5a325b7-eab1-439f-b14d-1dc52c3a423f"
        },
        "Remediation": {
          "Recommendation": {
            "Text": "For directions on how to fix this issue, consult the AWS Security Hub PCI DSS documentation.",
            "Url": "https://docs.aws.amazon.com/console/securityhub/PCI.CW.1/remediation"
          }
        },
        "SchemaVersion": "2018-10-08",
        "GeneratorId": "pci-dss/v/3.2.1/PCI.CW.1",
        "RecordState": "ACTIVE",
        "Title": "PCI.CW.1 A log metric filter and alarm should exist for usage of the \"root\" user",
        "Workflow": {
          "Status": "NEW"
        },
        "Severity": {
          "Normalized": 40,
          "Label": "MEDIUM",
          "Product": 40,
          "Original": "MEDIUM"
        },
        "UpdatedAt": "2021-10-17T14:26:18.383Z",
        "WorkflowState": "NEW",
        "AwsAccountId": "220307202362",
        "Region": "us-west-2",
        "Id": "arn:aws:securityhub:us-west-2:220307202362:subscription/pci-dss/v/3.2.1/PCI.CW.1/finding/b5a325b7-eab1-439f-b14d-1dc52c3a423f",
        "Resources": [
          {
            "Partition": "aws",
            "Type": "AwsAccount",
            "Region": "us-west-2",
            "Id": "AWS::::Account:220307202362"
          }
        ]
      }
    ]
  }
}
"""


def clean_keys(o):
    """Build a dict from (key, value) pairs, wrapping each key in '!!'.

    `o` is an iterable of two-item pairs, such as the list that
    `json.loads` hands to an `object_pairs_hook` callable.
    """
    wrapped = {}
    for key, value in o:
        wrapped['!!{}!!'.format(key)] = value
    return wrapped


# Decode the JSON text. json.loads calls clean_keys with the (key, value)
# pairs of every decoded JSON object, so nested keys are rewritten as well.
r = json.loads(json_string, object_pairs_hook=clean_keys)
print(r)

Result object:

{'!!version!!': '0', '!!id!!': 'ffd8a756-9fe6-fa54-af4e-cf85fa3d2896', '!!detail-type!!': 'Security Hub Findings - Imported', '!!source!!': 'aws.securityhub', '!!account!!': '220307202362', '!!time!!': '2021-10-17T14:26:25Z', '!!region!!': 'us-west-2', '!!resources!!': ['arn:aws:securityhub:us-west-2::product/aws/securityhub/arn:aws:securityhub:us-west-2:220307202362:subscription/pci-dss/v/3.2.1/PCI.CW.1/finding/b5a325b7-eab1-439f-b14d-1dc52c3a423f'], '!!detail!!': {'!!findings!!': [{'!!ProductArn!!': 'arn:aws:securityhub:us-west-2::product/aws/securityhub', '!!Types!!': ['Software and Configuration Checks/Industry and Regulatory Standards/PCI-DSS'], '!!Description!!': 'This control checks for the CloudWatch metric filters using the following pattern { $.userIdentity.type = "Root" && $.userIdentity.invokedBy NOT EXISTS && $.eventType != "AwsServiceEvent" } It checks that the log group name is configured for use with active multi-region CloudTrail, that there is at least one Event Selector for a Trail with IncludeManagementEvents set to true and ReadWriteType set to All, and that there is at least one active subscriber to an SNS topic associated with the alarm.', '!!Compliance!!': {'!!Status!!': 'FAILED', '!!StatusReasons!!': [{'!!Description!!': 'Multi region CloudTrail with the required configuration does not exist in the account', '!!ReasonCode!!': 'CLOUDTRAIL_MULTI_REGION_NOT_PRESENT'}], '!!RelatedRequirements!!': ['PCI DSS 7.2.1']}, '!!ProductName!!': 'Security Hub', '!!FirstObservedAt!!': '2021-10-17T14:26:18.383Z', '!!CreatedAt!!': '2021-10-17T14:26:18.383Z', '!!LastObservedAt!!': '2021-10-17T14:26:21.346Z', '!!CompanyName!!': 'AWS', '!!FindingProviderFields!!': {'!!Types!!': ['Software and Configuration Checks/Industry and Regulatory Standards/PCI-DSS'], '!!Severity!!': {'!!Normalized!!': 40, '!!Label!!': 'MEDIUM', '!!Product!!': 40, '!!Original!!': 'MEDIUM'}}, '!!ProductFields!!': {'!!StandardsArn!!': 'arn:aws:securityhub:::standards/pci-dss/v/3.2.1', 
'!!StandardsSubscriptionArn!!': 'arn:aws:securityhub:us-west-2:220307202362:subscription/pci-dss/v/3.2.1', '!!ControlId!!': 'PCI.CW.1', '!!RecommendationUrl!!': 'https://docs.aws.amazon.com/console/securityhub/PCI.CW.1/remediation', '!!StandardsControlArn!!': 'arn:aws:securityhub:us-west-2:220307202362:control/pci-dss/v/3.2.1/PCI.CW.1', '!!aws/securityhub/ProductName!!': 'Security Hub', '!!aws/securityhub/CompanyName!!': 'AWS', '!!aws/securityhub/annotation!!': 'Multi region CloudTrail with the required configuration does not exist in the account', '!!Resources:0/Id!!': 'arn:aws:iam::220307202362:root', '!!aws/securityhub/FindingId!!': 'arn:aws:securityhub:us-west-2::product/aws/securityhub/arn:aws:securityhub:us-west-2:220307202362:subscription/pci-dss/v/3.2.1/PCI.CW.1/finding/b5a325b7-eab1-439f-b14d-1dc52c3a423f'}, '!!Remediation!!': {'!!Recommendation!!': {'!!Text!!': 'For directions on how to fix this issue, consult the AWS Security Hub PCI DSS documentation.', '!!Url!!': 'https://docs.aws.amazon.com/console/securityhub/PCI.CW.1/remediation'}}, '!!SchemaVersion!!': '2018-10-08', '!!GeneratorId!!': 'pci-dss/v/3.2.1/PCI.CW.1', '!!RecordState!!': 'ACTIVE', '!!Title!!': 'PCI.CW.1 A log metric filter and alarm should exist for usage of the "root" user', '!!Workflow!!': {'!!Status!!': 'NEW'}, '!!Severity!!': {'!!Normalized!!': 40, '!!Label!!': 'MEDIUM', '!!Product!!': 40, '!!Original!!': 'MEDIUM'}, '!!UpdatedAt!!': '2021-10-17T14:26:18.383Z', '!!WorkflowState!!': 'NEW', '!!AwsAccountId!!': '220307202362', '!!Region!!': 'us-west-2', '!!Id!!': 'arn:aws:securityhub:us-west-2:220307202362:subscription/pci-dss/v/3.2.1/PCI.CW.1/finding/b5a325b7-eab1-439f-b14d-1dc52c3a423f', '!!Resources!!': [{'!!Partition!!': 'aws', '!!Type!!': 'AwsAccount', '!!Region!!': 'us-west-2', '!!Id!!': 'AWS::::Account:220307202362'}]}]}}

Edit: Using the strip_punctuation function provided in the question, the clean_keys function would then be defined as follows:

def clean_keys(o):
    """Build a dict from (key, value) pairs with punctuation stripped from keys.

    `o` is an iterable of two-item pairs, such as the list that
    `json.loads` hands to an `object_pairs_hook` callable. Each key is
    passed through `strip_punctuation`; if two keys become equal after
    stripping, the later pair overwrites the earlier one.
    """
    cleaned = {}
    for key, value in o:
        cleaned[strip_punctuation(key)] = value
    return cleaned
  • Related