Home > Software engineering >  From text doc to JSON with python
From text doc to JSON with python

Time:10-22

Suppose I have multiple txt files that look like this (indentation is 4 spaces):

key1=value1
key2
    key2_1=value2_1
    key2_2
        key2_2_1=value2_2_1
    key2_3=value2_3
key3=value3

How do I convert one (or all) of them into this format:

{
'key1':'value1',
'key2':
    {
    'key2_1':'value2_1',
    'key2_2':
        {
        'key2_2_1':'value2_2_1'
        },
    'key2_3':'value2_3'
    },
'key3':'value3'
}

or in a flattened dictionary format.

I appreciate any comments. Best,

Nigel

CodePudding user response:

You could try something like the following:

# helper function to convert equals signs to indentation for easier parsing
def convertIndentation(inputString):
    """Rewrite ``key=value`` lines so the value sits on its own, deeper line.

    Each line containing ``=`` is split at the first ``=``. The part before
    it is kept as one line (with its original indentation), and the part
    after it becomes a new line indented one extra level (four spaces)
    beyond the original line's leading whitespace. Lines without ``=`` are
    passed through unchanged.

    Args:
        inputString: iterable of text lines, each expected to carry its own
            line ending (e.g. the result of ``file.readlines()``). It is
            not modified.

    Returns:
        A single string made by concatenating the rewritten lines.
    """
    indentVal = "    "
    converted = []
    for eachLine in inputString:
        # Split only at the first "=", so values may themselves contain "=".
        key, separator, value = eachLine.partition("=")
        if not separator:
            converted.append(eachLine)
            continue
        # Use the line's real leading whitespace rather than counting
        # four-space runs anywhere in the line (which would also count
        # spaces inside the value).
        leading = eachLine[:len(eachLine) - len(eachLine.lstrip())]
        converted.append(key + "\n")
        # `value` still ends with the original line ending, if there was one.
        converted.append(leading + indentVal + value)
    return "".join(converted)

# helper class for node usage
class Node:
    """One line of indented text together with the lines nested beneath it."""

    def __init__(self, indented_line):
        """Create a node from a single (possibly indented) line of text.

        ``level`` is the number of leading whitespace characters and
        ``text`` is the line with surrounding whitespace removed.
        """
        self.children = []
        self.level = len(indented_line) - len(indented_line.lstrip())
        self.text = indented_line.strip()

    def add_children(self, nodes):
        """Consume nodes from the front of ``nodes`` and attach them below self.

        The level of the first node defines the level of this node's direct
        children. Deeper nodes are handed to the most recently added child.
        Processing stops at the first node whose level is not deeper than
        this node's own level; that node and everything after it are left
        in ``nodes`` for the caller.

        Args:
            nodes: list of Node objects in document order. The list is
                modified in place; an empty list is a no-op.
        """
        if not nodes:
            # Nothing to attach (e.g. an empty or blank-only input).
            return

        childlevel = nodes[0].level

        while nodes:
            node = nodes[0]
            if node.level == childlevel:
                # Direct child of this node.
                self.children.append(nodes.pop(0))
            elif node.level > childlevel:
                # Belongs below the last child; let that child consume it.
                self.children[-1].add_children(nodes)
            elif node.level <= self.level:
                # Sibling (or ancestor's sibling): no more children here.
                return
            else:
                # Level lies strictly between this node's level and
                # childlevel (inconsistent indentation): matches no case
                # above, so the node is discarded.
                nodes.pop(0)

    def as_dict(self):
        """Return a nested structure for this node.

        Returns:
            ``self.text`` for a node without children,
            ``{self.text: <child>}`` for a node with exactly one child, and
            ``{self.text: [<child>, ...]}`` for a node with several children,
            where each ``<child>`` is that child's own ``as_dict()`` result.
        """
        if len(self.children) > 1:
            return {self.text: [node.as_dict() for node in self.children]}
        elif len(self.children) == 1:
            return {self.text: self.children[0].as_dict()}
        else:
            return self.text

# process our file here
# NOTE: `filename` is not defined in this snippet; set it to the path of the
# input text file before running.
import json

with open(filename, 'r') as fh:
    fileContent = fh.readlines()

# convert equals signs to indentation (the file is no longer needed here)
fileParse = convertIndentation(fileContent)
root = Node('root')
nodes = [Node(line) for line in fileParse.splitlines() if line.strip()]
if nodes:
    root.add_children(nodes)
# With no children, as_dict() returns the bare string 'root', which cannot be
# indexed by key; fall back to an empty object for empty input.
d = root.as_dict()['root'] if root.children else {}
# this variable is storing the json output
jsonOutput = json.dumps(d, indent=4, sort_keys=False)
print(jsonOutput)

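That should yield output like the following (each key/value pair becomes its own single-entry object, and sibling entries are collected in a list):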

[
    {
        "key1": "value1"
    },
    {
        "key2": [
            {
                "key2_1": "value2_1"
            },
            {
                "key2_2": {
                    "key2_2_1": "value2_2_1"
                }
            },
            {
                "key2_3": "value2_3"
            }
        ]
    },
    {
        "key3": "value3"
    }
]
  • Related