Suppose I have multiple txt files that look like this (indentation is 4 spaces):
key1=value1
key2
    key2_1=value2_1
    key2_2
        key2_2_1=value2_2_1
    key2_3=value2_3
key3=value3
How do I convert one (or all) of them into this format:
{
    'key1':'value1',
    'key2':
    {
        'key2_1':'value2_1',
        'key2_2':
        {
            'key2_2_1':'value2_2_1'
        },
        'key2_3':'value2_3'
    },
    'key3':'value3'
}
or in a flattened dictionary format.
I appreciate any comments. Best,
Nigel
CodePudding user response:
You could try something like below:
# helper method to convert equals sign to indentation for easier parsing
def convertIndentation(inputString, indentVal="    "):
    """Rewrite every ``key=value`` line as a key line plus a deeper value line.

    Lines without an equals sign are passed through unchanged. For a line
    containing ``=``, the text before the first ``=`` is emitted as its own
    line and the text after it is emitted on the following line, indented one
    ``indentVal`` deeper than the key.

    Args:
        inputString: Iterable of text lines, e.g. the result of
            ``file.readlines()``. It is not modified.
        indentVal: Whitespace added in front of a value to place it one level
            below its key. Defaults to four spaces.

    Returns:
        A single string made of all resulting lines concatenated.
    """
    outputLines = []
    for eachLine in inputString:
        if "=" not in eachLine:
            outputLines.append(eachLine)
            continue
        # Split on the first "=" only, so a value may itself contain "=".
        key, value = eachLine.split("=", 1)
        # Use the key's own leading whitespace as the base indent; counting
        # indentVal anywhere in the line would also count space runs that
        # belong to the value.
        keyIndent = key[:len(key) - len(key.lstrip())]
        outputLines.append(key + "\n")
        # The value keeps whatever line ending the original line had.
        outputLines.append(keyIndent + indentVal + value)
    return "".join(outputLines)
# helper class for node usage
class Node:
    """One line of indented text, plus the deeper lines nested beneath it."""

    def __init__(self, indented_line):
        """Create a node from a single line.

        Args:
            indented_line: The raw line. The number of leading whitespace
                characters becomes ``level``; the stripped text becomes
                ``text``.
        """
        self.children = []
        self.level = len(indented_line) - len(indented_line.lstrip())
        self.text = indented_line.strip()

    def add_children(self, nodes):
        """Consume nodes from the front of ``nodes`` and attach them below self.

        The level of the first node defines the level of direct children.
        Deeper nodes are handed to the most recently added child. Processing
        stops, leaving the remaining nodes in the list, at the first node
        that is neither at the child level nor deeper than this node.

        Args:
            nodes: List of ``Node`` objects in document order. It is mutated:
                every attached node is removed from it. An empty list is a
                no-op.
        """
        if not nodes:
            return
        childlevel = nodes[0].level
        while nodes:
            level = nodes[0].level
            if level > childlevel:
                # Deeper than a direct child: the head node stays in the list
                # and becomes the first grandchild under the last child.
                self.children[-1].add_children(nodes)
            elif level == childlevel or level > self.level:
                # A direct child, or a node dedented to a level that is still
                # deeper than this node; keep it here rather than lose it.
                self.children.append(nodes.pop(0))
            else:
                # This node is a sibling (or ancestor's sibling): no more
                # children for us, so leave it for the caller.
                return

    def as_dict(self):
        """Return the subtree as nested plain Python objects.

        Returns:
            ``self.text`` alone for a node without children;
            ``{text: child}`` for exactly one child; ``{text: [children...]}``
            for several children, each child converted recursively.
        """
        if len(self.children) > 1:
            return {self.text: [node.as_dict() for node in self.children]}
        elif len(self.children) == 1:
            return {self.text: self.children[0].as_dict()}
        else:
            return self.text
# process our file here
import json

# NOTE: `filename` is not defined in this snippet; set it to the path of the
# text file to convert before running.
with open(filename, 'r') as fh:
    fileContent = fh.readlines()
# convert equals signs to indentation
fileParse = convertIndentation(fileContent)
root = Node('root')
# blank lines carry no key or value, so they are skipped
nodes = [Node(line) for line in fileParse.splitlines() if line.strip()]
if nodes:
    root.add_children(nodes)
    d = root.as_dict()['root']
else:
    # An empty or whitespace-only file yields no nodes; a childless root
    # serializes as a bare string, so there is nothing to index into.
    d = {}
# this variable is storing the json output
jsonOutput = json.dumps(d, indent = 4, sort_keys = False)
print(jsonOutput)
That should yield some output like below:
[
    {
        "key1": "value1"
    },
    {
        "key2": [
            {
                "key2_1": "value2_1"
            },
            {
                "key2_2": {
                    "key2_2_1": "value2_2_1"
                }
            },
            {
                "key2_3": "value2_3"
            }
        ]
    },
    {
        "key3": "value3"
    }
]