I am trying to convert a machine-generated file into JSON so that it is easy to analyze with other software and other Python scripts. The file is structured similarly to JSON, but as far as I can tell it cannot be converted automatically.
The file looks similar to this (.bpf filetype):
PACKET fileName.bpf
STYLE 502
last_modified 1651620170 # Tue May 03 19:22:50 2022
STRUCTURE BuildInfo
PARAM Version
Value = 1128
ENDPARAM
PARAM build_height
Units = 1 # Inches
Value = 0.905512
ENDPARAM
PARAM build_time_s
Value = "3:22:53"
ENDPARAM
... # Parameters continue
ENDSTRUCTURE #BuildInfo only called once
STRUCTURE PartInfo
PARAM BandOffset
Category_mask = 65537
GUIName = "Stripe Offset"
Order = 38
Type = 3
Units = 1 # Inches
ZUnits = 1
profile_z= 0.000000 profile_value = 0.243307
ENDPARAM
PARAM Color
B = 0.380000
G = 0.380000
R = 0.380000
UseDefault = 0
ENDPARAM
... # Parameters continue
ENDSTRUCTURE #PartInfo ranges from 1 to however many parts are needed, max ~100
ENDPACKET
checksum 0xa61d
I want the end product to look like this:
{
"name": "fileName.bpf",
"style": "502",
"last_modified": "",
"BuildInfo": {
"Version": "1128",
"build_height": {
"Units": "1",
"Value": "0.905512"
},
"build_time_s": "3:22:53",
... # parameters continue
},
"PartInfo-001": [
"id": "1" #incremented for each part
"BandOffset": {
"Category_mask": "65537",
"GUIName": "Stripe Offset",
"Order": "38",
"Type": "3",
"Units": "1",
"ZUnits": "1",
"profile_z": "0.000000",
"profile_value": "0.243307",
}
"Color": {
"B": "0.380000",
"G": "0.380000",
"R": "0.380000",
}
... # parameters continue
... # PartInfo repeats
]
}
The file is over 55,000 lines long, with too many parameters to build a dictionary by hand. I started writing a script that parses just a subsection of the file (a single PartInfo) into a Python dictionary and then saves it as a JSON file, but the script does not process any of the document.
# Python program to convert text
# file to JSON
import json
def _parse_param_blocks(lines):
    """Collect the properties of every PARAM ... ENDPARAM block in *lines*.

    Lines outside a PARAM block (STRUCTURE, ENDSTRUCTURE, PACKET, ...) are
    ignored. Inside a block, each ``key = value`` pair becomes one entry;
    several pairs may share a line, and anything after an unquoted ``#`` is
    treated as a comment.

    Args:
        lines: Iterable of text lines (an open text file works).

    Returns:
        A dict mapping each parameter name to a dict of its properties.
        Every value is kept as a string, with surrounding double quotes
        removed. A parameter name that appears twice keeps only the
        properties of its last block.
    """
    import re  # local import: the surrounding file only imports json

    # A scan token is either a trailing "# comment" or a "key = value" pair
    # whose value is a double-quoted string or a run of non-space characters.
    # Matching the quoted form first keeps a '#' inside quotes from being
    # mistaken for a comment.
    token = re.compile(r'#.*|(\w+)\s*=\s*("[^"]*"|[^\s#]+)')

    parameters = {}
    current = None  # property dict of the PARAM block being read, if any
    for line in lines:
        stripped = line.strip()
        if not stripped:
            continue
        fields = stripped.split(None, 1)
        # Drop a comment glued to the keyword, e.g. "ENDPARAM#note".
        keyword = fields[0].split('#', 1)[0]
        if keyword == 'PARAM':
            name = fields[1].split('#', 1)[0].strip() if len(fields) > 1 else ''
            current = parameters[name] = {}
        elif keyword == 'ENDPARAM':
            current = None
        elif current is not None:
            for match in token.finditer(stripped):
                if match.group(1) is None:
                    break  # reached the trailing comment
                current[match.group(1)] = match.group(2).strip('"')
    return parameters


def main(filename=r'samplePartParameters.txt', out_filename="test1.json"):
    """Convert the PARAM blocks of a text file into a JSON file.

    Args:
        filename: Path of the text file to read.
        out_filename: Path of the JSON file to write (overwritten if it
            already exists).

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    # Parse in a single pass; the file object is consumed exactly once.
    with open(filename, 'r') as file:
        partParameters = _parse_param_blocks(file)

    # The context manager closes the output even if serialization fails.
    with open(out_filename, "w") as out_file:
        json.dump(partParameters, out_file, indent=4, sort_keys=False)
# Script entry point: report start, run the conversion, report completion.
if __name__ == "__main__":
    print("Running.")
    main()
    print("Done.")
Please let me know if you see an error in my code, or if you know of an easier way to go about this.
Thanks!
CodePudding user response:
The third-party `ttp` (Template Text Parser) Python module should help. Please see the example below:
!pip install ttp
from ttp import ttp
import json
# Sample input text for the parser. Compared with the question's sample, the
# STRUCTURE names carry an explicit "-001"/"-002" suffix and the GUIName value
# uses an underscore instead of a space.
data_to_parse = """
PACKET fileName.bpf
STYLE 502
last_modified 1651620170 # Tue May 03 19:22:50 2022
STRUCTURE BuildInfo
PARAM Version
Value = 1128
ENDPARAM
PARAM build_height
Units = 1 # Inches
Value = 0.905512
ENDPARAM
PARAM build_time_s
Value = "3:22:53"
ENDPARAM
... # Parameters continue
ENDSTRUCTURE #BuildInfo only called once
STRUCTURE PartInfo-001
PARAM BandOffset
Category_mask = 65537
GUIName = "Stripe_Offset"
Order = 38
Type = 3
Units = 1 # Inches
ZUnits = 1
profile_z= 0.000000 profile_value = 0.243307
ENDPARAM
PARAM Color
B = 0.380000
G = 0.380000
R = 0.380000
UseDefault = 0
ENDPARAM
... # Parameters continue
ENDSTRUCTURE #PartInfo ranges from 1 to however many parts are needed, max ~100
ENDPACKET
checksum 0xa61d
STRUCTURE PartInfo-002
PARAM BandOffset
Category_mask = 65538
GUIName = "Stripe_Offset"
Order = 39
Type = 4
Units = 2 # Inches
ZUnits = 1
profile_z= 0.100000 profile_value = 0.253307
ENDPARAM
PARAM Color
B = 0.390000
G = 0.390000
R = 0.390000
UseDefault = 0
ENDPARAM
... # Parameters continue
ENDSTRUCTURE #PartInfo ranges from 1 to however many parts are needed, max ~100
ENDPACKET
checksum 0xa61d
"""
# TTP template applied to data_to_parse. Each {{...}} placeholder names a value
# to capture, and the <group name="..."> elements determine how the captured
# values are nested in the result (see the sample output: MyData -> BuildInfo /
# PartInfo -> parameter name -> properties).
# The first {{INFO}} group targets "STRUCTURE <name>" lines; the second targets
# "STRUCTURE <name>-<id>" lines and also captures the id.
# NOTE(review): {{ignore}} presumably discards the trailing "# Inches" tokens -
# confirm against the TTP documentation.
ttp_template = """
<group name="MyData">
PACKET {{name}}
STYLE {{style}}
<group name="{{INFO}}">
STRUCTURE {{INFO}}
<group name="{{INFO_TYPE}}">
PARAM {{INFO_TYPE}}
Value = {{Value}}
Units = {{Units}} {{ignore}} {{ignore}}
</group>
</group>
<group name="{{INFO}}">
STRUCTURE {{INFO}}-{{id}}
<group name="{{INFO_TYPE}}">
PARAM {{INFO_TYPE}}
Value = {{Value}}
Units = {{Units}} {{ignore}} {{ignore}}
Category_mask = {{Category_mask}}
GUIName = {{GUIName}}
Order = {{Order}}
Type = {{Type}}
ZUnits = {{ZUnits}}
profile_z= {{profile_z}} profile_value = {{profile_value}}
B = {{B}}
G = {{G}}
R = {{R}}
</group>
</group>
</group>
"""
# Run the template against the sample text.
parser = ttp(data=data_to_parse, template=ttp_template)
parser.parse()
# Request the results serialized as JSON and keep the first element, which is
# a JSON string (it is handed to json.loads below).
results = parser.result(format='json')[0]
# Decode the JSON string into Python objects; `result` is not used further in
# this snippet.
result = json.loads(results)
# Print the raw JSON string, not the decoded objects.
print(results)
See the output as:
[
{
"MyData": {
"BuildInfo": {
"Version": {
"Value": "1128"
},
"build_height": {
"Units": "1",
"Value": "0.905512"
},
"build_time_s": {
"Value": "\"3:22:53\""
}
},
"PartInfo": [
{
"BandOffset": {
"Category_mask": "65537",
"GUIName": "\"Stripe_Offset\"",
"Order": "38",
"Type": "3",
"Units": "1",
"ZUnits": "1",
"profile_value": "0.243307",
"profile_z": "0.000000"
},
"Color": {
"B": "0.380000",
"G": "0.380000",
"R": "0.380000"
},
"id": "001"
},
{
"BandOffset": {
"Category_mask": "65538",
"GUIName": "\"Stripe_Offset\"",
"Order": "39",
"Type": "4",
"Units": "2",
"ZUnits": "1",
"profile_value": "0.253307",
"profile_z": "0.100000"
},
"Color": {
"B": "0.390000",
"G": "0.390000",
"R": "0.390000"
},
"id": "002"
}
],
"name": "fileName.bpf",
"style": "502"
}
}
]