I have a config server where we read the service config from. In there we have a yaml file that I need to read but it has a weird format on the server looking like:
{
"document[0].Name": "os",
"document[0].Rules.Rule1": false,
"document[0].Rules.Rule2": true,
"document[0].MinScore": 100,
"document[0].MaxScore": 100,
"document[0].ClusterId": 22,
"document[0].Enabled": true,
"document[0].Module": "device",
"document[0].Description": "",
"document[0].Modified": 1577880000000,
"document[0].Created": 1577880000000,
"document[0].RequiredReview": false,
"document[0].Type": "NO_CODE",
"document[1].Name": "rule with params test",
"document[1].Rules.Rule": false,
"document[1].MinScore": 100,
"document[1].MaxScore": 100,
"document[1].ClusterId": 29,
"document[1].Enabled": true,
"document[1].Module": "device",
"document[1].Description": "rule with params test",
"document[1].Modified": 1577880000000,
"document[1].Created": 1577880000000,
"document[1].RequiredReview": false,
"document[1].Type": "NO_CODE",
"document[1].ParametersRules[0].Features.feature1.op": ">",
"document[1].ParametersRules[0].Features.feature1.value": 10,
"document[1].ParametersRules[0].Features.feature2.op": "==",
"document[1].ParametersRules[0].Features.feature2.value": true,
"document[1].ParametersRules[0].Features.feature3.op": "range",
"document[1].ParametersRules[0].Features.feature3.value[0]": 4,
"document[1].ParametersRules[0].Features.feature3.value[1]": 10,
"document[1].ParametersRules[0].Features.feature4.op": "!=",
"document[1].ParametersRules[0].Features.feature4.value": "None",
"document[1].ParametersRules[0].DecisionType": "all",
"document[1].ParametersRules[1].Features.feature5.op": "<",
"document[1].ParametersRules[1].Features.feature5.value": 1000,
"document[1].ParametersRules[1].DecisionType": "any"
}
and this is how the dict supposed to look like (might not be perfect I did it by hand):
[
{
"Name": "os",
"Rules": { "Rule1": false, "Rule2": true },
"MinScore": 100,
"MaxScore": 100,
"ClusterId": 22,
"Enabled": true,
"Module": "device",
"Description": "",
"Modified": 1577880000000,
"Created": 1577880000000,
"RequiredReview": false,
"Type": "NO_CODE"
},
{
"Name": "rule with params test",
"Rules": { "Rule": false},
"MinScore": 100,
"MaxScore": 100,
"ClusterId": 29,
"Enabled": true,
"Module": "device",
"Description": "rule with params test",
"Modified": 1577880000000,
"Created": 1577880000000,
"RequiredReview": false,
"Type": "NO_CODE",
"ParametersRules":[
{"Features": {"feature1": {"op": ">", "value": 10},
"feature2": {"op": "==", "value": true},
"feature3": {"op": "range", "value": [4,10]},
"feature4": {"op": "!=", "value": "None"}} ,
"DecisionType": "all"},
{"Features": { "feature5": { "op": "<", "value": 1000 }},
"DecisionType": "any"}
]
}
]
I don't have a way to change how the file is uploaded to the server (it's a different team and quite the headache) so I need to parse it using python. My thought is that someone probably encountered it before so there must be a package that solves it, and I hoped that someone here might know.
Thanks.
CodePudding user response:
i have a sample , i hope it'll help you
import yaml
import os
file_dir = os.path.dirname(os.path.abspath(__file__))
config = yaml.full_load(open(f"{file_dir}/file.json"))
yaml_file = open(f'{file_dir}/meta.yaml', 'w ')
yaml.dump(config, yaml_file, allow_unicode=True) # this one make your json file to yaml
your current output is :
- ClusterId: 22
Created: 1577880000000
Description: ''
Enabled: true
MaxScore: 100
MinScore: 100
Modified: 1577880000000
Module: device
Name: os
RequiredReview: false
Rules:
Rule1: false
Rule2: true
Type: NO_CODE
- ClusterId: 29
Created: 1577880000000
Description: rule with params test
Enabled: true
MaxScore: 100
MinScore: 100
Modified: 1577880000000
Module: device
Name: rule with params test
ParametersRules:
- DecisionType: all
Features:
feature1:
op: '>'
value: 10
feature2:
op: ==
value: true
feature3:
op: range
value:
- 4
- 10
feature4:
op: '!='
value: None
- DecisionType: any
Features:
feature5:
op: <
value: 1000
RequiredReview: false
Rules:
Rule: false
Type: NO_CODE
CodePudding user response:
Here is my approach so far. It's far from perfect, but hope it gives you an idea of how to tackle it.
def clean_value(o: str | bool | int) -> str | bool | int | None:
"""handle int, None, or bool values encoded as a string"""
if isinstance(o, str):
lowercase = o.lower()
if lowercase.isnumeric():
return int(o)
elif lowercase == 'none':
return None
elif lowercase in ('true', 'false'):
return lowercase == 'true'
# return eval(o.capitalize())
return o
def process(o: dict):
docs_list = []
doc: dict[str, str | bool | int | None]
doc_idx: int
def add_new_doc(new_idx: int):
"""Push new item to result list, and increment index."""
nonlocal doc_idx, doc
doc_idx = new_idx
doc = {}
docs_list.append(doc)
add_new_doc(0)
for k, v in o.items():
doc_id, *parts = k.split('.')
curr_doc_idx = int(doc_id.rsplit('[', 1)[1].rstrip(']'))
if curr_doc_idx > doc_idx:
add_new_doc(curr_doc_idx)
if len(parts) == 1:
final_val = clean_value(v)
else:
final_val = temp_dict = {}
for p in parts[1:-1]:
temp_dict = temp_dict[p] = {}
temp_dict[parts[-1]] = clean_value(v)
doc[parts[0]] = final_val
return docs_list
if __name__ == '__main__':
import json
from pprint import pprint
j = """{
"document[0].Name": "os",
"document[0].Rules.Rule1": false,
"document[0].Rules.Rule2": "true",
"document[0].MinScore": 100,
"document[0].MaxScore": 100,
"document[0].ClusterId": 22,
"document[0].Enabled": true,
"document[0].Module": "device",
"document[0].Description": "",
"document[0].Modified": 1577880000000,
"document[0].Created": 1577880000000,
"document[0].RequiredReview": false,
"document[0].Type": "NO_CODE",
"document[1].Name": "rule with params test",
"document[1].Rules.Rule": false,
"document[1].MinScore": 100,
"document[1].MaxScore": 100,
"document[1].ClusterId": 29,
"document[1].Enabled": true,
"document[1].Module": "device",
"document[1].Description": "rule with params test",
"document[1].Modified": 1577880000000,
"document[1].Created": 1577880000000,
"document[1].RequiredReview": false,
"document[1].Type": "NO_CODE",
"document[1].ParametersRules[0].Features.feature1.op": ">",
"document[1].ParametersRules[0].Features.feature1.value": 10,
"document[1].ParametersRules[0].Features.feature2.op": "==",
"document[1].ParametersRules[0].Features.feature2.value": true,
"document[1].ParametersRules[0].Features.feature3.op": "range",
"document[1].ParametersRules[0].Features.feature3.value[0]": 4,
"document[1].ParametersRules[0].Features.feature3.value[1]": 10,
"document[1].ParametersRules[0].Features.feature4.op": "!=",
"document[1].ParametersRules[0].Features.feature4.value": "None",
"document[1].ParametersRules[0].DecisionType": "all",
"document[1].ParametersRules[1].Features.feature5.op": "<",
"document[1].ParametersRules[1].Features.feature5.value": 1000,
"document[1].ParametersRules[1].DecisionType": "any"
}"""
d: dict[str, str | bool | int | None] = json.loads(j)
result = process(d)
pprint(result)
Result:
[{'ClusterId': 22,
'Created': 1577880000000,
'Description': '',
'Enabled': True,
'MaxScore': 100,
'MinScore': 100,
'Modified': 1577880000000,
'Module': 'device',
'Name': 'os',
'RequiredReview': False,
'Rules': {'Rule2': True},
'Type': 'NO_CODE'},
{'ClusterId': 29,
'Created': 1577880000000,
'Description': 'rule with params test',
'Enabled': True,
'MaxScore': 100,
'MinScore': 100,
'Modified': 1577880000000,
'Module': 'device',
'Name': 'rule with params test',
'ParametersRules[0]': {'DecisionType': 'all'},
'ParametersRules[1]': {'DecisionType': 'any'},
'RequiredReview': False,
'Rules': {'Rule': False},
'Type': 'NO_CODE'}]
Of course one of the problems is that it doesn't accounted for nested paths like document[1].ParametersRules[0].Features.feature1.op
which should ideally create a new sub-list to add values to.