I have a set of data that I would like to extract from a txt file and store in a specific format. The data is currently in a txt file like so:
set firewall family inet filter INBOUND term TEST from source-address 1.1.1.1/32
set firewall family inet filter INBOUND term TEST from destination-prefix-list test-list
set firewall family inet filter INBOUND term TEST from protocol udp
set firewall family inet filter INBOUND term TEST from destination-port 53
set firewall family inet filter INBOUND term TEST then accept
set firewall family inet filter PROD term LAN from source-address 4.4.4.4/32
set firewall family inet filter PROD term LAN from source-address 5.5.5.5/32
set firewall family inet filter PROD term LAN from protocol tcp
set firewall family inet filter PROD term LAN from destination-port 443
set firewall family inet filter PROD term LAN then deny
I would like the data to be structured so that each rule has its respective options placed into a dictionary, and each dictionary is appended to a list. For example:
Expected Output
[{'Filter': 'INBOUND', 'Term': 'TEST', 'SourceIP': '1.1.1.1/32', 'DestinationList': 'test-list', 'Protocol': 'udp', 'DestinationPort': '53', 'Action': 'accept'},
{'Filter': 'PROD', 'Term': 'LAN', 'SourceIP': ['4.4.4.4/32','5.5.5.5/32'], 'Protocol': 'tcp', 'DestinationPort': '443', 'Action': 'deny'}]
As you can see there may be instances where a certain trait does not exist for a rule. I would also have to add multiple IP addresses as a value. I am currently using Regex to match the items in the txt file. My thought was to iterate through each line in the file, find any matches and add them as a key-value pair to a dictionary.
Once I get an "accept" or "deny", that should signal the end of the rule, and I will append the dictionary to the list, clear the dictionary and start the process with the next rule. However, this does not seem to be working as intended. My regex seems fine, but I can't seem to figure out the logic for processing each line, adding multiple values to a value list, and adding values to the dictionary. Here is my code below:
import re
data_file = "sample_data.txt"

##### REGEX PATTERNS #####
# Every pattern starts with a lookbehind on the keyword, so the captured group
# is only the value that follows that keyword on the line.
filter_re = r'(?<=filter\s)(.*)(?=\sterm.)'
term_re = r'(?<=term\s)(.*)(?=\sfrom|\sthen)'
protocol_re = r'(?<=protocol\s)(.*)'
dest_port_re = r'(?<=destination-port\s)(.*)'
source_port_re = r'(?<=from\ssource-port\s)(.*)'
prefix_source_re = r'(?<=from\ssource-prefix-list\s)(.*)'
prefix_dest_re = r'(?<=from\sdestination-prefix-list\s)(.*)'
source_addr_re = r'(?<=source-address\s)(.*)'
dest_addr_re = r'(?<=destination-address\s)(.*)'
action_re = r'(?<=then\s)(deny|accept)'

# pattern_list and pattern_headers are walked together with zip(), so entry i
# of one must describe entry i of the other and both must have the same length.
pattern_list = [
    filter_re,
    term_re,
    source_addr_re,
    prefix_source_re,
    source_port_re,
    dest_addr_re,
    prefix_dest_re,
    dest_port_re,
    protocol_re,
    action_re,
]
# The comma after "Destination_Address" belongs outside the string: inside it,
# the two adjacent literals are concatenated into a single header, every later
# header shifts onto the wrong pattern, and zip() drops action_re entirely.
pattern_headers = [
    "Filter",
    "Term",
    "Source_Address",
    "Source_Prefix_List",
    "Source_Port",
    "Destination_Address",
    "Destination_Prefix_List",
    "Destination_Port",
    "Protocol",
    "Action",
]
final_list = []
def open_file(file):
    """Scan *file* line by line and collect regex matches into ``rule_dict``.

    Each line is tested against every (header, pattern) pair; a match is
    stored in ``rule_dict`` under its header. When the loop ends, the
    function prints ``rule_dict`` and the module-level ``final_list``.
    It returns nothing.
    """
    rule_dict = {}
    with open(file, 'r') as f:
        line = f.readline()
        while line:
            # NOTE(review): this second readline() replaces the first line
            # before it has been matched, so line 1 is never processed.
            # strip() also turns a blank line into '', which ends the loop.
            line = f.readline().strip()
            for header, pattern in zip(pattern_headers,pattern_list):
                match = re.findall(pattern, line)
                if len(match) != 0:
                    # NOTE(review): this condition is always true (no value
                    # equals both 'accept' and 'deny'), and it tests the
                    # header name rather than the matched text, so the else
                    # branch below is never reached.
                    if header != 'accept' or header != 'deny':
                        # Plain assignment: a repeated header overwrites the
                        # value stored earlier for the same rule.
                        rule_dict[header] = match[0]
                    else:
                        rule_dict[header] = match[0]
                        # NOTE(review): `final` is not defined in the visible
                        # code; the module-level list is named `final_list`.
                        final.append(rule_dict)
                        rule_dict = {}
    print(rule_dict)
    print(final_list)
The final list is empty, and rule_dict only contains the final rule from the text file, not both of the rulesets. Any guidance would be greatly appreciated.
CodePudding user response:
There are a few small mistakes in your code:
- In your `while` loop, `f.readline()` needs to be at the end; otherwise you already begin at line 2 (`readline` is called twice before anything is done).
- `final_list` has to be defined in your function and also used correctly (instead of only `final`).
- `if header != 'accept' or header != 'deny':` needs an `and` here. One of the two comparisons is always True, so the `else` part never gets executed.
- You need to check the match for `accept|deny`, not the `header`.
- For example, in `Source_IP` you want to have a list with all the IPs you find. The way you do it, the value is overwritten each time, and only the last IP found will be in your `final_list`.
def open_file(file, fields=None):
    """Parse firewall ``set`` lines from *file* into a list of rule dicts.

    Every line is tested against each (header, pattern) pair. The first
    match of a pattern is stored under its header. A matched value of
    "accept" or "deny" closes the current rule: the rule is appended to the
    result and a new, empty rule is started.

    Values are kept in the order they appear in the file and repeated values
    are stored once. In the finished rule, a header with one value maps to
    that value and a header with several values maps to a list of them.

    Args:
        file: Path of the text file to read.
        fields: Optional iterable of (header, regex pattern) pairs. Defaults
            to the module-level ``pattern_headers`` zipped with
            ``pattern_list``.

    Returns:
        The list of finished rule dictionaries. Lines collected after the
        last "accept"/"deny" are not part of it.
    """
    if fields is None:
        fields = zip(pattern_headers, pattern_list)
    # The pairs are re-used for every line, and zip() can only be walked once.
    fields = list(fields)

    final_list = []
    rule_dict = {}
    with open(file) as f:
        for line in f:
            line = line.strip()
            for header, pattern in fields:
                match = re.findall(pattern, line)
                if not match:
                    continue
                value = match[0]
                # A list (not a set) keeps the file order of repeated headers
                # such as several source addresses; the membership test keeps
                # Filter/Term, which repeat on every line, from piling up.
                values = rule_dict.setdefault(header, [])
                if value not in values:
                    values.append(value)
                if value in ("accept", "deny"):
                    # End of rule: unwrap single-value lists before storing.
                    final_list.append(
                        {k: (v if len(v) > 1 else v[0]) for k, v in rule_dict.items()}
                    )
                    rule_dict = {}
    print(f"{rule_dict=}")
    print(f"{final_list=}")
    return final_list
# Parse the file named by data_file; open_file prints the rule_dict and final_list it built.
open_file(data_file)
Output:
rule_dict={}
final_list=[
{
'Filter': 'INBOUND',
'Term': 'TEST',
'Source_Address': '1.1.1.1/32',
'Destination_Prefix_List': 'test-list',
'Protocol': 'udp', 'Destination_Port': '53',
'Action': 'accept'
},
{
'Filter': 'PROD',
'Term': 'LAN',
'Source_Address': ['5.5.5.5/32', '4.4.4.4/32'],
'Protocol': 'tcp',
'Destination_Port': '443',
'Action': 'deny'
}
]
CodePudding user response:
There are a few things that I have changed in your code:
- When "accept" or "deny" is found as the action, append final_dict to final_list and then empty final_dict.
- Allow more than one SourceIP to be added: when another SourceIP is found for the same rule, the SourceIP value is turned into a list.
import re
# Path of the text file that holds the firewall "set" lines.
data_file = "/home/hiraltalsaniya/Documents/Hiral/test"

# One regular expression per extracted field. Each begins with a lookbehind on
# the keyword, so the match itself is only the value that follows the keyword.
filter_re = r'(?<=filter\s)(.*)(?=\sterm.)'
term_re = r'(?<=term\s)(.*)(?=\sfrom|\sthen)'
protocol_re = r'(?<=protocol\s)(.*)'
dest_port_re = r'(?<=destination-port\s)(.*)'
source_port_re = r'(?<=from\ssource-port\s)(.*)'
prefix_source_re = r'(?<=from\ssource-prefix-list\s)(.*)'
prefix_dest_re = r'(?<=from\sdestination-prefix-list\s)(.*)'
source_addr_re = r'(?<=source-address\s)(.*)'
dest_addr_re = r'(?<=destination-address\s)(.*)'
action_re = r'(?<=then\s)(deny|accept)'

# Output key and the pattern that extracts its value, kept side by side so the
# two derived lists below cannot drift out of step.
_FIELD_TABLE = (
    ("Filter", filter_re),
    ("Term", term_re),
    ("SourceIP", source_addr_re),
    ("Source_Prefix_List", prefix_source_re),
    ("Source_Port", source_port_re),
    ("Destination_Address", dest_addr_re),
    ("DestinationList", prefix_dest_re),
    ("Destination_Port", dest_port_re),
    ("Protocol", protocol_re),
    ("Action", action_re),
)
pattern_list = [regex for _, regex in _FIELD_TABLE]
pattern_headers = [key for key, _ in _FIELD_TABLE]
def open_file(file, fields=None, *, include_action=False):
    """Parse firewall ``set`` lines from *file* into a list of rule dicts.

    Every line is searched with each (header, pattern) pair and the matched
    text is stored in the current rule under its header. A match equal to
    "accept" or "deny" marks the end of the rule: the rule is appended to the
    result and a new, empty rule is started.

    A repeated "SourceIP" within one rule is collected into a flat list, in
    file order; any other repeated header keeps its latest value.

    Args:
        file: Path of the text file to read.
        fields: Optional iterable of (header, regex pattern) pairs. Defaults
            to the module-level ``pattern_headers`` zipped with
            ``pattern_list``.
        include_action: When true, the terminating "accept"/"deny" is stored
            in the rule under its header. The default leaves it out, so
            finished rules carry no action entry.

    Returns:
        The list of finished rule dictionaries (also printed). Lines read
        after the last "accept"/"deny" are not part of it.
    """
    if fields is None:
        fields = zip(pattern_headers, pattern_list)
    # The pairs are re-used for every line, and zip() can only be walked once.
    fields = list(fields)

    final_dict = {}
    final_list = []
    with open(file) as f:
        for line in f:
            for header, pattern in fields:
                match = re.search(pattern, line)
                if not match:
                    continue
                value = match.group()
                if value in ("accept", "deny"):
                    # End of the rule: store it and start a fresh dictionary.
                    if include_action:
                        final_dict[header] = value
                    final_list.append(final_dict)
                    final_dict = {}
                elif header == "SourceIP" and header in final_dict:
                    existing = final_dict[header]
                    if isinstance(existing, list):
                        # Third and later addresses extend the same list;
                        # wrapping again here would nest it as [[a, b], c].
                        existing.append(value)
                    else:
                        final_dict[header] = [existing, value]
                else:
                    final_dict[header] = value
    print("final_list=", final_list)
    return final_list
# Parse the file at data_file; open_file prints the resulting final_list.
open_file(data_file)
Output:
final_list= [{'Filter': 'INBOUND',
'Term': 'TEST',
'SourceIP': '1.1.1.1/32',
'DestinationList': 'test-list',
'Protocol': 'udp',
'Destination_Port': '53'
},
{'Filter': 'PROD',
'Term': 'LAN',
'SourceIP': ['4.4.4.4/32', '5.5.5.5/32'],
'Protocol': 'tcp',
'Destination_Port': '443'
}]