I am parsing JS files stored in a directory to extract the values inside `tags`. After finding the relevant values in `tags` using a regex, I want to move those values into a new section called `controls`. I have been able to collect the required values in the `controls` variable using the script below:
def parse_sigs(target_folder):
    """Print the control-style tags found in every ``.js`` file under a folder.

    Walks ``target_folder`` recursively. For each ``.js`` file, the first
    ``tags: [...]`` array is located with a regex, parsed as a Python list
    literal, and filtered down to the entries that match the control pattern.
    The matching entries and a running count of successfully parsed files
    are printed.

    Args:
        target_folder: Directory to search recursively for ``.js`` files.

    Returns:
        None. All results are reported on standard output.
    """
    count = 0
    # Compiled once; the pattern itself is unchanged from the original script.
    cregex = re.compile(r".*\-.*\-.*[\.|\-].*")
    for root, _dirs, files in os.walk(target_folder):
        for file in files:
            file_extension = os.path.splitext(file)[1]
            if file_extension.lower() != ".js":
                continue
            print(file)
            filename = os.path.join(root, file)
            print(f'Processing : {filename}')
            # The try block is scoped to a single file so that a malformed
            # file is really skipped instead of aborting the whole walk.
            try:
                with open(filename, "r") as f_in:
                    data = f_in.read()
                tags_match = re.search(r"\btags\s*:\s*(\[.*?\])", data, flags=re.S)
                if tags_match is None:
                    # No tags array in this file: nothing to report.
                    continue
                # literal_eval only accepts literals, so the array is parsed
                # without executing any code taken from the file.
                tags = literal_eval(tags_match.group(1))
                controls = list(filter(cregex.match, tags))
                count += 1
                print(controls)
                print(count)
            except Exception as e:
                print(f"Error in Processing {filename} Skipping -- ")
                print(e)
I cannot think of a way to insert the `controls` list into the JS file as a new field and save the updated file. The new field should appear right after the `tags` field.
An example JS file looks like this:
// Example signature module: exports a status flag, a `sig` description and an `expected` result.
var fs = require('fs');
var path = require('path')
// Reads ../sig/file.md (resolved against this file's directory) as UTF-8 text.
// NOTE(review): `remediation` is not referenced anywhere else in this snippet.
const remediation = fs.readFileSync(path.resolve(__dirname, '../sig/file.md'), 'utf-8')
module.exports = {
status: "ENABLED",
sig: {
name: "iam",
// Tag list: plain labels mixed with framework names and framework-specific control identifiers.
tags: [
"aws",
"iam",
"compliance",
"chg-02.1",
"AWS-CIS-v1.4",
"AWS-CIS-v1.4-1.12",
"AWS-CIS-v1.4-1.14",
"SOC-2",
"SOC-2-CC6.8",
"NIST-800-53rev5",
"NIST-800-53rev5-CM-3(1)"
],
result: "A.id, A.name",
result_header: [
"account_id",
"account_name",
],
primary_resource: "user_arn",
},
expected: {
"all":
[
{
user_name: "2506"
}
]
}
};
My end goal is to produce a JS file whose `tags` section and newly added `controls` section look like this:
tags: [
"aws",
"iam",
"compliance",
"chg-02.1",
"AWS-CIS-v1.4",
"SOC-2",
"NIST-800-53rev5"
],
controls: [
"AWS-CIS-v1.4-1.12",
"AWS-CIS-v1.4-1.14",
"NIST-800-53rev5-CM-3(1)",
"SOC-2-CC6.8",
],
Edit:
I was able to get it working with the solution provided by @Dan-Dev.
The only thing still out of place is the closing `]`. Is there a way to make the closing bracket indent correctly?
tags: [
"aws",
"docdb",
"kms",
"configuration-check",
],
controls: [
"SOC-2-CC6.1",
"NIST-800-53rev5-SC-12"
]
CodePudding user response:
You can extract the exported object and convert it to JSON using the `jsonnet` package, iterate over its tags with your regex, and then build a string that replaces the original `tags` block, like this:
import json
import os
import re
import _jsonnet
def parse_sigs(target_folder):
    """Split ``sig.tags`` of every ``.js`` file into tags and controls and print the result.

    Walks ``target_folder`` recursively. For each ``.js`` file, the object
    assigned to ``module.exports`` is evaluated with jsonnet and its
    ``sig.tags`` list is partitioned: entries matching the control pattern go
    to ``controls`` and the rest stay in ``tags``. The ``tags: [...]`` text in
    the file content is then replaced by the new ``tags`` and ``controls``
    arrays. The rewritten content and a running count of processed files are
    printed; nothing is written back to disk.

    NOTE(review): the control pattern also matches version-style tags such as
    "AWS-CIS-v1.4", so those end up in ``controls`` as well.

    Args:
        target_folder: Directory to search recursively for ``.js`` files.

    Returns:
        None. All results are reported on standard output.
    """
    count = 0
    cregex = re.compile(r".*-.*-.*[.|\-].*")
    for root, _, files in os.walk(target_folder):
        for file in files:
            file_extension = os.path.splitext(file)[1]
            if file_extension.lower() != ".js":
                continue
            filename = os.path.join(root, file)
            print(f'Processing : {filename}')
            try:
                with open(filename, "r") as f_in:
                    data = f_in.read()
                # IndexError here (no module.exports assignment) is reported
                # by the handler below and the file is skipped.
                snippet = re.findall(r'module.exports = (.*);', data, flags=re.S)[0]
                json_data = json.loads(_jsonnet.evaluate_snippet('snippet', snippet))
                new_tags = []
                controls = []
                for tag in json_data['sig']['tags']:
                    if cregex.match(tag):
                        controls.append(tag)
                    else:
                        new_tags.append(tag)
                new_string = f"tags: {json.dumps(new_tags, indent=8)},\n controls: {json.dumps(controls, indent=8)}"
                # A callable replacement inserts the text verbatim. Passed as
                # a plain string, backslash escapes produced by json.dumps
                # (e.g. \uXXXX or \\) would be interpreted by re.sub and
                # either raise re.error or alter the output.
                data = re.sub(r'tags: \[(.*?)]', lambda _match: new_string, data, flags=re.S)
                print(data)
                # Count every successfully processed file.
                count += 1
                print(count)
            except Exception as e:
                print(f"Error in Processing {filename} Skipping -- ")
                print(e)
# Process every .js file under the current working directory.
parse_sigs('./')
Outputs:
Processing : ./test.js
var fs = require('fs');
var path = require('path')
const remediation = fs.readFileSync(path.resolve(__dirname, '../sig/file.md'), 'utf-8')
module.exports = {
status: "ENABLED",
sig: {
name: "iam",
tags: [
"aws",
"iam",
"compliance",
"chg-02.1",
"SOC-2",
"NIST-800-53rev5"
],
controls: [
"AWS-CIS-v1.4",
"AWS-CIS-v1.4-1.12",
"AWS-CIS-v1.4-1.14",
"SOC-2-CC6.8",
"NIST-800-53rev5-CM-3(1)"
],
result: "A.id, A.name",
result_header: [
"account_id",
"account_name",
],
primary_resource: "user_arn",
},
expected: {
"all":
[
{
user_name: "2506"
}
]
}
};
1
UPDATED WITH FORMATTING:
import json
import os
import re
import _jsonnet
import jsbeautifier
# Formatting options handed to jsbeautifier.beautify() inside parse_sigs().
opts = jsbeautifier.default_options()
opts.indent_size = 2  # indentation width used for the beautified output
opts.space_in_empty_paren = True  # presumably puts a space inside empty "()" - confirm against the jsbeautifier docs
def parse_sigs(target_folder):
    """Split ``sig.tags`` of every ``.js`` file into tags and controls and print it beautified.

    Walks ``target_folder`` recursively. For each ``.js`` file, the object
    assigned to ``module.exports`` is evaluated with jsonnet and its
    ``sig.tags`` list is partitioned: entries matching the control pattern go
    to ``controls`` and the rest stay in ``tags``. The ``tags: [...]`` text in
    the file content is replaced by the new ``tags`` and ``controls`` arrays,
    and the whole content is re-indented with jsbeautifier using the
    module-level ``opts``. The result and a running count of processed files
    are printed; nothing is written back to disk.

    NOTE(review): the control pattern also matches version-style tags such as
    "AWS-CIS-v1.4", so those end up in ``controls`` as well.

    Args:
        target_folder: Directory to search recursively for ``.js`` files.

    Returns:
        None. All results are reported on standard output.
    """
    count = 0
    cregex = re.compile(r".*-.*-.*[.|\-].*")
    for root, _, files in os.walk(target_folder):
        for file in files:
            file_extension = os.path.splitext(file)[1]
            if file_extension.lower() != ".js":
                continue
            filename = os.path.join(root, file)
            print(f'Processing : {filename}')
            try:
                with open(filename, "r") as f_in:
                    data = f_in.read()
                # IndexError here (no module.exports assignment) is reported
                # by the handler below and the file is skipped.
                snippet = re.findall(r'module.exports = (.*);', data, flags=re.S)[0]
                json_data = json.loads(_jsonnet.evaluate_snippet('snippet', snippet))
                new_tags = []
                controls = []
                for tag in json_data['sig']['tags']:
                    if cregex.match(tag):
                        controls.append(tag)
                    else:
                        new_tags.append(tag)
                new_string = f"tags: {json.dumps(new_tags, indent=8)},\n controls: {json.dumps(controls, indent=8)}"
                # A callable replacement inserts the text verbatim. Passed as
                # a plain string, backslash escapes produced by json.dumps
                # (e.g. \uXXXX or \\) would be interpreted by re.sub and
                # either raise re.error or alter the output.
                data = re.sub(r'tags: \[(.*?)]', lambda _match: new_string, data, flags=re.S)
                # Re-indent the whole file so the inserted arrays, including
                # their closing brackets, line up with the surrounding code.
                data = jsbeautifier.beautify(data, opts)
                print(data)
                # Count every successfully processed file.
                count += 1
                print(count)
            except Exception as e:
                print(f"Error in Processing {filename} Skipping -- ")
                print(e)
# Process every .js file under the current working directory.
parse_sigs('./')