I have the following data in JSON format. I want to find the number of occurrences (count) of each unique value of the "remoteIp" key.
{
"insertId": "kdkddkdmdkd",
"jsonPayload": {
"@type": "type.googleapis.com/google.cloud.loadbalancing.type.LoadBalancerLogEntry",
"enforcedSecurityPolicy": {
"configuredAction": "DENY",
"outcome": "DENY",
"preconfiguredExprIds": [
"owasp-crs-v030001-id942220-sqli"
],
"name": "shbdbbddjdjdjd",
"priority": 2000
},
"statusDetails": "body_denied_by_security_policy"
},
"httpRequest": {
"requestMethod": "POST",
"requestUrl": "https://dknnkkdkddkd/token",
"requestSize": "3004",
"status": 403,
"responseSize": "274",
"userAgent": "okhttp/3.12.2",
"remoteIp": "182.2.169.59",
"serverIp": "10.114.44.4",
"latency": "0.018728s"
}
}
The solution I have created so far fetches all the unique "remoteIp" values and saves them to a set, but I have not been able to count the occurrences of each unique IP in the log file.
import json
# One set per httpRequest field. A set keeps a single copy of every value, so
# it records which values occur but not how many times each one occurs.
unique_ip = set()
request_url = set()
request_method = set()
status_code = set()
userAgent = set()

with open("automation.json") as file:
    data = json.load(file)

for entry in data:
    http = entry['httpRequest']
    request_url.add(''.join(http['requestUrl']))
    request_method.add(''.join(http['requestMethod']))
    unique_ip.add(''.join(http['remoteIp']))
    # status is converted with str() before being stored.
    status_code.add(''.join(str(http['status'])))
    userAgent.add(''.join(http['userAgent']))


def printing():
    """Write the number of distinct values, and the values, to "output.csv".

    Reads the module-level sets filled in above and writes one line per
    field. The file is overwritten on every call.
    """
    a = str(len(unique_ip))
    b = str(len(request_url))
    c = str(len(request_method))
    d = str(len(userAgent))
    e = str(len(status_code))
    report = (
        f' {a} Unique IP List = {unique_ip} \n {b} Unique URLs = {request_url} \n {c} Unique Req Method = {request_method} \n'
        f' {d} Unique userAgent = {userAgent} \n {e} Unique statusCode = {status_code}'
    )
    with open("output.csv", "w") as f1:
        print(report, file=f1)


printing()
CodePudding user response:
Make a frequency table (a dict mapping each value to its count) instead of a set. You'll go through the same number of steps, but instead of ignoring IPs and other values that are already present, you increment their count.
import json
def count_occurrence(table, key):
    """Record one more occurrence of *key* in the frequency table *table*.

    *table* is a dict mapping a value to the number of times it has been
    seen. It is modified in place.
    """
    if key in table:
        # Already seen: increment the stored count. Assigning 1 here would
        # reset it and every value would end up with a count of 1.
        table[key] += 1
    else:
        table[key] = 1


def build_frequency_tables(data):
    """Count how often each value of five ``httpRequest`` fields occurs.

    *data* is an iterable of log entries; each entry is a dict whose
    ``'httpRequest'`` dict holds ``'requestUrl'``, ``'requestMethod'``,
    ``'remoteIp'``, ``'status'`` and ``'userAgent'``.

    Returns a tuple ``(request_url, request_method, unique_ip, status_code,
    userAgent)`` of dicts, each mapping a distinct value to its count.
    ``status`` is converted with ``str()`` before counting.

    Raises ``KeyError`` if an entry lacks one of the fields above.
    """
    request_url = {}
    request_method = {}
    unique_ip = {}
    status_code = {}
    userAgent = {}
    for d2 in data:
        http_request = d2['httpRequest']
        count_occurrence(request_url, http_request['requestUrl'])
        count_occurrence(request_method, http_request['requestMethod'])
        count_occurrence(unique_ip, http_request['remoteIp'])
        count_occurrence(status_code, str(http_request['status']))
        count_occurrence(userAgent, http_request['userAgent'])
    return request_url, request_method, unique_ip, status_code, userAgent


if __name__ == "__main__":
    with open("tmp.json") as file:
        data = json.load(file)
    request_url, request_method, unique_ip, status_code, userAgent = (
        build_frequency_tables(data)
    )
    print('request_url: ', request_url)
    print('request_method: ', request_method)
    print('unique_ip: ', unique_ip)
    print('status_code: ', status_code)
    print('userAgent: ', userAgent)
Example input (a list of dicts): I copied your example three times and added one more entry with different values.
[
{
"insertId": "kdkddkdmdkd",
"jsonPayload": {
"@type": "type.googleapis.com/google.cloud.loadbalancing.type.LoadBalancerLogEntry",
"enforcedSecurityPolicy": {
"configuredAction": "DENY",
"outcome": "DENY",
"preconfiguredExprIds": [
"owasp-crs-v030001-id942220-sqli"
],
"name": "shbdbbddjdjdjd",
"priority": 2000
},
"statusDetails": "body_denied_by_security_policy"
},
"httpRequest": {
"requestMethod": "POST",
"requestUrl": "https://dknnkkdkddkd/token",
"requestSize": "3004",
"status": 403,
"responseSize": "274",
"userAgent": "okhttp/3.12.2",
"remoteIp": "182.2.169.59",
"serverIp": "10.114.44.4",
"latency": "0.018728s"
}
},
{
"insertId": "kdkddkdmdkd",
"jsonPayload": {
"@type": "type.googleapis.com/google.cloud.loadbalancing.type.LoadBalancerLogEntry",
"enforcedSecurityPolicy": {
"configuredAction": "DENY",
"outcome": "DENY",
"preconfiguredExprIds": [
"owasp-crs-v030001-id942220-sqli"
],
"name": "shbdbbddjdjdjd",
"priority": 2000
},
"statusDetails": "body_denied_by_security_policy"
},
"httpRequest": {
"requestMethod": "POST",
"requestUrl": "https://dknnkkdkddkd/token",
"requestSize": "3004",
"status": 403,
"responseSize": "274",
"userAgent": "okhttp/3.12.2",
"remoteIp": "182.2.169.59",
"serverIp": "10.114.44.4",
"latency": "0.018728s"
}
},
{
"insertId": "kdkddkdmdkd",
"jsonPayload": {
"@type": "type.googleapis.com/google.cloud.loadbalancing.type.LoadBalancerLogEntry",
"enforcedSecurityPolicy": {
"configuredAction": "DENY",
"outcome": "DENY",
"preconfiguredExprIds": [
"owasp-crs-v030001-id942220-sqli"
],
"name": "shbdbbddjdjdjd",
"priority": 2000
},
"statusDetails": "body_denied_by_security_policy"
},
"httpRequest": {
"requestMethod": "POST",
"requestUrl": "https://dknnkkdkddkd/token",
"requestSize": "3004",
"status": 403,
"responseSize": "274",
"userAgent": "okhttp/3.12.2",
"remoteIp": "182.2.169.59",
"serverIp": "10.114.44.4",
"latency": "0.018728s"
}
},
{
"insertId": "kdkddkdmdkd",
"jsonPayload": {
"@type": "type.googleapis.com/google.cloud.loadbalancing.type.LoadBalancerLogEntry",
"enforcedSecurityPolicy": {
"configuredAction": "DENY",
"outcome": "DENY",
"preconfiguredExprIds": [
"owasp-crs-v030001-id942220-sqli"
],
"name": "shbdbbddjdjdjd",
"priority": 2000
},
"statusDetails": "body_denied_by_security_policy"
},
"httpRequest": {
"requestMethod": "GET",
"requestUrl": "https://temp/token",
"requestSize": "3004",
"status": 403,
"responseSize": "274",
"userAgent": "okhttp/3.11.2",
"remoteIp": "182.2.168.59",
"serverIp": "10.113.44.4",
"latency": "0.018728s"
}
}
]
output from running code
request_url: {'https://dknnkkdkddkd/token': 3, 'https://temp/token': 1}
request_method: {'POST': 3, 'GET': 1}
unique_ip: {'182.2.169.59': 3, '182.2.168.59': 1}
status_code: {'403': 4}
userAgent: {'okhttp/3.12.2': 3, 'okhttp/3.11.2': 1}
CodePudding user response:
Probably the simplest thing to do is use a collections.Counter instead of a set to track the remoteIp values encountered. It's a dictionary subclass and, like a dictionary, its keys must all be unique; in addition, it keeps track of how many times each key is "added".
Here is the modified code:
from collections import Counter
import json
# Only remoteIp needs a count per value, so it alone is a Counter; the other
# fields remain plain sets of distinct values.
unique_ip = Counter()
request_url = set()
request_method = set()
status_code = set()
userAgent = set()

with open("automation.json") as file:
    data = json.load(file)

for entry in data:
    http = entry['httpRequest']
    request_url.add(''.join(http['requestUrl']))
    request_method.add(''.join(http['requestMethod']))
    # A Counter returns 0 for a missing key, so += 1 works for a first sighting.
    unique_ip[http['remoteIp']] += 1
    # status is converted with str() before being stored.
    status_code.add(''.join(str(http['status'])))
    userAgent.add(''.join(http['userAgent']))


def printing():
    """Write the number of distinct values, and the values, to "output.csv".

    Reads the module-level collections filled in above. ``unique_ip`` is a
    Counter, so its line shows each IP together with its count. The file is
    overwritten on every call.
    """
    a = len(unique_ip)
    b = len(request_url)
    c = len(request_method)
    d = len(userAgent)
    e = len(status_code)
    report = (
        f' {a} Unique IP List = {unique_ip} \n {b} Unique URLs = {request_url} \n {c} Unique Req Method = {request_method} \n'
        f' {d} Unique userAgent = {userAgent} \n {e} Unique statusCode = {status_code}'
    )
    with open("output.csv", "w") as f1:
        print(report, file=f1)


printing()
Note that the file you are creating is not in CSV format, so despite its ".csv" extension it isn't really a CSV file.
With that said, if I copy your example data 3 times and add another entry with a different remoteIp, this is what is written to the "output.csv" file:
2 Unique IP List = Counter({'182.2.169.59': 3, '182.2.168.59': 1})
1 Unique URLs = {'https://dknnkkdkddkd/token'}
1 Unique Req Method = {'POST'}
1 Unique userAgent = {'okhttp/3.12.2'}
1 Unique statusCode = {'403'}
Note what is written at the beginning for the "Unique IP List".