I have a JSON log file and want to print and count the number of times a URL(requestURL) has been hit by an IP in the same log file. The output should be like the below:
IP(remoteIp): URL1-(Count), URL2-(Count), URL3...
127.0.0.1: http://www.google.com - 12, www.bing.com/servlet-server.jsp - 2, etc..
The Sample of the Logfile is like below
"insertId": "kdkddkdmdkd",
"jsonPayload": {
"@type": "type.googleapis.com/google.cloud.loadbalancing.type.LoadBalancerLogEntry",
"enforcedSecurityPolicy": {
"configuredAction": "DENY",
"outcome": "DENY",
"preconfiguredExprIds": [
"owasp-crs-v030001-id942220-sqli"
],
"name": "shbdbbddjdjdjd",
"priority": 2000
},
"statusDetails": "body_denied_by_security_policy"
},
"httpRequest": {
"requestMethod": "POST",
"requestUrl": "https://dknnkkdkddkd/token",
"requestSize": "3004",
"status": 403,
"responseSize": "274",
"userAgent": "okhttp/3.12.2",
"remoteIp": "127.0.0.1",
"serverIp": "123.123.33.31",
"latency": "0.018728s"
}
The solution that I am using is below. I am able to get the total hits per IP or how many total times a URL has been hit etc.
import json
from collections import Counter
unique_ip = {}
request_url = {}
def getAndSaveValueSafely(freqTable, searchDict, key):
try:
tmp = searchDict['httpRequest'][key]
if tmp in freqTable:
freqTable[tmp] = 1
else:
freqTable[tmp] = 1
except KeyError:
if 'not_present' in freqTable:
freqTable['not_present'] = 1
else:
freqTable['not_present'] = 1
with open("threat_intel_1.json") as file:
data = json.load(file)
for d2 in data:
getAndSaveValueSafely(unique_ip, d2, 'remoteIp')
getAndSaveValueSafely(request_url, d2, 'requestUrl')
mc_unique_ip = (dict(Counter(unique_ip).most_common()))
mc_request_url = (dict(Counter(request_url).most_common()))
def printing():
a = str(len(unique_ip))
b = str(len(request_url))
with open("output.txt", "w") as f1:
print(
f' Start Time of log = {minTs}'
f' \n\n End Time of log = {maxTs} \n\n\n {a} Unique IP List = {mc_unique_ip} \n\n\n {b} Unique URL = {mc_request_url},file=f1)
CodePudding user response:
I dont think you need to use counter and are unlikely to see any benifit
from collections import defaultdict
result = {} # start empty
with open("threat_intel_1.json") as file:
data = json.load(file)
for d2 in data:
req = d2.get('httpRequest',None)
if not req:
continue
url = req['requestUrl']
ip = req['remoteIp']
result.setdefault(url,defaultdict(int))[ip] = 1
print(result)
# {"/endpoint.html": {"127.2.3.4":15,"222.11.31.22":2}}
if instead you want it the other way thats easy also
for d2 in data:
req = d2.get('httpRequest',None)
if not req:
continue
url = req['requestUrl']
ip = req['remoteIp']
result.setdefault(ip,defaultdict(int))[url] = 1
#{"127.1.2.3",{"/endpoint1.html":15,"/endpoint2.php":1},"33.44.55.66":{"/endpoint1.html":5}, ...}
instead of using defaultdict you could add a line
# result.setdefault(ip,defaultdict(int))[url] = 1
result.setdefault(ip,{})
result[ip][url] = result[ip].get(url,0) 1
which arguably is more readable anyway...