Home > Enterprise >  Count the number of occurrences of an IP in the JSON log file
Count the number of occurrences of an IP in the JSON log file

Time:07-13

I have the following data in JSON format. I want to find the number of occurrences (count) of each unique value of the "remoteIp" key.

{
"insertId": "kdkddkdmdkd",
"jsonPayload": {
  "@type": "type.googleapis.com/google.cloud.loadbalancing.type.LoadBalancerLogEntry",
  "enforcedSecurityPolicy": {
    "configuredAction": "DENY",
    "outcome": "DENY",
    "preconfiguredExprIds": [
      "owasp-crs-v030001-id942220-sqli"
    ],
    "name": "shbdbbddjdjdjd",
    "priority": 2000
  },
  "statusDetails": "body_denied_by_security_policy"
},
"httpRequest": {
  "requestMethod": "POST",
  "requestUrl": "https://dknnkkdkddkd/token",
  "requestSize": "3004",
  "status": 403,
  "responseSize": "274",
  "userAgent": "okhttp/3.12.2",
  "remoteIp": "182.2.169.59",
  "serverIp": "10.114.44.4",
  "latency": "0.018728s"
}

The solution I have created so far is able to fetch all the unique "remoteIp" values and save them to a set. But somehow I am not able to count the occurrences of each unique IP in the log file.

import json

# One set per httpRequest field; a set keeps only the distinct values seen.
unique_ip = set()
request_url = set()
request_method = set()
status_code = set()
userAgent = set()

# The log file is parsed in one go; the loop below iterates over the result
# and expects every entry to carry an "httpRequest" object.
with open("automation.json") as log_file:
    data = json.load(log_file)

for entry in data:
    http = entry['httpRequest']
    request_url.add(''.join(http['requestUrl']))
    request_method.add(''.join(http['requestMethod']))
    unique_ip.add(''.join(http['remoteIp']))
    # "status" is converted to text before it is stored.
    status_code.add(''.join(str(http['status'])))
    userAgent.add(''.join(http['userAgent']))

def printing():
    """Write the number of distinct values and the values themselves to output.csv.

    Reads the module-level sets (unique_ip, request_url, request_method,
    userAgent, status_code) and writes one summary line per set. The file is
    opened in "w" mode, so any previous content is overwritten.
    """
    ip_total = str(len(unique_ip))
    url_total = str(len(request_url))
    method_total = str(len(request_method))
    agent_total = str(len(userAgent))
    status_total = str(len(status_code))
    report = (
        f' {ip_total} Unique IP List = {unique_ip}  \n {url_total} Unique URLs = {request_url} \n {method_total} Unique Req Method = {request_method} \n'
        f' {agent_total} Unique userAgent = {userAgent} \n {status_total} Unique statusCode = {status_code}'
    )
    with open("output.csv", "w") as out:
        print(report, file=out)


printing()

CodePudding user response:

Make a frequency table (a dictionary mapping each value to its count) instead of a set. You'll go through the same number of steps, but instead of skipping IPs and other values that are already present, you increment their count.

import json
# Frequency tables: each dict maps an observed value to the number of log
# entries in which that value appeared.
unique_ip = {}
request_url = {}
request_method = {}
status_code = {}
userAgent = {}

# The file is expected to hold a JSON array of log entries, each with an
# "httpRequest" object.
with open("tmp.json") as file:
    data = json.load(file)

for d2 in data:
    # First sighting of a value starts its count at 1; every later sighting
    # must *increment* the stored count (+=), not reset it to 1.
    s1 = d2['httpRequest']['requestUrl']
    if s1 in request_url:
        request_url[s1] += 1
    else:
        request_url[s1] = 1

    s2 = d2['httpRequest']['requestMethod']
    if s2 in request_method:
        request_method[s2] += 1
    else:
        request_method[s2] = 1

    s3 = d2['httpRequest']['remoteIp']
    if s3 in unique_ip:
        unique_ip[s3] += 1
    else:
        unique_ip[s3] = 1

    # The status is stored as text, so the table is keyed by strings.
    s4 = str(d2['httpRequest']['status'])
    if s4 in status_code:
        status_code[s4] += 1
    else:
        status_code[s4] = 1

    s5 = d2['httpRequest']['userAgent']
    if s5 in userAgent:
        userAgent[s5] += 1
    else:
        userAgent[s5] = 1

print('request_url: ', request_url)
print('request_method: ', request_method)
print('unique_ip: ', unique_ip)
print('status_code: ', status_code)
print('userAgent: ', userAgent)

Example list of dicts: I copied your example 3 times and added another entry with different values.

[
    {
    "insertId": "kdkddkdmdkd",
    "jsonPayload": {
      "@type": "type.googleapis.com/google.cloud.loadbalancing.type.LoadBalancerLogEntry",
      "enforcedSecurityPolicy": {
        "configuredAction": "DENY",
        "outcome": "DENY",
        "preconfiguredExprIds": [
          "owasp-crs-v030001-id942220-sqli"
        ],
        "name": "shbdbbddjdjdjd",
        "priority": 2000
      },
      "statusDetails": "body_denied_by_security_policy"
    },
    "httpRequest": {
      "requestMethod": "POST",
      "requestUrl": "https://dknnkkdkddkd/token",
      "requestSize": "3004",
      "status": 403,
      "responseSize": "274",
      "userAgent": "okhttp/3.12.2",
      "remoteIp": "182.2.169.59",
      "serverIp": "10.114.44.4",
      "latency": "0.018728s"
    }
    },
    
    {
        "insertId": "kdkddkdmdkd",
        "jsonPayload": {
          "@type": "type.googleapis.com/google.cloud.loadbalancing.type.LoadBalancerLogEntry",
          "enforcedSecurityPolicy": {
            "configuredAction": "DENY",
            "outcome": "DENY",
            "preconfiguredExprIds": [
              "owasp-crs-v030001-id942220-sqli"
            ],
            "name": "shbdbbddjdjdjd",
            "priority": 2000
          },
          "statusDetails": "body_denied_by_security_policy"
        },
        "httpRequest": {
          "requestMethod": "POST",
          "requestUrl": "https://dknnkkdkddkd/token",
          "requestSize": "3004",
          "status": 403,
          "responseSize": "274",
          "userAgent": "okhttp/3.12.2",
          "remoteIp": "182.2.169.59",
          "serverIp": "10.114.44.4",
          "latency": "0.018728s"
        }
    },
    {
        "insertId": "kdkddkdmdkd",
        "jsonPayload": {
          "@type": "type.googleapis.com/google.cloud.loadbalancing.type.LoadBalancerLogEntry",
          "enforcedSecurityPolicy": {
            "configuredAction": "DENY",
            "outcome": "DENY",
            "preconfiguredExprIds": [
              "owasp-crs-v030001-id942220-sqli"
            ],
            "name": "shbdbbddjdjdjd",
            "priority": 2000
          },
          "statusDetails": "body_denied_by_security_policy"
        },
        "httpRequest": {
          "requestMethod": "POST",
          "requestUrl": "https://dknnkkdkddkd/token",
          "requestSize": "3004",
          "status": 403,
          "responseSize": "274",
          "userAgent": "okhttp/3.12.2",
          "remoteIp": "182.2.169.59",
          "serverIp": "10.114.44.4",
          "latency": "0.018728s"
        }
    },
    {
        "insertId": "kdkddkdmdkd",
        "jsonPayload": {
          "@type": "type.googleapis.com/google.cloud.loadbalancing.type.LoadBalancerLogEntry",
          "enforcedSecurityPolicy": {
            "configuredAction": "DENY",
            "outcome": "DENY",
            "preconfiguredExprIds": [
              "owasp-crs-v030001-id942220-sqli"
            ],
            "name": "shbdbbddjdjdjd",
            "priority": 2000
          },
          "statusDetails": "body_denied_by_security_policy"
        },
        "httpRequest": {
          "requestMethod": "GET",
          "requestUrl": "https://temp/token",
          "requestSize": "3004",
          "status": 403,
          "responseSize": "274",
          "userAgent": "okhttp/3.11.2",
          "remoteIp": "182.2.168.59",
          "serverIp": "10.113.44.4",
          "latency": "0.018728s"
        }
    }
]

output from running code

request_url:  {'https://dknnkkdkddkd/token': 3, 'https://temp/token': 1}
request_method:  {'POST': 3, 'GET': 1}
unique_ip:  {'182.2.169.59': 3, '182.2.168.59': 1}
status_code:  {'403': 4}
userAgent:  {'okhttp/3.12.2': 3, 'okhttp/3.11.2': 1}

CodePudding user response:

Probably the simplest thing to do is use a collections.Counter instead of a set to track the remoteIp values encountered. It's a dictionary subclass and, like a dictionary, its keys must all be unique; in addition, it keeps track of how many times each key is "added".

Here is the modified code:

from collections import Counter
import json

# Sets keep only the distinct values; the Counter additionally tracks how
# many times each remoteIp was seen.
request_url = set()
request_method = set()
unique_ip = Counter()
status_code = set()
userAgent = set()

with open("automation.json") as log_file:
    data = json.load(log_file)

for entry in data:
    http = entry['httpRequest']
    request_url.add(''.join(http['requestUrl']))
    request_method.add(''.join(http['requestMethod']))
    # A Counter reports 0 for a missing key, so += 1 works on first sight too.
    unique_ip[http['remoteIp']] += 1
    # "status" is converted to text before it is stored.
    status_code.add(''.join(str(http['status'])))
    userAgent.add(''.join(http['userAgent']))

def printing():
    """Write the number of distinct values and the values themselves to output.csv.

    Reads the module-level collections (unique_ip, request_url,
    request_method, userAgent, status_code) and writes one summary line per
    collection. The file is opened in "w" mode, so any previous content is
    overwritten.
    """
    ip_total = len(unique_ip)
    url_total = len(request_url)
    method_total = len(request_method)
    agent_total = len(userAgent)
    status_total = len(status_code)
    report = (
        f' {ip_total} Unique IP List = {unique_ip}  \n {url_total} Unique URLs = {request_url} \n {method_total} Unique Req Method = {request_method} \n'
        f' {agent_total} Unique userAgent = {userAgent} \n {status_total} Unique statusCode = {status_code}'
    )
    with open("output.csv", "w") as out:
        print(report, file=out)


printing()

Note that the file you are creating is not actually in CSV format, despite its ".csv" extension.

With that said, if I copied your example data 3 times and added another with a different remoteIp, this is what is written to the "output.csv" file:

 2 Unique IP List = Counter({'182.2.169.59': 3, '182.2.168.59': 1})  
 1 Unique URLs = {'https://dknnkkdkddkd/token'} 
 1 Unique Req Method = {'POST'} 
 1 Unique userAgent = {'okhttp/3.12.2'} 
 1 Unique statusCode = {'403'}

Note what is written at the beginning for the "Unique IP List".

  • Related