Home > Enterprise >  Unable to parse, filter or extract json.dumps object using loop
Unable to parse, filter or extract json.dumps object using loop

Time:01-12

I want the first element starting after [{ to be extracted using the code below `

[
  {
    "Bkav": {
      "category": "harmless",
      "result": "clean",
      "method": "blacklist",
      "engine_name": "Bkav"
    },
    "CMC Threat Intelligence": {
      "category": "harmless",
      "result": "clean",
      "method": "blacklist",
      "engine_name": "CMC Threat Intelligence"
    },
    "Snort IP sample list": {
      "category": "harmless",
      "result": "clean",
      "method": "blacklist",
      "engine_name": "Snort IP sample list"
    },
    "0xSI_f33d": {
      "category": "harmless",
      "result": "clean",
      "method": "blacklist",
      "engine_name": "0xSI_f33d"
    },
    "ViriBack": {
      "category": "harmless",
      "result": "clean",
      "method": "blacklist",
      "engine_name": "ViriBack"
    },
    "Comodo Valkyrie Verdict": {
      "category": "harmless",
      "result": "clean",
      "method": "blacklist",
      "engine_name": "Comodo Valkyrie Verdict"
    },
    "PhishLabs": {
      "category": "harmless",
      "result": "clean",
      "method": "blacklist",
      "engine_name": "PhishLabs"
    },
    "K7AntiVirus": {
      "category": "harmless",
      "result": "clean",
      "method": "blacklist",
      "engine_name": "K7AntiVirus"
    },
    "CINS Army": {
      "category": "harmless",
      "result": "clean",
      "method": "blacklist",
      "engine_name": "CINS Army"
    },
    "Quttera": {
      "category": "harmless",
      "result": "clean",
      "method": "blacklist",
      "engine_name": "Quttera"
    },
    "PrecisionSec": {
      "category": "harmless",
      "result": "clean",
      "method": "blacklist",
      "engine_name": "PrecisionSec"
    },
    "OpenPhish": {
      "category": "harmless",
      "result": "clean",
      "method": "blacklist",
      "engine_name": "OpenPhish"
    },
    "VX Vault": {
      "category": "harmless",
      "result": "clean",
      "method": "blacklist",
      "engine_name": "VX Vault"
    },
    "Web Security Guard": {
      "category": "harmless",
      "result": "clean",
      "method": "blacklist",
      "engine_name": "Web Security Guard"
    },
    "Scantitan": {
      "category": "harmless",
      "result": "clean",
      "method": "blacklist",
      "engine_name": "Scantitan"
    },
    "AlienVault": {
      "category": "harmless",
      "result": "clean",
      "method": "blacklist",
      "engine_name": "AlienVault"
    },
    "Sophos": {
      "category": "harmless",
      "result": "clean",
      "method": "blacklist",
      "engine_name": "Sophos"
    },
    "Phishtank": {
      "category": "harmless",
      "result": "clean",
      "method": "blacklist",
      "engine_name": "Phishtank"
    },
    "Cyan": {
      "category": "harmless",
      "result": "clean",
      "method": "blacklist",
      "engine_name": "Cyan"
    },
    "Spam404": {
      "category": "harmless",
      "result": "clean",
      "method": "blacklist",
      "engine_name": "Spam404"
    },
    "SecureBrain": {
      "category": "harmless",
      "result": "clean",
      "method": "blacklist",
      "engine_name": "SecureBrain"
    }
  }
]

`

The code works and I get the output through the y variable:

import json
import re
from http.client import responses

import vt
import requests

# Read the whole log file into memory so it can be scanned for IP addresses.
with open('/home/asad/Downloads/ssh-log-parser/ok', 'r') as file:
    file = file.read()

# Match dotted-quad, IPv4-looking tokens (octet ranges are not validated).
pattern = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')
ips = pattern.findall(file)
# De-duplicate so each address is queried only once (order is not preserved).
unique_ips = list(set(ips))
# print(unique_ips)
# print(len(unique_ips))
headers = {
    "accept": "application/json",
    "x-apikey": "###"
}
i = 0
url = "https://www.virustotal.com/api/v3/ip_addresses/"
messages = []
while i < len(unique_ips):
    # Build the per-IP endpoint by appending the address to the base URL.
    furl = url + str(unique_ips[i])
    response = requests.get(furl, headers=headers)
    data_ = response.json()
    i += 1
    # print(data_)
    # Wrap this IP's per-engine results in a one-element list.
    messages = [data_['data']['attributes']['last_analysis_results']]
    y = json.dumps(messages)
    # NOTE: json.dumps returns a JSON-formatted str, not a dict or list, so
    # y[0] is the character "[" and y cannot be indexed by key.
    print(y)

#for ii in y:
    #print(ii, ":", y[ii])

    # One {"value": <ip>} entry per unique address.
    labels = [{"value": i} for i in unique_ips]

    out_json = {
        "indicators": {
            "value": labels,
            "type": 'ip'

        },

    }

    #print(out_json)

Now, if I try to grab the first key using y[0], I get the first character, [, of the JSON string, whereas I want the key string itself. I tried to loop over the array, but I'm getting:

Traceback (most recent call last):
  File "/home/asad/Downloads/ssh-log-parser/auth_log_parser.py", line 35, in <module>
    print(ii, ":", y[ii])
TypeError: string indices must be integers

I want to extract the following keys in bold:

[{"Bkav": {"category": "harmless", "result": "clean", "method": "blacklist", "engine_name": "Bkav"}, "CMC Threat Intelligence": {"category": "harmless", "result": "clean", "method": "blacklist", "engine_name": "CMC Threat Intelligence"}, "Snort IP sample list": {"category": "harmless", "result": "clean", "method": "blacklist", "engine_name": "Snort IP sample list"}, `

CodePudding user response:

Is pandas an option for you? (I stored your example JSON in 'Test.json'.)

import pandas as pd

# Load the saved JSON. The top-level list holds one object, so the frame has
# a single row whose columns are the engine names.
df = pd.read_json('Test.json')
# .values exposes the underlying array of per-engine result dicts.
print(df.values)

Output:

[[{'category': 'harmless', 'result': 'clean', 'method': 'blacklist', 'engine_name': 'Bkav'}
  {'category': 'harmless', 'result': 'clean', 'method': 'blacklist', 'engine_name': 'CMC Threat Intelligence'}
  {'category': 'harmless', 'result': 'clean', 'method': 'blacklist', 'engine_name': 'Snort IP sample list'} ...]]

And if you can remove the enclosing list brackets [] from the file, it can be printed nicely:

import pandas as pd

# orient='index' treats each top-level key (the engine name) as a row label
# and the nested keys (category, result, method, engine_name) as columns.
df = pd.read_json('Test.json', orient='index')
print(df)

Output:

                  category result     method              engine_name
Bkav                     harmless  clean  blacklist                     Bkav
CMC Threat Intelligence  harmless  clean  blacklist  CMC Threat Intelligence
Snort IP sample list     harmless  clean  blacklist     Snort IP sample list
0xSI_f33d                harmless  clean  blacklist                0xSI_f33d
ViriBack                 harmless  clean  blacklist                 ViriBack
Comodo Valkyrie Verdict  harmless  clean  blacklist  Comodo Valkyrie Verdict
PhishLabs                harmless  clean  blacklist                PhishLabs
K7AntiVirus              harmless  clean  blacklist              K7AntiVirus
CINS Army                harmless  clean  blacklist                CINS Army
Quttera                  harmless  clean  blacklist                  Quttera
PrecisionSec             harmless  clean  blacklist             PrecisionSec
OpenPhish                harmless  clean  blacklist                OpenPhish
VX Vault                 harmless  clean  blacklist                 VX Vault
Web Security Guard       harmless  clean  blacklist       Web Security Guard
Scantitan                harmless  clean  blacklist                Scantitan
AlienVault               harmless  clean  blacklist               AlienVault
Sophos                   harmless  clean  blacklist                   Sophos
Phishtank                harmless  clean  blacklist                Phishtank
Cyan                     harmless  clean  blacklist                     Cyan
Spam404                  harmless  clean  blacklist                  Spam404
SecureBrain              harmless  clean  blacklist              SecureBrain

And since you were looking for the first element:

# first_valid_index() returns the index label of the first non-NA row,
# which here is the first engine name.
print("First element",df.first_valid_index())

gives you: >>> First element **Bkav**

  • Related