I want to extract the first element, the one starting right after [{,
using the code below
`
[
{
"Bkav": {
"category": "harmless",
"result": "clean",
"method": "blacklist",
"engine_name": "Bkav"
},
"CMC Threat Intelligence": {
"category": "harmless",
"result": "clean",
"method": "blacklist",
"engine_name": "CMC Threat Intelligence"
},
"Snort IP sample list": {
"category": "harmless",
"result": "clean",
"method": "blacklist",
"engine_name": "Snort IP sample list"
},
"0xSI_f33d": {
"category": "harmless",
"result": "clean",
"method": "blacklist",
"engine_name": "0xSI_f33d"
},
"ViriBack": {
"category": "harmless",
"result": "clean",
"method": "blacklist",
"engine_name": "ViriBack"
},
"Comodo Valkyrie Verdict": {
"category": "harmless",
"result": "clean",
"method": "blacklist",
"engine_name": "Comodo Valkyrie Verdict"
},
"PhishLabs": {
"category": "harmless",
"result": "clean",
"method": "blacklist",
"engine_name": "PhishLabs"
},
"K7AntiVirus": {
"category": "harmless",
"result": "clean",
"method": "blacklist",
"engine_name": "K7AntiVirus"
},
"CINS Army": {
"category": "harmless",
"result": "clean",
"method": "blacklist",
"engine_name": "CINS Army"
},
"Quttera": {
"category": "harmless",
"result": "clean",
"method": "blacklist",
"engine_name": "Quttera"
},
"PrecisionSec": {
"category": "harmless",
"result": "clean",
"method": "blacklist",
"engine_name": "PrecisionSec"
},
"OpenPhish": {
"category": "harmless",
"result": "clean",
"method": "blacklist",
"engine_name": "OpenPhish"
},
"VX Vault": {
"category": "harmless",
"result": "clean",
"method": "blacklist",
"engine_name": "VX Vault"
},
"Web Security Guard": {
"category": "harmless",
"result": "clean",
"method": "blacklist",
"engine_name": "Web Security Guard"
},
"Scantitan": {
"category": "harmless",
"result": "clean",
"method": "blacklist",
"engine_name": "Scantitan"
},
"AlienVault": {
"category": "harmless",
"result": "clean",
"method": "blacklist",
"engine_name": "AlienVault"
},
"Sophos": {
"category": "harmless",
"result": "clean",
"method": "blacklist",
"engine_name": "Sophos"
},
"Phishtank": {
"category": "harmless",
"result": "clean",
"method": "blacklist",
"engine_name": "Phishtank"
},
"Cyan": {
"category": "harmless",
"result": "clean",
"method": "blacklist",
"engine_name": "Cyan"
},
"Spam404": {
"category": "harmless",
"result": "clean",
"method": "blacklist",
"engine_name": "Spam404"
},
"SecureBrain": {
"category": "harmless",
"result": "clean",
"method": "blacklist",
"engine_name": "SecureBrain"
}
}
]
`
The code works, and I get the output through the variable
y:
import json
import re
from http.client import responses
import vt
import requests
# Extract every distinct IPv4-looking token from the log file, query the
# VirusTotal v3 IP-address endpoint for each one, and print the per-engine
# analysis results as JSON text.
with open('/home/asad/Downloads/ssh-log-parser/ok', 'r') as log_file:
    # Keep the handle and its contents under separate names.
    log_text = log_file.read()
pattern = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')
ips = pattern.findall(log_text)
unique_ips = list(set(ips))
# print(unique_ips)
# print(len(unique_ips))
headers = {
    "accept": "application/json",
    "x-apikey": "###"
}
url = "https://www.virustotal.com/api/v3/ip_addresses/"
messages = []
# Iterate the addresses directly: no manual counter to forget to advance.
for ip in unique_ips:
    # The regex matches are already strings, so plain concatenation is enough.
    furl = url + ip
    response = requests.get(furl, headers=headers)
    data_ = response.json()
    # print(data_)
    messages = [data_['data']['attributes']['last_analysis_results']]
    y = json.dumps(messages)
    # NOTE: json.dumps returns a JSON *string*, not a dictionary, so y[0] is
    # the single character '[' and y[ii] with a str index raises TypeError.
    # Index `messages` (the list holding the dict) to reach the engine keys.
    print(y)
    # for ii in y:
    #     print(ii, ":", y[ii])
labels = [{"value": i} for i in unique_ips]
out_json = {
    "indicators": {
        "value": labels,
        "type": 'ip'
    },
}
# print(out_json)
Now, if I try to grab the first key using y[0],
I get the first character, [,
of the JSON text, whereas I want the key string. I tried to loop over the array, but I'm getting:
Traceback (most recent call last):
File "/home/asad/Downloads/ssh-log-parser/auth_log_parser.py", line 35, in <module>
print(ii, ":", y[ii])
TypeError: string indices must be integers
I want to extract following keys in bold
[{"Bkav": {"category": "harmless", "result": "clean", "method": "blacklist", "engine_name": "Bkav"}, "CMC Threat Intelligence": {"category": "harmless", "result": "clean", "method": "blacklist", "engine_name": "CMC Threat Intelligence"}, "Snort IP sample list": {"category": "harmless", "result": "clean", "method": "blacklist", "engine_name": "Snort IP sample list"}, `
CodePudding user response:
Is pandas an option for you?
(I stored your example JSON in 'Test.json'.)
import pandas as pd
# Test.json holds the question's sample: a one-element list wrapping the
# dict of per-engine results.
df = pd.read_json('Test.json')
# As the output below shows, the values are the individual engine dicts.
print(df.values)
Output:
[[{'category': 'harmless', 'result': 'clean', 'method': 'blacklist', 'engine_name': 'Bkav'}
{'category': 'harmless', 'result': 'clean', 'method': 'blacklist', 'engine_name': 'CMC Threat Intelligence'}
{'category': 'harmless', 'result': 'clean', 'method': 'blacklist', 'engine_name': 'Snort IP sample list'} ...]]
And if you can remove the enclosing list brackets []
from the file, it can be printed nicely:
import pandas as pd
# orient='index' makes each top-level key (the engine name) a row label.
# This expects Test.json to be the bare object, without the surrounding [].
df = pd.read_json('Test.json', orient='index')
print(df)
Output:
category result method engine_name
Bkav harmless clean blacklist Bkav
CMC Threat Intelligence harmless clean blacklist CMC Threat Intelligence
Snort IP sample list harmless clean blacklist Snort IP sample list
0xSI_f33d harmless clean blacklist 0xSI_f33d
ViriBack harmless clean blacklist ViriBack
Comodo Valkyrie Verdict harmless clean blacklist Comodo Valkyrie Verdict
PhishLabs harmless clean blacklist PhishLabs
K7AntiVirus harmless clean blacklist K7AntiVirus
CINS Army harmless clean blacklist CINS Army
Quttera harmless clean blacklist Quttera
PrecisionSec harmless clean blacklist PrecisionSec
OpenPhish harmless clean blacklist OpenPhish
VX Vault harmless clean blacklist VX Vault
Web Security Guard harmless clean blacklist Web Security Guard
Scantitan harmless clean blacklist Scantitan
AlienVault harmless clean blacklist AlienVault
Sophos harmless clean blacklist Sophos
Phishtank harmless clean blacklist Phishtank
Cyan harmless clean blacklist Cyan
Spam404 harmless clean blacklist Spam404
SecureBrain harmless clean blacklist SecureBrain
And since you were looking for the first element:
# first_valid_index() gives the index label of the first non-missing row;
# with the sample data that is the first engine key.
print("First element",df.first_valid_index())
gives you: >>> First element **Bkav**