Home > Software design >  Uploading JSON file to elasticsearch/kibana
Uploading JSON file to elasticsearch/kibana

Time:05-28

This code converts an access.log file to JSON format. I'm trying to upload it to Elasticsearch in such a way that the index is created as well. How can I do that?

import json 
import re
import logging
import ecs_logging
import time
import sys, requests, os
import time
from datetime import datetime
from elasticsearch import Elasticsearch
from pprint import pprint


# Sanity check: confirm Elasticsearch is reachable before doing any work.
res = requests.get('http://localhost:9200')
print(res.content)

es = Elasticsearch([{'host': 'localhost', 'port': '9200'}])

# Parse access.log (combined log format) into a dict of records keyed by an
# incrementing integer, then dump the result to data.json.
result = {}

with open('access.log') as f:
    lines = f.readlines()

# Raw string so backslash escapes reach the regex engine untouched.
# The '+' quantifiers were dropped by the original paste and are restored:
# ".+" for the request path, "\d+" for the response byte count.
regex = r'(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(?P<dateandtime>.*)\] \"(?P<httpstatus>(GET|POST) .+ HTTP\/1\.1)\" (?P<returnstatus>\d{3} \d+) (\".*\")(?P<browserinfo>.*)\"'
pattern = re.compile(regex)  # compile once, reuse for every line

i = 0
for line in lines:
    r = pattern.match(line)
    # Lines that do not match the expected log format are skipped.
    if r is not None:
        result[i] = {'IP address': r.group('ipaddress'),
                     'Time Stamp': r.group('dateandtime'),
                     'HTTP status': r.group('httpstatus'),
                     'Return status': r.group('returnstatus'),
                     'Browser Info': r.group('browserinfo')}
        # Was "i  = 1" (garbled "i += 1"): without the increment every
        # matching line overwrote key 0 and only one record survived.
        i += 1

print(result)

with open('data.json', 'w') as fp:
    json.dump(result, fp)

CodePudding user response:

I modified your code; I hope this helps. If you have authentication enabled, you must supply the credentials as shown. As you asked, the index — named "my-index" here — is created automatically when the documents are bulk-inserted.

import json 
import re
import logging
import ecs_logging
import time
import sys, requests, os
import time
from datetime import datetime
from elasticsearch import Elasticsearch, helpers
from pprint import pprint
import json
from datetime import datetime
from time import sleep
from requests.auth import HTTPBasicAuth

# Connect with basic auth (use this when X-Pack security is enabled).
client = Elasticsearch(["localhost:9200"], http_auth=('elastic', '<your-es-pwd>'))
# If authentication is NOT enabled, connect without credentials instead:
# client = Elasticsearch(["localhost:9200"])

# Parse access.log (combined log format) into numbered records and persist
# them to data.json so load_json() can build bulk actions from the file.
result = {}

with open('access.log') as f:
    lines = f.readlines()

# Raw string so backslash escapes reach the regex engine untouched.
# The '+' quantifiers were dropped by the original paste and are restored:
# ".+" for the request path, "\d+" for the response byte count.
regex = r'(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(?P<dateandtime>.*)\] \"(?P<httpstatus>(GET|POST) .+ HTTP\/1\.1)\" (?P<returnstatus>\d{3} \d+) (\".*\")(?P<browserinfo>.*)\"'
pattern = re.compile(regex)  # compile once, reuse for every line

i = 0
for line in lines:
    r = pattern.match(line)
    # Lines that do not match the expected log format are skipped.
    if r is not None:
        result[i] = {'IP address': r.group('ipaddress'),
                     'Time Stamp': r.group('dateandtime'),
                     'HTTP status': r.group('httpstatus'),
                     'Return status': r.group('returnstatus'),
                     'Browser Info': r.group('browserinfo')}
        # Was "i  = 1" (garbled "i += 1"): without the increment every
        # matching line overwrote key 0 and only one record survived.
        i += 1

with open('data.json', 'w') as fp:
    json.dump(result, fp)

# Directory that load_json() scans for data.json.
directory = '.'


def load_json(directory):
    """Build Elasticsearch bulk actions from data.json files in *directory*.

    Every file whose name ends with ``data.json`` is loaded; each top-level
    key of its JSON object becomes one document targeting index "my-index",
    with ``_id`` set to the key's enumeration position and ``_source`` set
    to the key's value.

    Returns a list of action dicts suitable for ``helpers.bulk``.
    """
    data = []
    for filename in os.listdir(directory):
        if filename.endswith('data.json'):
            # Join with the directory: the original opened the bare
            # filename, which only worked when directory == '.' and raised
            # FileNotFoundError for any other directory.
            with open(os.path.join(directory, filename), 'r') as open_file:
                json_data = json.load(open_file)
                for i, k in enumerate(json_data.keys()):
                    data.append({
                        "_index": "my-index",
                        # NOTE(review): "_type" is deprecated since ES 7 and
                        # rejected by ES 8 — drop it on modern clusters.
                        "_type": "my-type",
                        "_id": i,
                        "_source": json_data[k],
                    })
    return data

# Push all parsed records to Elasticsearch in a single bulk request; the
# index "my-index" is created automatically on first insert.
helpers.bulk(client, load_json('.'))

# Verify the cluster is reachable (basic auth enabled). The placeholder was
# '<your-password' (missing '>'); it now matches the client setup above.
res = requests.get('http://localhost:9200',
                   auth=HTTPBasicAuth('elastic', '<your-es-pwd>'))
# If authentication is NOT enabled:
# res = requests.get('http://localhost:9200')
print(res.content)
  • Related