Home > other >  Im trying to parse data from json file to sqlite db from yelp api
Im trying to parse data from json file to sqlite db from yelp api

Time:10-12

I have been trying to get this to work for a couple of days now. It is a part of a class project in which I have to do the yelp API for a greater group project of a Django web app. Preface aside, I have been struggling with getting the data into the database. I keep getting an error. I created a function "db()" to try to enter the data into the database specifically working from that function but Im not sure how to address the error. Also, I have been having an error for wheel for numpy when I try to install pandas so Im trying to do it without pandas.

  Message=string indices must be integers
  Source=C:\Users\diggt\OneDrive\College\Rowan\Fall22\10430_computing_and_informatics_capstone\yelp_VSCode\yelp.py
  StackTrace:
  File "C:\Users\diggt\OneDrive\College\Rowan\Fall22\10430_computing_and_informatics_capstone\yelp_VSCode\yelp.py", line 104, in <genexpr>
    keys = (entry[c] for c in columns)
  File "C:\Users\diggt\OneDrive\College\Rowan\Fall22\10430_computing_and_informatics_capstone\yelp_VSCode\yelp.py", line 115, in db
    cur.executemany(sql, keys)
  File "C:\Users\diggt\OneDrive\College\Rowan\Fall22\10430_computing_and_informatics_capstone\yelp_VSCode\yelp.py", line 153, in main
    db()
  File "C:\Users\diggt\OneDrive\College\Rowan\Fall22\10430_computing_and_informatics_capstone\yelp_VSCode\yelp.py", line 157, in <module> (Current frame)
    main()
# -*- coding: utf-8 -*-

from __future__ import print_function

import argparse
import json
import csv
import pprint
import requests
import sys
import sqlite3
#import pandas as pd

from urllib.error import HTTPError
from urllib.parse import quote

API_KEY = 'secret'

# API constants, you shouldn't have to change these.
API_HOST = 'https://api.yelp.com'
SEARCH_PATH = '/v3/businesses/search'
BUSINESS_PATH = '/v3/businesses/'  # Business ID will come after slash.

# Defaults
DEFAULT_TERM = 'dinner'
DEFAULT_LOCATION = 'Glassboro, NJ'
SEARCH_LIMIT = 3
OFFSET = 0


def request(host, path, api_key, url_params=None):
    url_params = url_params or {}
    url = '{0}{1}'.format(host, quote(path.encode('utf8')))
    headers = {
        'Authorization': 'Bearer %s' % api_key,
    }

    print(u'Querying {0} ...'.format(url))

    response = requests.request('GET', url, headers=headers, params=url_params)

    return response.json()


def search(api_key, term, location):
    url_params = {
        'term': term.replace(' ', ' '),
        'location': location.replace(' ', ' '),
        'limit': SEARCH_LIMIT,
        'offset': OFFSET
    }
    return request(API_HOST, SEARCH_PATH, api_key, url_params=url_params)


def get_business(api_key, business_id):
    business_path = BUSINESS_PATH   business_id

    return request(API_HOST, business_path, api_key)


def query_api(term, location):
    response = search(API_KEY, term, location)

    businesses = response.get('businesses')

    if not businesses:
        print(u'No businesses for {0} in {1} found.'.format(term, location))
        return

    business_id = businesses[0]['id']

    print(u'{0} businesses found, querying business info ' \
          'for the top result "{1}" ...'.format(
        len(businesses), business_id))
    response = get_business(API_KEY, business_id)

    print(u'Result for business "{0}" found:'.format(business_id))
    pprint.pprint(response, indent=2)

    str_to_write_to_file = json.dumps(response, skipkeys=True, allow_nan=True, indent=4)

    with open('yelp.json', 'w') as f:
        f.write(str_to_write_to_file)

def db():
    
    with open('yelp.json', 'r') as f:
        data = f.readlines()

    conn = sqlite3.connect('yelp.db')
    cur = conn.cursor()
        # Create the table if it doesn't exist.
    cur.execute(
        """CREATE TABLE IF NOT EXISTS yelp(
                id INTEGER PRIMARY KEY,
                alias varchar(100),
                location varchar(100),
                display_phone varchar(15)
            );"""
    )
    
    for entry in data:
        columns = ["id" "alias", "location", "display_phone"]
        keys = (entry[c] for c in columns)
        
        # Execute the command and replace '?' with the each value
        # in 'values'. DO NOT build a string and replace manually.
        # the sqlite3 library will handle non safe strings by doing this.
        sql = """INSERT INTO yelp (id, alias, location, display_phone) VALUES(
                    ?,
                    ?,
                    ?,
                    ?
                );"""
        cur.executemany(sql, keys)
        print(f'{entry["alias"]} data inserted Succefully')

    conn.commit()
    conn.close()

    with sqlite3.connect("yelp.db") as conn:
        cmd = """SELECT * FROM yelp;"""
        cur = conn.execute(cmd)
        res = cur.fetchall()
        for r in res:
            print(r)


def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('-q', '--term', dest='term', default=DEFAULT_TERM,
                        type=str, help='Search term (default: %(default)s)')
    parser.add_argument('-l', '--location', dest='location',
                        default=DEFAULT_LOCATION, type=str,
                        help='Search location (default: %(default)s)')

    input_values = parser.parse_args()

    try:
        query_api(input_values.term, input_values.location)

    except HTTPError as error:
        sys.exit(
            'Encountered HTTP error {0} on {1}:\n {2}\nAbort program.'.format(
                error.code,
                error.url,
                error.read(),
            )
        )

    
    db()


if __name__ == '__main__':
    main()

Adding Json file for clarity

{
    "id": "umC69pkiPyk3qY7IB49ZYw",
    "alias": "bosphorus-mediterranean-cuisine-glassboro",
    "name": "Bosphorus Mediterranean Cuisine",
    "image_url": "https://s3-media4.fl.yelpcdn.com/bphoto/G7VCO3tvx8NGPz5g0fSpMw/o.jpg",
    "is_claimed": true,
    "is_closed": false,
    "url": "https://www.yelp.com/biz/bosphorus-mediterranean-cuisine-glassboro?adjust_creative=9aYQmmK21ApZ7TfokeTk1A&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_lookup&utm_source=9aYQmmK21ApZ7TfokeTk1A",
    "phone": " 18562432015",
    "display_phone": "(856) 243-2015",
    "review_count": 14,
    "categories": [
        {
            "alias": "turkish",
            "title": "Turkish"
        },
        {
            "alias": "halal",
            "title": "Halal"
        },
        {
            "alias": "kebab",
            "title": "Kebab"
        }
    ],
    "rating": 5.0,
    "location": {
        "address1": "524 Delsea Drive N",
        "address2": null,
        "address3": null,
        "city": "Glassboro",
        "zip_code": "08028",
        "country": "US",
        "state": "NJ",
        "display_address": [
            "524 Delsea Drive N",
            "Glassboro, NJ 08028"
        ],
        "cross_streets": ""
    },
    "coordinates": {
        "latitude": 39.7150351328115,
        "longitude": -75.1118882
    },
    "photos": [
        "https://s3-media4.fl.yelpcdn.com/bphoto/G7VCO3tvx8NGPz5g0fSpMw/o.jpg",
        "https://s3-media2.fl.yelpcdn.com/bphoto/HvhYRZO2rOYUBX0DagVE3w/o.jpg",
        "https://s3-media2.fl.yelpcdn.com/bphoto/PQHr3upfVULUjwz1M-ILcw/o.jpg"
    ],
    "hours": [
        {
            "open": [
                {
                    "is_overnight": false,
                    "start": "1100",
                    "end": "2200",
                    "day": 0
                },
                {
                    "is_overnight": false,
                    "start": "1100",
                    "end": "2200",
                    "day": 1
                },
                {
                    "is_overnight": false,
                    "start": "1100",
                    "end": "2200",
                    "day": 2
                },
                {
                    "is_overnight": false,
                    "start": "1100",
                    "end": "2200",
                    "day": 3
                },
                {
                    "is_overnight": false,
                    "start": "1100",
                    "end": "2200",
                    "day": 4
                },
                {
                    "is_overnight": false,
                    "start": "1100",
                    "end": "2200",
                    "day": 5
                },
                {
                    "is_overnight": false,
                    "start": "1100",
                    "end": "2200",
                    "day": 6
                }
            ],
            "hours_type": "REGULAR",
            "is_open_now": true
        }
    ],
    "transactions": [
        "pickup",
        "delivery"
    ],
    "messaging": {
        "url": "https://www.yelp.com/raq/umC69pkiPyk3qY7IB49ZYw?adjust_creative=9aYQmmK21ApZ7TfokeTk1A&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_lookup&utm_source=9aYQmmK21ApZ7TfokeTk1A#popup:raq",
        "use_case_text": "Message the Business"
    }
}

CodePudding user response:

You shouldn't use f.readlines() to read a JSON file, use json.load(f).

There's only one set of values in the JSON, so you don't need a loop or executemany().

def db():
    
    with open('yelp.json', 'r') as f:
        data = json.load(f)

    conn = sqlite3.connect('yelp.db')
    cur = conn.cursor()
        # Create the table if it doesn't exist.
    cur.execute(
        """CREATE TABLE IF NOT EXISTS yelp(
                id INTEGER PRIMARY KEY,
                alias varchar(100),
                location varchar(100),
                display_phone varchar(15)
            );"""
    )
    
    columns = ["id" "alias", "location", "display_phone"]
    keys = [entry[c] for c in columns]
        
    # Execute the command and replace '?' with the each value
    # in 'values'. DO NOT build a string and replace manually.
    # the sqlite3 library will handle non safe strings by doing this.
    sql = """INSERT INTO yelp (id, alias, location, display_phone) VALUES(
                ?,
                ?,
                ?,
                ?
            );"""
    cur.execute(sql, keys)
    print(f'{entry["alias"]} data inserted Succefully')

    conn.commit()
    conn.close()

    with sqlite3.connect("yelp.db") as conn:
        cmd = """SELECT * FROM yelp;"""
        cur = conn.execute(cmd)
        res = cur.fetchall()
        for r in res:
            print(r)

CodePudding user response:

So ultimately I figured it out... pretty much. I used what @Bramar said but the solution was making the json file an array and then I started getting this error sqlite3.ProgrammingError: Incorrect number of bindings supplied. The current statement uses 4, and there are 1 supplied. which turned out to be one of the entries that I had was stored in json as a dict so I eliminated it temporarily to see if I can make it work and it works, this is the code -

 print(u'Result for business "{0}" found:'.format(business_id))

    str_to_write_to_file = json.dumps([response], indent=4)

    with open('yelp.json', 'w') as f:
        f.write(str_to_write_to_file)

def db():
    
    with open('yelp.json', 'r') as f:
        data = json.load(f)

    conn = sqlite3.connect('data/yelp.db')
    cur = conn.cursor()
        # Create the table if it doesn't exist.
    cur.execute(
        """CREATE TABLE IF NOT EXISTS yelp(
                id INTEGER PRIMARY KEY,
                alias varchar(100),
                display_phone varchar(15),
                location dictionary
            );"""
    )
    columns = ["alias", "display_phone"]
    keys = [data[0][c] for c in columns]
        
    # Execute the command and replace '?' with the each value
    # in 'values'. DO NOT build a string and replace manually.
    # the sqlite3 library will handle non safe strings by doing this.
    sql = '''INSERT INTO yelp (alias, display_phone) VALUES(
                ?,
                ?
            );'''
    cur.execute(sql, keys)

    conn.commit()
    conn.close()

Hopefully this helps someone, this can very confusing.

  • Related