Home > Software design >  Why does this print an empty list and dictionary?
Why does this print an empty list and dictionary?

Time:06-16

import requests
from bs4 import BeautifulSoup
import json

# Field names, keyed first by the position of a "col-md-4" column inside a
# row and then by the position of a text node inside that column.
data = {
    0: {0: "title", 1: "dates", 2: "city/state", 3: "country"},
    1: {0: "event", 1: "reps", 2: "prize"},
    2: {0: "results"},
}

url = "https://mms.kcbs.us/members/evr_search.php?org_id=KCBA"
response = requests.get(url).text
soup = BeautifulSoup(response, features='lxml')

# One dict per <div class="row"> found in the downloaded HTML.
all_data = []
for row in soup.find_all('div', class_="row"):
    event = {}
    for i, col in enumerate(row.find_all('div', class_='col-md-4')):
        # Label every text node of this column with its field name.
        event.update(
            {data[i][j]: text for j, text in enumerate(col.strings)}
        )
    all_data.append(event)

print(json.dumps(all_data, indent=4))

Here's a link to the website: https://mms.kcbs.us/members/evr_search.php?org_id=KCBA

I'm unsure why nothing gets added to the list and dictionaries.

CodePudding user response:

The data you see is loaded from an external URL via JavaScript. To simulate the Ajax request, you can use the following example:

import json
import requests
from bs4 import BeautifulSoup


# Endpoint requested instead of the search page itself; the event rows are
# parsed out of its response below.
api_url = "https://mms.kcbs.us/members/evr_search_ol_json.php"

# Sent as the query string of the GET request.
# NOTE(review): evr_begin / evr_end look like the search date window and are
# hard-coded here -- adjust them to the period you want to query.
params = {
    "otype": "TEXT",
    "evr_map_type": "2",
    "org_id": "KCBA",
    "evr_begin": "6/16/2022",
    "evr_end": "7/16/2022",
    "evr_address": "",
    "evr_radius": "50",
    "evr_type": "269",
    "evr_openings": "0",
    "evr_region": "",
    "evr_region_type": "1",
    "evr_judge": "0",
    "evr_keyword": "",
    "evr_rep_name": "",
}

# Without a timeout requests may wait forever on an unresponsive server, and
# without raise_for_status() an HTTP error page would be parsed as if it were
# data and silently produce an empty result.
response = requests.get(api_url, params=params, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

# Field names, keyed first by the position of a "col-md-4" column inside a
# row and then by the position of a text node inside that column.
data = {
    0: {0: "title", 1: "dates", 2: "city/state", 3: "country"},
    1: {0: "event", 1: "reps", 2: "prize"},
    2: {0: "results"},
}

# One dict per <div class="row"> in the response.
all_data = []
for element in soup.find_all("div", class_="row"):
    event = {}
    for i, col in enumerate(element.find_all("div", class_="col-md-4")):
        for j, item in enumerate(col.strings):
            # Raises KeyError if the markup has more columns or text nodes
            # than the `data` table describes.
            event[data[i][j]] = item
    all_data.append(event)

print(json.dumps(all_data, indent=4))

Prints:

[
    {
        "title": "Frisco BBQ Challenge",
        "dates": "6/16/2022 - 6/18/2022",
        "city/state": "Frisco, CO 80443",
        "country": "UNITED STATES",
        "event": "STATE CHAMPIONSHIP",
        "reps": "Reps: BUNNY TUTTLE, RICH TUTTLE, MICHAEL WINTER",
        "prize": "Prize Money: $13,050.00",
        "results": "Results Not In"
    },
    {
        "title": "York County BBQ Festival",
        "dates": "6/17/2022 - 6/18/2022",
        "city/state": "Delta, PA 17314",
        "country": "UNITED STATES",
        "event": "STATE CHAMPIONSHIP",
        "reps": "Reps: ANGELA MCKEE, ROBERT MCKEE, LOUISE WEIDNER",
        "prize": "Prize Money: $5,500.00",
        "results": "Results Not In"
    },


...and so on.

CodePudding user response:

The website requires JavaScript to run in a browser, which doesn't happen with requests. It is far easier to use Selenium.

You need to check your Chrome browser version and install the matching ChromeDriver from [here](https://chromedriver.chromium.org/downloads).

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

url = "https://mms.kcbs.us/members/evr_search.php?org_id=KCBA"
chrome_driver_path = r""  # your chrome driver path here

# Selenium 4 receives the driver location through a Service object; the
# executable_path keyword of webdriver.Chrome was deprecated in 4.0 and
# removed in a later 4.x release.
driver = webdriver.Chrome(service=Service(executable_path=chrome_driver_path))
try:
    driver.get(url)
    # The find_elements_by_* helpers were likewise deprecated and later
    # removed; find_elements(By.CLASS_NAME, ...) is the supported spelling.
    for elements in driver.find_elements(By.CLASS_NAME, 'row'):
        event = {}
        # NOTE: if a "stale element reference" error occurs, pause briefly
        # here, e.g. time.sleep(0.5) (requires `import time`).
        for i, col in enumerate(elements.find_elements(By.CLASS_NAME, 'col-md-4')):
            # col.text is one string; split it into lines so that `item` is
            # a line of text rather than a single character.
            for j, item in enumerate(col.text.splitlines()):
                pass  # your code here
finally:
    # Always close the browser, even if scraping fails part-way through.
    driver.quit()
  • Related