import requests
from bs4 import BeautifulSoup
import json
# Field names for each scraped value, keyed first by the position of the
# "col-md-4" column inside a row and then by the position of the text
# fragment inside that column.
data = {
    0: {0: "title", 1: "dates", 2: "city/state", 3: "country"},
    1: {0: "event", 1: "reps", 2: "prize"},
    2: {0: "results"},
}

url = "https://mms.kcbs.us/members/evr_search.php?org_id=KCBA"

# NOTE(review): this parses only the HTML returned by the initial request.
# If the result rows are injected later by client-side scripts, the loop
# below finds no columns and every collected dict stays empty -- confirm
# against the live page.
response = requests.get(url).text
soup = BeautifulSoup(response, features='lxml')

all_data = []
for row in soup.find_all('div', class_="row"):
    columns = row.find_all('div', class_='col-md-4')
    # One dict per row; a KeyError is raised if a column/fragment position
    # has no entry in `data`.
    event = {
        data[col_idx][text_idx]: text
        for col_idx, column in enumerate(columns)
        for text_idx, text in enumerate(column.strings)
    }
    all_data.append(event)

print(json.dumps(all_data, indent=4))
Here's a link to the website: https://mms.kcbs.us/members/evr_search.php?org_id=KCBA
I'm unsure why nothing gets added to the list and dictionaries.
CodePudding user response:
The data you see is loaded from an external URL via JavaScript. To simulate the Ajax request, you can use the following example:
import json
import requests
from bs4 import BeautifulSoup
# Endpoint queried directly instead of the search page itself.
api_url = "https://mms.kcbs.us/members/evr_search_ol_json.php"

# Query-string parameters sent with the request.
# NOTE(review): `evr_begin` / `evr_end` look like a hard-coded date window
# (M/D/YYYY) -- adjust them for the period you need; the meaning of the
# remaining values is not visible from this snippet.
params = {
    "otype": "TEXT",
    "evr_map_type": "2",
    "org_id": "KCBA",
    "evr_begin": "6/16/2022",
    "evr_end": "7/16/2022",
    "evr_address": "",
    "evr_radius": "50",
    "evr_type": "269",
    "evr_openings": "0",
    "evr_region": "",
    "evr_region_type": "1",
    "evr_judge": "0",
    "evr_keyword": "",
    "evr_rep_name": "",
}

# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(api_url, params=params, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page into an
# empty result list.
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

# Field names for each scraped value, keyed first by the position of the
# "col-md-4" column inside a row and then by the position of the text
# fragment inside that column.
data = {
    0: {0: "title", 1: "dates", 2: "city/state", 3: "country"},
    1: {0: "event", 1: "reps", 2: "prize"},
    2: {0: "results"},
}

all_data = []
for element in soup.find_all("div", class_="row"):
    event = {}
    for i, col in enumerate(element.find_all("div", class_="col-md-4")):
        for j, item in enumerate(col.strings):
            # Raises KeyError if a column/fragment position is not mapped
            # in `data`.
            event[data[i][j]] = item
    all_data.append(event)

print(json.dumps(all_data, indent=4))
Prints:
[
{
"title": "Frisco BBQ Challenge",
"dates": "6/16/2022 - 6/18/2022",
"city/state": "Frisco, CO 80443",
"country": "UNITED STATES",
"event": "STATE CHAMPIONSHIP",
"reps": "Reps: BUNNY TUTTLE, RICH TUTTLE, MICHAEL WINTER",
"prize": "Prize Money: $13,050.00",
"results": "Results Not In"
},
{
"title": "York County BBQ Festival",
"dates": "6/17/2022 - 6/18/2022",
"city/state": "Delta, PA 17314",
"country": "UNITED STATES",
"event": "STATE CHAMPIONSHIP",
"reps": "Reps: ANGELA MCKEE, ROBERT MCKEE, LOUISE WEIDNER",
"prize": "Prize Money: $5,500.00",
"results": "Results Not In"
},
...and so on.
CodePudding user response:
The website relies on JavaScript running in a browser, which doesn't happen with requests. It is far easier to use Selenium.
You need to check your Chrome browser version and install the matching ChromeDriver from [here](https://chromedriver.chromium.org/downloads).
from selenium import webdriver
from selenium.webdriver.common.by import By

url = "https://mms.kcbs.us/members/evr_search.php?org_id=KCBA"
chrome_driver_path = r""  # your chrome driver path here

# NOTE(review): `executable_path` is kept so the snippet still runs on the
# Selenium release it was written for; recent Selenium 4 releases expect a
# `Service` object instead -- confirm against the installed version.
driver = webdriver.Chrome(executable_path=chrome_driver_path)
try:
    driver.get(url)
    # find_elements(By.CLASS_NAME, ...) replaces the find_elements_by_*
    # helpers, which newer Selenium releases no longer provide.
    for elements in driver.find_elements(By.CLASS_NAME, 'row'):
        event = {}
        # time.sleep(0.5)  # if "stale element reference" error occur (needs `import time`)
        for i, col in enumerate(elements.find_elements(By.CLASS_NAME, 'col-md-4')):
            # `col.text` is one string; iterating it directly would yield
            # single characters, so split it into lines first.
            for j, item in enumerate(col.text.splitlines()):
                pass  # your code here
finally:
    # Always close the browser, even if scraping fails part-way through.
    driver.quit()