I'm just starting to learn how to parse. From the website I need to get the names of the directions and the educational program (bachelor's degree, master's degree, etc...). But at the output I get only one element. Here is my code:
def get_HTML(url, params=None):
request = requests.get(url, headers=HEADERS, params = params) #params = get parameters
return request
def get_Content(html):
soup = BeautifulSoup(html, 'html.parser')
eduPrograms = soup.find_all('div', class_= 'column-center_rasp')
eduProgram = []
for i in eduPrograms:
eduProgram.append({
'title':i.find('div', class_='headerEduPrograms').get_text()
})
print(eduProgram)
eduDirection = soup.find_all('div', {'id': 'fak_id_7a3586aa7b32182f036c0dab143d2df8_493'})
eduDirections = []
for i in eduDirection:
eduDirections.append({
'title':i.find('div', class_='grpPeriod').get_text()
})
print(eduDirections)
def parse():
html = get_HTML(URL)
if (html.status_code) == 200:
get_Content(html.text)
else:
print('Error')
parse()
At the output I get only:
[{'title': 'Бакалавр'}] [{'title': '\n ИВТб-1301-04-00 '}]
HTML from site(only for a eduDirection)
CodePudding user response:
eduDirection
only has one match, there's no need to loop it.
In the loop, you're using i.find()
, which just finds the first DIV with that class. You need to use .find_all()
to find all of them.
eduDirection = soup.find('div', {'id': 'fak_id_7a3586aa7b32182f036c0dab143d2df8_493'})
eduDirections = [{'title': i.get_text()} for i in eduDirection.find_all('div', class_='grpPeriod')]
print(eduDirections)