I'm just starting to build my first web scraper, and I cannot figure out why calling find() on my BeautifulSoup result fails with the error below. Thank you for helping a beginner :(
import requests
from bs4 import BeautifulSoup
# Import your website here
# Download the job-search results page and keep the response body as text.
html_text = requests.get(
'https://www.timesjobs.com/candidate/job-search.html?searchType=personalizedSearch&from=submit&txtKeywords=python&txtLocation=').text
# Parse the downloaded HTML using the lxml parser.
soup = BeautifulSoup(html_text, 'lxml')
# Find the part of the webpage where your information is in
# NOTE: find_all() returns a ResultSet (a list of every matching <li>),
# not a single element.
job = soup.find_all('li', class_='clearfix job-bx wht-shd-bx')
# NOTE: the next line is where the AttributeError in the traceback is raised:
# a ResultSet has no find() method, so each element of `job` has to be
# queried individually (or find() used above to get a single element).
company_name = job.find('h3', class_='joblist-comp-name').text.replace(' ', '')
skills = job.find('span', class_='srp-skills').text.replace(' ', '')
# The date text is read from the <span> nested inside the 'sim-posted' span.
published_date = job.find('span', class_='sim-posted').span.text
print(published_date)
print(f'''
Company name: {company_name}
Required Skills: {skills}
''')
Traceback (most recent call last):
File "c:\Users\GamingPC\OneDrive - University of Pittsburgh\Random Work\Desktop\Python Projects\Web scraping.py", line 12, in <module>
company_name = job.find('h3', class_ = 'joblist-comp-name').text.replace(' ', '')
File "C:\Users\GamingPC\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\bs4\element.py", line 2253, in __getattr__
raise AttributeError(
AttributeError: ResultSet object has no attribute 'find'. You're probably treating a list of elements like a single element. Did you call find_all() when you meant to call find()?
CodePudding user response:
job = soup.find_all('li', class_ = 'clearfix job-bx wht-shd-bx')
company_name = job.find('h3', class_ = 'joblist-comp-name').text.replace(' ', '')
find_all() returns a result set (a list of matching elements). You can't call find() on a result set.
CodePudding user response:
As John Gordon says, you can't call find() on a result set, so you need to iterate over the variable job and call find() on each element. Something like this would work:
import requests
from bs4 import BeautifulSoup
# Import your website here
# Download the job-search results page. The timeout keeps the script from
# hanging forever if the server never responds.
html_text = requests.get(
    'https://www.timesjobs.com/candidate/job-search.html?searchType=personalizedSearch&from=submit&txtKeywords=python&txtLocation=',
    timeout=10,
).text
soup = BeautifulSoup(html_text, 'lxml')
# Find the part of the webpage where your information is in.
# find_all() returns a ResultSet (a list of matching <li> elements), so the
# individual fields must be looked up on each element, not on the list.
job = soup.find_all('li', class_='clearfix job-bx wht-shd-bx')
for item in job:
    # Remove spaces inside the text, then trim surrounding whitespace/newlines.
    company_name = item.find('h3', class_='joblist-comp-name').text.replace(' ', '').strip()
    skills = item.find('span', class_='srp-skills').text.replace(' ', '').strip()
    # The date text is read from the <span> nested inside the 'sim-posted' span.
    published_date = item.find('span', class_='sim-posted').span.text
    print(published_date)
    # The f-string lines stay at column 0 so the printed output carries no
    # extra leading indentation.
    print(f'''
Company name: {company_name}
Required Skills: {skills}
''')