Home > Blockchain >  Why is Beautiful Soup Returning duplicate results?
Why is Beautiful Soup Returning duplicate results?

Time:10-23

I am creating a project that scrapes Indeed's website. It was working fine, but when I ran it today, without my having made any changes, instead of returning the entire page of results it now only displays the first result, repeated once for every job on the page. Could someone help me correct this?

from tkinter import *
import random
import urllib.request
from bs4 import BeautifulSoup
from selenium import webdriver
import time
import pandas as pd
import requests


# Leftover fetch of an unrelated page: `jobs` is never read again and `soup`
# is rebound further down once the Selenium page source has been parsed.
html_text = requests.get('https://www.ign.com/').text
soup = BeautifulSoup(html_text, 'lxml')
jobs = soup.find('section',class_='right')
#print(html_text)


# Open the search results page in Chrome through Selenium.
driver = webdriver.Chrome(executable_path='/Users/Miscellaneous/PycharmProjects/RecursivePractice/chromedriver')
url= "https://www.indeed.com/jobs?q=developer&l=Westbury, NY&vjk=0b0cbe29e5f86422"
driver.maximize_window()
driver.get(url)

time.sleep(5)  # fixed pause before the page source is read
content = driver.page_source.encode('utf-8').strip()
soup = BeautifulSoup(content,"html.parser")
# One <a class="tapItem"> element is collected per result on the page.
officials = soup.findAll("a",{"class":"tapItem"})

for official in officials:
  # NOTE: every lookup below is made on `soup` (the whole document) rather
  # than on `official`, so each iteration finds the same first match on the
  # page. This is the repeated-first-result behaviour the question describes;
  # the code is kept as posted so the question still matches it.
  jobTitle = soup.find('h2',{'class': 'jobTitle'}).text
  companyName = soup.find('div',{'class': 'comapny_location'})
  location = soup.find('div',{'class': 'companyLocation'}).text
  salary = soup.find('div',{'class': 'salary-snippet'})
  actualSalary = salary.find('span').text
  summary = soup.find('div',{'class': 'job-snippet'}).text

  print('Title: ' + str(jobTitle) + '\nCompany Name: ' + str(companyName) + '\nLocation: ' + str(location) +
        '\nSalary: ' + str(actualSalary) + "\nSummary: " + str(summary))
  #print(str(official))
  print(' ')


driver.quit()

CodePudding user response:

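Try this. In your loop, `soup.find(...)` searches the whole page and returns the first match on every iteration, so each pass prints the same job. Select a separate element for each result instead: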

from tkinter import *
import random
import urllib.request
from bs4 import BeautifulSoup
from selenium import webdriver
import time
import pandas as pd
import requests


def _field_text(card, tag, css_class):
    """Return the text of the first ``tag`` with class ``css_class`` in ``card``.

    Returns the placeholder string "NULL" when ``card`` contains no such
    element, so a missing optional field never raises.
    """
    node = card.find(tag, {'class': css_class})
    return node.text if node is not None else "NULL"


# Leftover fetch of an unrelated page: `jobs` is never read again and `soup`
# is rebound below once the Selenium page source has been parsed.
html_text = requests.get('https://www.ign.com/').text
soup = BeautifulSoup(html_text, 'lxml')
jobs = soup.find('section',class_='right')


driver = webdriver.Chrome(executable_path='/Users/Miscellaneous/PycharmProjects/RecursivePractice/chromedriver')
url= "https://www.indeed.com/jobs?q=developer&l=Westbury, NY&vjk=0b0cbe29e5f86422"
try:
    driver.maximize_window()
    driver.get(url)

    time.sleep(5)  # fixed pause before the page source is read
    content = driver.page_source.encode('utf-8').strip()
finally:
    # Close the browser even if loading the page raised.
    driver.quit()

soup = BeautifulSoup(content,"html.parser")
officials = soup.find_all("a",{"class":"tapItem"})

for official in officials:
    # Search inside this result's own element. Indexing page-wide lists by
    # position pairs fields from different results as soon as one result
    # lacks an optional field such as the salary.
    jobTitle = _field_text(official, 'h2', 'jobTitle')
    # NOTE(review): the posted code used the class 'comapny_location', which
    # looks like a transposition of 'company_location' -- confirm against the
    # live markup.
    companyName = _field_text(official, 'div', 'company_location')
    location = _field_text(official, 'div', 'companyLocation')
    summary = _field_text(official, 'div', 'job-snippet')

    # Prefer the <span> inside the salary block; fall back to the block's own
    # text, and to "NULL" when the result has no salary block at all.
    salary = official.find('div', {'class': 'salary-snippet'})
    if salary is None:
        actualSalary = "NULL"
    else:
        salary_span = salary.find('span')
        actualSalary = salary_span.text if salary_span is not None else salary.text

    print('Title: ' + str(jobTitle) + '\nCompany Name: ' + str(companyName) + '\nLocation: ' + str(location) +
          '\nSalary: ' + str(actualSalary) + "\nSummary: " + str(summary))
    print(' ')
  • Related