I am trying to make the process of finding a new apartment easier. This is supposed to scrape for the main information from each listing in a specified area. Not really sure what is wrong but it asks for the inputs but then the program just keeps running. No error code, or anything. The only change I've tried so far was removing the search and grab for the href link. Same situation after the change
from bs4 import BeautifulSoup
import requests
from csv import writer
desiredcity = input("Name of City?").replace(' ', '-')
s_pagerange = int(input("Starting Page"))
e_pagerange = int(input("Ending Page")) 1
for int in range(s_pagerange, e_pagerange):
url = "https://www.apartments.com/{}/" desiredcity.lower().replace(',','-') "{}/".format(str(s_pagerange))
page = requests.get(url)
print(url)
soup = BeautifulSoup(page.content, 'html.parser')
lists = soup.find_all('li', class_="mortar-wrapper")
with open('{}_{}.csv'.format(desiredcity, str(s_pagerange)), 'w', encoding ='utf8', newline = '') as f:
thewriter = writer(f)
header = ['Name', 'Address', 'Number', 'How Many Beds', 'Pricing']
thewriter.writerow(header)
for list in lists:
if list.find('div', class_="property-title"):
name = list.find('div', class_="property-title").text
else:
name = " "
if list.find('div', class_="property-address js-url"):
address = list.find('div', class_="property-address js-url").text
else:
address = " "
if list.find('div', class_="phone-link js-phone"):
number = list.find('div', class_="phone-link js-phone").text
else:
number = " "
if list.find('p', class_="property-beds"):
hmb = list.find('p', class_="property-beds").text
else:
hmb = " "
if list.find('p', class_="property-pricing"):
pricing = list.find('p', class_="property-pricing").text
else:
pricing = " "
info = [name, address, number, hmb, pricing]
thewriter.writerow(info)
print("Page {} complete".format(str(s_pagerange)))
s_pagerange = s_pagerange 1
CodePudding user response:
Try:
[..]
url = f"https://www.apartments.com/{desiredcity.lower().replace(',','-')}/{str(s_pagerange)}/"
[...]
You might have other inconsistencies in code, I didn't check absolutely everything.
CodePudding user response:
The website is probably blocking unusually frequent requests(per second), so you need to create a delay:
HEADERS = {'user-agent': ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5)'
'AppleWebKit/537.36 (KHTML, like Gecko)'
'Chrome/45.0.2454.101 Safari/537.36'),
'referer': 'http://stats.nba.com/scores/'}
page = requests.get(url, headers = HEADERS)
so this should work:
from bs4 import BeautifulSoup
import requests
from csv import writer
HEADERS = {'user-agent': ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5)'
'AppleWebKit/537.36 (KHTML, like Gecko)'
'Chrome/45.0.2454.101 Safari/537.36'),
'referer': 'http://stats.nba.com/scores/'}
desiredcity = input("Name of City?").replace(' ', '-')
s_pagerange = int(input("Starting Page "))
e_pagerange = int(input("Ending Page ")) 1
for i in range(e_pagerange):
url = "https://www.apartments.com/{}/" desiredcity.lower().replace(',','-') "{}/".format(str(s_pagerange))
page = requests.get(url, headers = HEADERS)
print(url)
soup = BeautifulSoup(page.content, 'html.parser')
lists = soup.find_all('li', class_="mortar-wrapper")
with open('{}_{}.csv'.format(desiredcity, str(s_pagerange)), 'w', encoding ='utf8', newline = '') as f:
thewriter = writer(f)
header = ['Name', 'Address', 'Number', 'How Many Beds', 'Pricing']
thewriter.writerow(header)
for list in lists:
if list.find('div', class_="property-title"):
name = list.find('div', class_="property-title").text
else:
name = " "
if list.find('div', class_="property-address js-url"):
address = list.find('div', class_="property-address js-url").text
else:
address = " "
if list.find('div', class_="phone-link js-phone"):
number = list.find('div', class_="phone-link js-phone").text
else:
number = " "
if list.find('p', class_="property-beds"):
hmb = list.find('p', class_="property-beds").text
else:
hmb = " "
if list.find('p', class_="property-pricing"):
pricing = list.find('p', class_="property-pricing").text
else:
pricing = " "
info = [name, address, number, hmb, pricing]
thewriter.writerow(info)
print("Page {} complete".format(str(s_pagerange)))
s_pagerange = s_pagerange 1