from lxml import etree
import requests
import csv
import time


def spiders():
    # define the request headers
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
               'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'}
    # build the URL for each page in a for loop and fetch it with a GET request
    pre_url = 'https://shenzhen.qfang.com/sale/f'
    for x in range(2, 4):
        html = requests.get(pre_url + str(x), headers=headers)
        print(html.status_code)
        # print(html.content.decode('utf-8'))
        time.sleep(2)
        # initialize the etree selector
        selector = etree.HTML(html.text)
        # get the housing list
        # /html/body/div[4]/div/div[1]/div[4]/ul/li
        house_list = selector.xpath('/html/body/div[4]/div/div[1]/div[4]/ul/li')
        print(house_list)
        for house in house_list:
            apartment = house.xpath('div[2]/div[1]/a/text()')[0]
            print(apartment)
            # /html/body/div[4]/div/div[1]/div[4]/ul/li[1]/div[2]/div[2]/p[1]
            house_layout = house.xpath('div[2]/div[2]/p[1]//text()')[0]
            # /html/body/div[4]/div/div[1]/div[4]/ul/li[1]/div[2]/div[2]/p[2]
            area = house.xpath('div[2]/div[2]/p[2]/text()')[0]
            # /html/body/div[4]/div/div[1]/div[4]/ul/li[1]/div[2]/div[3]
            region = house.xpath('div[2]/div[3]/a/text()')[0]
            total_price = house.xpath('div[3]/p/text()')[0]
            item = [apartment, house_layout, area, region, total_price]
            data_writer(item)
            print('Crawling:', apartment)


def data_writer(item):
    with open('qfang_ershoufang.csv', 'a', encoding='utf-8', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(item)


if __name__ == '__main__':
    spiders()
When I run it, no content appears at all.
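
As a way to narrow down where it fails, here is a minimal check, assuming the same URL, headers, and XPath as in the script above (the debug_page.html file name is only an illustrative choice). It fetches a single page, reports the status code and how many listing nodes the XPath matches, and saves the raw HTML for inspection:

from lxml import etree
import requests

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
           'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'}
# fetch one page and report what actually came back
resp = requests.get('https://shenzhen.qfang.com/sale/f2', headers=headers)
print(resp.status_code)  # anything other than 200 suggests a block or redirect
selector = etree.HTML(resp.text)
nodes = selector.xpath('/html/body/div[4]/div/div[1]/div[4]/ul/li')
print(len(nodes))  # 0 means the absolute XPath does not match the returned page
# save the raw HTML so the real page structure can be inspected
with open('debug_page.html', 'w', encoding='utf-8') as f:
    f.write(resp.text)

If the status code is 200 but len(nodes) prints 0, the saved page should show whether the site returned a verification/anti-crawler page or whether the listing markup has changed, in which case the absolute XPath needs updating.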