Originally wanted to be like that,
- coding - 8 # - * - * -
The import requests
The import LXML
The import XLRD
The import XLWT
The import sys
The from bs4 import BeautifulSoup
The import XLWT
The import time
The import urllib
Def craw (url, key_word) :
User_Agent='Mozilla/5.0 (Windows NT 10.0; Win64. X64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/Edge/537.36 18.17763 '
Re=r 'https://www.qcc.com/search? Key='+ key_word
Headers={
'Accept' : 'text/HTML, application/XHTML + XML, application/XML. Q=0.9 */*; Q=0.8 ',
'the Accept - Encoding' : 'gzip, deflate, br',
'the Accept - Language' : 'useful - Hans - CN, useful - Hans; Q=0.8, en - US; Q=0.6, en. Q=0.4, ko. Q=0.2 ',
: 'cookies' r' _XXXXXXXXXXXXXXXX ',
'Host' : 'www.qcc.com',
'the Upgrade - Insecure - Request' : '1',
'the user-agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64. X64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/Edge/18.17763 ', 537.36
}
Try:
The response=requests. Get (url, headers=headers)
If the response status_code!=200:
The response. The encoding="utf-8"
Print (response. Status_code)
Print (" ERROR ")
Soup=BeautifulSoup (the response text, 'LXML')
Except the Exception:
Both print (' request, the enterprise check is to want to go??? ')
Try:
FRTRT com_all_info=soup. Find_all (class_=' ') [0]. The tbody
Com_all_info_array=com_all_info. Select (" td ")
Print (' began to crawl the data, please do not open the excel)
# the try:
Temp_g_name=com_all_info_array [2]. Select (' a ') [0]. # text company name
Temp_r_name=com_all_info_array [2]. Select (" p ") [0]. A.t ext # corporate name
Temp_g_money=com_all_info_array [2]. Select (" p ") [0]. Select (' span) [0]. Text. The strip (' registered capital: ') # registered capital
Temp_g_date=com_all_info_array [2]. Select (" p ") [0]. Select (' span) [1]. The text. The strip (' establishment date: ') # establishment date
E=com_all_info_array [2]. Select (" p ") [1]. The select (' a ') [1] # mailbox
F=e.s trip (' showHisEmail.) strip ('; '). The replace (' {', ' '). The replace ('} ', '). The replace ('/', ' '). The replace ('] ', '). The replace (' : ', '). The replace (' "', ' '). The replace (' e ', '). The replace (' s', '). The replace (' (', ' '). The replace (') ', ')
G=f.s plit () ", "
L=0
H=[]
While l & lt; Len (g) :
If l % 2==0:
H.a ppend (STR) (g/l)
L +=1
='\ n' temp_r_email. Join (h)
A=com_all_info_array [2]. Select (" p ") [1]. The select (' a ') [0] # phone
B=a.s trip (' showHisTel.) strip ('; '). The replace (' {', ' '). The replace ('} ', '). The replace ('/', ' '). The replace ('] ', '). The replace (' : ', '). The replace (' "', ' '). The replace (' t ', '). The replace (' s', '). The replace (' (', ' '). The replace (') ', ')
C=b.s plit () ", "
J=0
D=[]
While j & lt; Len (c) :
If j % 2==0:
Da ppend (STR) [j] (c)
J +=1
='\ n' temp_r_phone. Join (d)
Temp_g_addr=com_all_info_array [2]. Select (" p ") [2]. The text. The strip (). The strip (' address: ') # address
Temp_r_numm=com_all_info_array [2]. Select (" p ") [0]. Select (' span) [1]. The text. The strip (' unified social credit code: ') # reference code
G_name_list. Append (temp_g_name)
R_name_list. Append (temp_r_name)
G_money_list. Append (temp_g_money)
G_date_list. Append (temp_g_date)
R_email_list. Append (temp_r_email)
R_phone_list. Append (temp_r_phone)
G_addr_list. Append (temp_g_addr)
R_numm_list. Append (temp_r_numm)
# the except Exception:
# print (' error! ')
Except the Exception:
Print (' seemed to be refused access... Please try the request again beep... ')
If __name__=="__main__ ':
Global g_name_list
Global r_name_list
Global g_money_list
Global g_date_list
Global r_email_list
Global r_phone_list
Global g_addr_list
Global r_numm_list
Col=[]
Data=https://bbs.csdn.net/topics/xlrd.open_workbook (r 'C: \' check data source. XLSX ')
Sheet=data. Sheet_by_name (' Sheet1)
Col=sheet. Col_values (0)
For key_word in col:
Time. Sleep (15)
Key_word=urllib. Parse. Quote (key_word)
Url=r 'https://www.qcc.com/search? Key='+ key_word
Craw (url, key_word)
Print (' searching, please later)
Workbook=XLWT. Workbook ()
Sheet1=workbook. Add_sheet (' companies check data, cell_overwrite_ok=True)
Style=XLWT. XFStyle ()
The font=XLWT. The font ()
The font. The name='imitation song dynasty style typeface'
The font, bold=True
Style. The font=the font
Print (' are stored data, please do not open the excel)
G_name_list=[]
R_name_list=[]
G_money_list=[]
G_date_list=[]
R_email_list=[]
R_phone_list=[]
G_addr_list=[]
R_numm_list=[]
Name_list=[' company name ', 'statutory legal persons',' registered capital ', 'date', 'corporate email', 'corporate phone', 'the company address,' unified social credit code]
nullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnull