Here is my code; please help me take a look at it:
import datetime
import os
import sqlite3
import threading
import time
import urllib.parse
import urllib.request

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
class MySpider:
    """Headless-Chrome crawler that searches a shop page for a keyword.

    For every product item found it stores number, brand ("mark"), price,
    note and image file name in the ``phones`` table of ``phones.db`` and
    downloads the product image into ``imagePath`` on a worker thread.

    Typical call order: ``startUp(url, key)`` -> ``processSpider()`` ->
    ``closeUp()`` -> ``showDB()``.
    """

    # HTTP headers sent with every image download request.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 6.0 x64; en-US; rv:1.9pre) Gecko/2008072421 Minefield/3.0.2pre"
    }
    # Directory (relative to the working directory) that receives the images.
    imagePath = "download"

    # Schema of the result table; mNo is the zero-padded running item number.
    _createTableSql = (
        "create table phones ("
        "mNo varchar(32) primary key, "
        "mMark varchar(256), "
        "mPrice varchar(32), "
        "mNote varchar(1024), "
        "mFile varchar(256))"
    )
    # Shared layout for the header line and the data lines printed by showDB.
    _rowFormat = "%-8s %-16s %-8s %-16s %s"
    # Seconds to wait for an image server before giving up.
    _downloadTimeout = 400

    def startUp(self, url, key):
        """Start the browser, reset database and image folder, submit the search.

        Args:
            url: Page to open; it must contain an input element with id ``key``.
            key: Search keyword typed into that input and submitted with ENTER.
        """
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--disable-gpu")
        self.driver = webdriver.Chrome(options=chrome_options)

        self.threads = []
        self.No = 0
        self.imgNo = 0

        # Recreate the result table so every run starts from an empty database.
        try:
            self.con = sqlite3.connect("phones.db")
            self.cursor = self.con.cursor()
            self.cursor.execute("drop table if exists phones")
            self.cursor.execute(MySpider._createTableSql)
        except Exception as err:
            print(err)

        # Make sure the image folder exists and holds no files from earlier runs.
        try:
            os.makedirs(MySpider.imagePath, exist_ok=True)
            for img in os.listdir(MySpider.imagePath):
                os.remove(os.path.join(MySpider.imagePath, img))
        except Exception as err:
            print(err)

        self.driver.get(url)
        keyInput = self.driver.find_element(By.ID, "key")
        keyInput.send_keys(key)
        keyInput.send_keys(Keys.ENTER)

    # NOTE(review): the posted text spells this method "startUP"; the alias keeps
    # that spelling callable in case other code uses it.
    startUP = startUp

    def closeUp(self):
        """Commit and close the database, then shut the browser down.

        The two steps are guarded separately so that a database error cannot
        leave the browser process running.
        """
        try:
            self.con.commit()
            self.con.close()
        except Exception as err:
            print(err)
        try:
            # quit() ends the whole WebDriver session, not just the window.
            self.driver.quit()
        except Exception as err:
            print(err)

    def insertDB(self, mNo, mMark, mPrice, mNote, mFile):
        """Insert one product row; errors are printed, not raised."""
        try:
            # Exactly one placeholder per bound value.
            sql = "insert into phones (mNo, mMark, mPrice, mNote, mFile) values (?,?,?,?,?)"
            self.cursor.execute(sql, (mNo, mMark, mPrice, mNote, mFile))
        except Exception as err:
            print(err)

    def showDB(self):
        """Print every row of ``phones.db`` ordered by item number."""
        try:
            con = sqlite3.connect("phones.db")
            try:
                cursor = con.cursor()
                print(MySpider._rowFormat % ("No", "Mark", "Price", "Image", "Note"))
                # Column order matches the header printed above.
                cursor.execute("select mNo, mMark, mPrice, mFile, mNote from phones order by mNo")
                for row in cursor.fetchall():
                    print(MySpider._rowFormat % (row[0], row[1], row[2], row[3], row[4]))
            finally:
                con.close()
        except Exception as err:
            print(err)

    def download(self, src1, src2, mFile):
        """Fetch an image from ``src1`` (falling back to ``src2``) and save it.

        Args:
            src1: Preferred image URL; may be empty.
            src2: Fallback image URL; may be empty.
            mFile: File name to create inside ``imagePath``.
        """
        data = None
        for src in (src1, src2):
            if data or not src:
                continue
            try:
                req = urllib.request.Request(src, headers=MySpider.headers)
                with urllib.request.urlopen(req, timeout=MySpider._downloadTimeout) as resp:
                    data = resp.read()
            except Exception as err:
                # Best effort: report the failure and try the next source.
                print(err)
        if data:
            try:
                with open(os.path.join(MySpider.imagePath, mFile), "wb") as fobj:
                    fobj.write(data)
                print("download", mFile)
            except OSError as err:
                print(err)

    @staticmethod
    def _formatNo(number):
        """Return the running item number as a 6-character zero-padded string."""
        return str(number).zfill(6)

    @staticmethod
    def _imageFileName(no, src):
        """Build the local file name: item number plus the URL path's extension."""
        # Only the path part is inspected so query strings and host names
        # cannot leak into the file name.
        extension = os.path.splitext(urllib.parse.urlparse(src).path)[1]
        return no + extension

    @staticmethod
    def _attributeOf(element, xpath, attribute):
        """Return an attribute of the first node matching xpath, or ""."""
        try:
            return element.find_element(By.XPATH, xpath).get_attribute(attribute) or ""
        except Exception:
            return ""

    @staticmethod
    def _textOf(element, xpath, default):
        """Return the text of the first node matching xpath, or ``default``."""
        try:
            return element.find_element(By.XPATH, xpath).text
        except Exception:
            return default

    def processSpider(self):
        """Scrape every result page, following the "next" link until it is disabled.

        Each item is written to the database; its image is downloaded on a
        non-daemon thread that is recorded in ``self.threads``.
        """
        try:
            while True:
                # Give the page a moment to render after navigation.
                time.sleep(1)
                pageUrl = self.driver.current_url
                print(pageUrl)
                # NOTE(review): the id is spelled 'J_goodList' in the original post;
                # confirm against the live page (it may be 'J_goodsList').
                lis = self.driver.find_elements(
                    By.XPATH, ".//div[@id='J_goodList']//li[@class='gl-item']")
                for li in lis:
                    # The image URL is either in src or in data-lazy-img.
                    src1 = self._attributeOf(li, ".//div[@class='p-img']//a/img", "src")
                    src2 = self._attributeOf(li, ".//div[@class='p-img']//a/img", "data-lazy-img")
                    price = self._textOf(li, ".//div[@class='p-price']//i", "0")
                    note = self._textOf(li, ".//div[@class='p-name p-name-type-2']//em", "")

                    # The first word of the note is treated as the brand.
                    mark = note.split(" ")[0]
                    mark = mark.replace("爱东东\n", "").replace(",", "")
                    note = note.replace("爱东东\n", "").replace(",", "")

                    self.No = self.No + 1
                    no = self._formatNo(self.No)
                    print(no, mark, price)

                    # Resolve both candidates so the fallback URL is usable too.
                    if src1:
                        src1 = urllib.parse.urljoin(pageUrl, src1)
                    if src2:
                        src2 = urllib.parse.urljoin(pageUrl, src2)

                    if src1 or src2:
                        mFile = self._imageFileName(no, src1 or src2)
                        T = threading.Thread(
                            target=self.download, args=(src1, src2, mFile), daemon=False)
                        T.start()
                        self.threads.append(T)
                    else:
                        mFile = ""
                    self.insertDB(no, mark, price, note, mFile)

                # A disabled "next" link marks the last page.
                if self.driver.find_elements(
                        By.XPATH, ".//span[@class='p-num']//a[@class='pn-next disabled']"):
                    break
                nextLinks = self.driver.find_elements(
                    By.XPATH, ".//span[@class='p-num']//a[@class='pn-next']")
                if not nextLinks:
                    break
                nextLinks[0].click()
        except Exception as err:
            print(err)