# This demo is about the check inside the consumer's `while True` loop.
# If I keep that check, the code below it that saves the pictures is never executed.
# If I remove the check, the pictures are downloaded, but the program never ends on its own.
# Can anyone help me make it terminate automatically?
import os
import threading
from queue import Empty, Queue
from urllib import request
from urllib.parse import urljoin

import requests
from lxml import etree
# HTTP headers sent with every page request made through `requests`.
# The header key must be the real header name "User-Agent"; the previous key
# ('the user-agent') is not a header any server recognises.
# NOTE: the constant's spelling (sic) is kept because the rest of this file
# refers to it by this name.
HEASERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36'
    ),
}
# producers
class Producer(threading.Thread):
    """Worker thread that turns listing-page URLs into image download jobs.

    Each URL taken from ``page_queue`` is fetched, every detail link found on
    it is fetched in turn, and for every image found on a detail page a
    ``(image_url, file_name)`` tuple is put on ``img_queue``.
    """

    # Base URL that the hrefs found on a listing page are resolved against.
    domain_name = 'http://www.netbian.com'

    # XPath selecting the <a> elements that link to detail pages.
    # NOTE(review): the source this was recovered from had lost the attribute
    # predicate on <div> (something like //div[@class="..."]); restore it to
    # narrow the match, otherwise every <a href> inside any <div>//<li> is
    # followed.
    detail_link_xpath = '//div//li//a[@href]'

    # XPath selecting the <img> elements on a detail page.
    # NOTE(review): same caveat as above - the <div> predicate was lost.
    image_xpath = '//div//img[@src]'

    # Seconds to wait for a response; without a timeout a stalled connection
    # would keep this thread (and anything joining it) alive forever.
    request_timeout = 30

    def __init__(self, page_queue, img_queue, *args, **kwargs):
        """Store the two queues; remaining arguments go to ``Thread``.

        Args:
            page_queue: queue of listing-page URLs to consume.
            img_queue: queue that receives ``(image_url, file_name)`` tuples.
        """
        super(Producer, self).__init__(*args, **kwargs)
        self.page_queue = page_queue
        self.img_queue = img_queue

    def run(self):
        """Process listing pages until ``page_queue`` is drained."""
        while True:
            # get_nowait() instead of empty() + get(): with several producers
            # another thread can take the last page between the two calls,
            # which would leave this thread blocked in get() forever.
            try:
                url = self.page_queue.get_nowait()
            except Empty:
                break
            self.get_detail_urls(url)

    def get_detail_urls(self, url):
        """Fetch a listing page and process every detail page it links to."""
        response = requests.get(
            url=url, headers=HEASERS, timeout=self.request_timeout)
        html = etree.HTML(response.text)
        for link in html.xpath(self.detail_link_xpath):
            href = link.get('href')
            # urljoin handles root-relative, relative and absolute hrefs,
            # where plain string concatenation only handles the first kind.
            self.set_detail_urls(urljoin(self.domain_name, href))

    def set_detail_urls(self, url):
        """Fetch a detail page and queue a download job for each image on it."""
        response = requests.get(
            url=url, headers=HEASERS, timeout=self.request_timeout)
        # The page is decoded explicitly as GBK rather than relying on the
        # encoding requests guesses.
        html = etree.HTML(response.content.decode('gbk'))
        for img in html.xpath(self.image_xpath):
            src = img.get('src')
            stem, suffix = os.path.splitext(os.path.basename(src))
            # Fall back to the file name from the URL when the image has no
            # (or an empty) alt text; None + str would raise TypeError.
            name = img.get('alt') or stem
            self.img_queue.put((src, name + suffix))
# consumers download
class Consumer(threading.Thread):
    """Worker thread that downloads the images queued on ``img_queue``.

    Items are ``(image_url, file_name)`` tuples. A ``None`` item is the stop
    signal: the thread exits as soon as it takes one off the queue.
    """

    def __init__(self, page_queue, img_queue, *args, **kwargs):
        """Store the two queues; remaining arguments go to ``Thread``.

        Args:
            page_queue: queue of listing-page URLs (kept for interface
                compatibility; not consulted when deciding to stop).
            img_queue: queue of ``(image_url, file_name)`` tuples, terminated
                by ``None``.
        """
        super(Consumer, self).__init__(*args, **kwargs)
        self.page_queue = page_queue
        self.img_queue = img_queue

    def run(self):
        """Download queued images until the ``None`` stop signal arrives."""
        while True:
            # Block until there is work. Checking "both queues are empty" is
            # not a valid stop condition: it is true right after start-up,
            # while the producers are still fetching pages, so the thread
            # would quit before a single image had been queued.
            item = self.img_queue.get()
            if item is None:
                break
            url, filename = item
            try:
                request.urlretrieve(url, os.path.join('imgs', filename))
            except OSError as exc:
                # urllib's URLError/HTTPError derive from OSError. One failed
                # download must not kill the worker.
                print(filename + ' download failed: ' + str(exc))
                continue
            print(filename + ' the download is complete!')


def main():
    """Run the scraper: 5 producer threads feed 5 consumer threads.

    The function returns once every producer has finished and every consumer
    has drained the image queue and received its stop signal.
    """
    # One queue for listing pages (producer input) and one for image jobs
    # (producer output / consumer input).
    page_queue = Queue(100)
    img_queue = Queue(1000)
    # NOTE(review): the query string was reconstructed from a garbled source;
    # the keyboard value is presumed to be a GBK percent-encoded search
    # keyword - confirm against the site before relying on it.
    base_url = ('http://www.netbian.com/e/sch/index.php'
                '?page={}&keyboard=%B6%AF%C2%FE&totalnum=1062')

    # Consumers save into this directory; urlretrieve does not create it.
    os.makedirs('imgs', exist_ok=True)

    for x in range(0, 1):
        page_queue.put(base_url.format(x))

    producers = [Producer(page_queue, img_queue) for _ in range(5)]
    for thread in producers:
        thread.start()

    consumers = [Consumer(page_queue, img_queue) for _ in range(5)]
    for thread in consumers:
        thread.start()

    # Once every producer has exited no further jobs can appear, so it is
    # safe to queue exactly one stop signal per consumer behind the real work.
    for thread in producers:
        thread.join()
    for _ in consumers:
        img_queue.put(None)
    for thread in consumers:
        thread.join()


if __name__ == '__main__':
    main()