Home > other > A record of learning Python for one month
A record of learning Python for one month

Time:11-13

import os
import threading
import time
from queue import Empty

import lxml
import requests
from bs4 import BeautifulSoup
from gevent.queue import Queue

class PictureMM:
    """Four-stage, multi-threaded picture downloader.

    The stages run one after another; inside a stage several threads drain
    that stage's input queue:

    1. ``get_img_page``  - listing page  -> (album URL, album name)
    2. ``get_img_url``   - album page    -> (picture-page URL, picture name)
    3. ``download_url``  - picture page  -> (image file URL, picture name)
    4. ``download_img``  - image file    -> ``sister figure/<name>.jpg``

    Every work item is a ``(url, name)`` tuple kept in a single queue, so a
    name can never be paired with another worker's URL.  ``name1_queue`` and
    ``name2_queue`` are still created for backward compatibility but are no
    longer used by the pipeline.

    Args:
        page: Number of listing pages to crawl (anything ``int()`` accepts).
    """

    # Threads started for each pipeline stage.
    WORKERS_PER_STAGE = 20
    # Seconds to wait on any single HTTP request before giving up.
    REQUEST_TIMEOUT = 30

    def __init__(self, page):
        # NOTE(review): the user-agent value is a placeholder in the source;
        # replace it with a real browser string before running.
        self.headers = {'user-agent': 'xi xi xi'}
        self.page = page
        self.url = 'https://www.mmonly.cc/mmtp/list_9_{}.html'
        self.url_list0_queue = Queue()  # listing page URLs
        self.url_list1_queue = Queue()  # (album URL, album name)
        self.url_list2_queue = Queue()  # (picture-page URL, picture name)
        self.url_queue = Queue()        # (image file URL, picture name)
        self.name1_queue = Queue()      # retained for compatibility, unused
        self.name2_queue = Queue()      # retained for compatibility, unused
        self.count = 0
        self.lock1 = threading.Lock()   # guards self.count
        self.lock2 = threading.Lock()   # retained for compatibility, unused
        self.lock3 = threading.Lock()   # retained for compatibility, unused

    @staticmethod
    def _drain(work_queue):
        """Yield items from *work_queue* until it is empty, never blocking.

        ``get_nowait`` replaces the ``len(queue) > 0`` / ``get()`` pair: with
        several workers the queue could be emptied between the check and the
        ``get()``, leaving the loser blocked forever.
        """
        while True:
            try:
                yield work_queue.get_nowait()
            except Empty:
                return

    @staticmethod
    def _page_url(url, index):
        """Return the URL of sub-page *index* (1-based) of an album.

        Page 1 is *url* itself; page ``n`` inserts ``_n`` before the file
        extension, e.g. ``.../123.html`` -> ``.../123_2.html``.
        """
        if index == 1:
            return url
        stem, dot, extension = url.rpartition('.')
        if not dot:
            # No extension to split on: append the suffix to the whole URL.
            return f'{url}_{index}'
        return f'{stem}_{index}{dot}{extension}'

    @staticmethod
    def _report(url, exc):
        """Print why a work item was skipped."""
        print(f'skipped {url}: {exc!r}')

    def _soup(self, url):
        """Fetch *url* and return its GBK-decoded body parsed with lxml."""
        response = requests.get(
            url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
        return BeautifulSoup(response.content.decode('gbk'), 'lxml')

    def url_list(self):
        """Queue the URL of every listing page from 1 to ``self.page``."""
        for number in range(1, int(self.page) + 1):
            self.url_list0_queue.put(self.url.format(number))

    def get_img_page(self):
        """Stage 1: collect each album's URL and name from the listing pages."""
        for page_url in self._drain(self.url_list0_queue):
            try:
                soup = self._soup(page_url)
                items = soup.find('div', id='infinite_scroll').find_all(
                    'div', class_='item masonry_brick masonry-brick')
                for item in items:
                    link = item.find('div').find('div').find('div').find('a')
                    self.url_list1_queue.put(
                        (link['href'], link.find('img')['alt']))
            except Exception as exc:  # worker boundary: report and move on
                self._report(page_url, exc)

    def get_img_url(self):
        """Stage 2: expand each album into its picture pages and names."""
        for album_url, album_name in self._drain(self.url_list1_queue):
            try:
                soup = self._soup(album_url)
                pager = (soup.find('div', class_='wrapper clearfix imgtitle')
                         .find('div', class_='pages')
                         .find('ul')
                         .find_all('li'))
                # Same bounds as the original loop: 1 .. len(pager) - 3.
                for index in range(1, len(pager) - 2):
                    self.url_list2_queue.put(
                        (self._page_url(album_url, index),
                         album_name + str(index)))
            except Exception as exc:  # worker boundary: report and move on
                self._report(album_url, exc)

    def download_url(self):
        """Stage 3: resolve each picture page to the image file's URL."""
        for page_url, name in self._drain(self.url_list2_queue):
            try:
                soup = self._soup(page_url)
                source = (soup.find(id='big-pic')
                          .find('p').find('a').find('img')['src'])
                self.url_queue.put((source, name))
            except Exception as exc:  # worker boundary: report and move on
                self._report(page_url, exc)

    def download_img(self):
        """Stage 4: download every image and save it under its name."""
        for image_url, name in self._drain(self.url_queue):
            try:
                # Download before opening the file so that a failed request
                # does not leave an empty .jpg behind.
                data = requests.get(
                    image_url, timeout=self.REQUEST_TIMEOUT).content
                with open(f'sister figure/{name}.jpg', 'wb') as file:
                    file.write(data)
            except Exception as exc:  # worker boundary: report and move on
                self._report(image_url, exc)
                continue
            with self.lock1:  # += on a shared attribute is not atomic
                self.count += 1
            print(name + "-- -- -- -- -- -- -- download success!")

    def run(self):
        """Run the four stages in order, each with WORKERS_PER_STAGE threads.

        A stage starts only after every worker of the previous stage has
        been joined, so each stage sees its complete input queue.
        """
        self.url_list()
        stages = (self.get_img_page, self.get_img_url,
                  self.download_url, self.download_img)
        for stage in stages:
            workers = [threading.Thread(target=stage, daemon=True)
                       for _ in range(self.WORKERS_PER_STAGE)]
            for worker in workers:
                worker.start()
            for worker in workers:
                worker.join()

        print(" all the images are downloaded completed ")

if __name__ == "__main__":
    # Script entry point: ask for a page count, make sure the output folder
    # exists, run the downloader and report how many pictures were saved
    # and how long the run took.
    print(" welcome to the beautiful batch download!")
    page = input(" please enter the need to download page (page 170) : ")
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs('sister figure', exist_ok=True)
    my = PictureMM(page)
    start = time.time()
    my.run()
    elapsed = time.time() - start
    print(" total download %d pictures, available: %.2f seconds"
          % (my.count, elapsed))

CodePudding user response:

  • Related