import re
The from urllib import request
The from IO import BytesIO
The import gzip
# the crawler purpose is clear, the host popularity ranking
# in the Google browser to find information about HTML F12 now, click the first option element
# to find the number of HTML information, small arrow, hovering in the number of
# 1 number 2 the name of the host need to grab the information
# to simulate HTTP request, send the request to the server, access to the server returns to our HTML
# use regular expressions to extract we need data (name, sentiment)
The # VScode debugging code
The class spiders () :
Url='https://www.douyu.com/g_LOL'
Root_pattern='& lt; Div & gt; ([\ s \ s] *?
#? Said not greed, \ s \ s says there are characters, * means to match zero or infinite times
R=request. Urlopen (spiders. Url)
F=gzip. GzipFile (fileobj=buff)
HTMLS=f.r ead (). The decode (' utf-8)
Root_html=re. The.findall (spiders. Root_pattern HTMLS)
HTMLS=self. __fetch_content ()
Self. __analysis (HTMLS)
Spiders. The go ()