#! The/usr/bin/python
# - * - coding: utf-8 - * -
"' OS. Environ [' NLS_LANG]='SIMPLIFIED CHINESE_CHINA. UTF8'
User_agent="Mozilla/5.0 (Windows NT 10.0; WOW64. The rv: 52.0) Gecko/20100101 Firefox/52.0 "
Headers={' the user-agent: user_agent}
Start_urls='http://tieba.baidu.com/f/search/res? Ie=utf-8 '
Kw='eng'
Url=start_urls + kw
Request=urllib. Request. The request (url, headers=headers)
Reponse=urllib. Request. Urlopen (request), read ()
Print (reponse)
"'
#! The/usr/bin/env python
# - * - coding: gb2312 - * -
The import urllib. Request
The import urllib. Parse
The import sys
The from urllib. Request the import urlopen
The from bs4 import BeautifulSoup
The import time
The import tiebaUI
class TieBa():
    """Small PyQt5 front end for a Baidu Tieba thread-title crawler.

    Constructing an instance builds the window from ``tiebaUI``, wires the
    push button to :meth:`start`, and then blocks in the Qt event loop.
    """

    def __init__(self):
        """Build the UI, set the default crawl settings and run the event loop.

        This call does not return normally: it ends with
        ``sys.exit(app.exec_())``.
        """
        app = tiebaUI.QtWidgets.QApplication(sys.argv)
        MainWindow = tiebaUI.QtWidgets.QMainWindow()
        self.ui = tiebaUI.Ui_MainWindow()
        self.ui.setupUi(MainWindow)
        # Connect the *bound* method. Connecting the unbound ``TieBa.start``
        # makes Qt pass the signal's ``checked`` bool as ``self``, so the
        # attribute assignment inside start() raises AttributeError on a bool
        # and the application terminates right after printing "111".
        self.ui.pushButton.clicked.connect(self.start)

        # Default crawl settings (placeholder values used while testing).
        self.kw = "222"
        self.kw2 = "333"
        # Both page bounds live on the instance; a chained assignment to a
        # bare ``endPage`` would only create an unused local.
        self.beginPage = 0
        self.endPage = 0
        self.kw_url = "http://tieba.baidu.com/f?"
        self.kw2_url = "http://tieba.baidu.com"

        # NOTE(review): this hint text looks machine-translated in the
        # source; confirm the intended wording.
        self.ui.textEdit.setText('don\'t need to bring "right"')

        # Show the window once it is fully populated.
        MainWindow.show()
        sys.exit(app.exec_())

    def writeAl(self, all_nr_print):
        """Persist one result line (file output is currently disabled).

        Args:
            all_nr_print: The formatted "title url" line for a matched thread.

        The file-writing code below is intentionally left disabled, so this
        method is a no-op and returns ``None``.
        """
        # Disabled implementation, kept for reference:
        # fname = (self.kw + "_P" + str(self.beginPage) + "P"
        #          + str(self.endPage) + "_" + self.kw2 + r".txt")
        # with open(fname, 'a') as f:
        #     f.write(all_nr_print + "\n")

    def all_nr(self, full_url):
        """Fetch one listing page and report links whose title contains kw2.

        Args:
            full_url: URL of the listing page to download.

        Every ``<a>`` tag is inspected; a link is reported (printed and passed
        to :meth:`writeAl`) when its ``href`` is exactly 13 characters long
        and its ``title`` contains ``self.kw2``.
        """
        # Close the HTTP response deterministically once parsing is done.
        with urlopen(full_url) as html:
            bsObj = BeautifulSoup(html, 'html.parser')

        for anchor in bsObj.find_all('a'):
            href = str(anchor.get('href'))
            title = str(anchor.get("title"))
            # presumably 13 characters matches "/p/<10-digit id>" thread
            # links - TODO confirm against the live page markup.
            if len(href) == 13 and self.kw2 in title:
                all_nr_print = title + " " + str(self.kw2_url) + href
                print(all_nr_print)
                self.writeAl(all_nr_print)

    def tiebaSpider(self, kw_url, beginPage, endPage):
        """Crawl the listing pages from beginPage to endPage inclusive.

        Args:
            kw_url: Base listing URL that already carries the ``kw`` query.
            beginPage: First page number to fetch.
            endPage: Last page number to fetch (inclusive).
        """
        for page in range(beginPage, endPage + 1):
            # The ``pn`` offset advances by 50 per page.
            pn = (page - 1) * 50
            full_url = kw_url + "&pn=" + str(pn)
            self.all_nr(full_url)

    def start(self):
        """Slot for the push button; currently only sets and echoes a test kw."""
        print('111')
        self.kw = '222'
        print(self.kw)
The problem is here: after clicking the button, 111 is printed normally, but then the program exits on its own. The self.kw = '222' assignment is only there for testing; the real code, commented out below, reads the contents of the text boxes and assigns them to kw and the other settings. I simply cannot get it to work. Could the experts here please give some advice? I really could not solve this on my own, so my only option was to post...
# self. Kw2=UI. TextEdit. ToPlainText ()
# self. BeginPage=UI. TextEdit_3. ToPlainText ()
# self. EndPage=UI. TextEdit_4. ToPlainText ()
# print (' 222 ')
# kw_key=urllib. Parse. Urlencode ({" kw ": self. Kw})
# full_url=self. Kw_url + kw_key
# self. TiebaSpider (full_url, self. BeginPage, self endPage)
# print (self. Kw, the self. Kw2, self beginPage, self, endPage)
"'
Kw=input (' please enter the name post bar:)
BeginPage=int (input (" please input the start page: "))
EndPage=int (input (" please input the end page: "))
Kw2=input (" please enter \ \ "keywords" post subject: ')
Kw_url="http://tieba.baidu.com/f?"
Kw2_url="http://tieba.baidu.com"
Kw_key=urllib. Parse. Urlencode ({" kw ": kw})
Full_url=kw_url + kw_key
TiebaSpider (full_url beginPage, endPage)
"'
# Script entry point: constructing TieBa builds the window and enters the
# Qt event loop (its __init__ ends with sys.exit(app.exec_())).
if __name__ == "__main__":
    TieBa()
I have only just started learning how to build a crawler interface with PyQt5, so some of the code may look strange. It kept raising errors, and after many rounds of changes it has become quite messy. Also, I import the UI file and implement all of the logic in the .py file above; that arrangement itself works without problems.
The UI .py code is as follows:
# - * - coding: utf-8 - * -
# Form implementation generated from reading the UI file 'tiebaUI. UI'
#
# Created by: PyQt5 UI code generator 5.11.3
#
# WARNING! All changes made in this file will be lost!
The from PyQt5 import QtCore QtGui, QtWidgets
The from PyQt5. QtWidgets import QHeaderView
The class Ui_MainWindow (object) :
Def setupUi (self, the MainWindow) :
MainWindow. SetObjectName (" MainWindow ")
MainWindow. Resize (1000, 450)
SizePolicy=QtWidgets. QSizePolicy (QtWidgets. QSizePolicy. Minimum, QtWidgets. QSizePolicy. Preferred)
SizePolicy. SetHorizontalStretch (0)
SizePolicy. SetVerticalStretch (0)
SizePolicy. SetHeightForWidth (MainWindow. SizePolicy () hasHeightForWidth ())
MainWindow. SetSizePolicy (sizePolicy)
MainWindow. SetMinimumSize (QtCore. QSize (1000, 450))
MainWindow. SetMaximumSize (QtCore. QSize (1000, 450))
Self. Centralwidget=QtWidgets. QWidget (MainWindow)
Self. Centralwidget. SetObjectName (" centralwidget ")
The self. The label=QtWidgets. QLabel (self centralwidget)
Self. Label. SetGeometry (QtCore QRect (10, 10, 81, 31))
Self. Label. SetTextFormat (QtCore. Qt. AutoText)
Self. Label. SetAlignment (QtCore. Qt. AlignCenter)
Self. Label. SetObjectName (" label ")
The self, textEdit=QtWidgets QTextEdit (self. Centralwidget)
nullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnull