Environment: Python 3.7 + MongoDB 4.2 + MySQL 5.7, running on Windows 10.
I ran into this problem while writing a Python crawler. There are two scripts below. The first one extracts, for every country on the world report page, the country name, its number of ASNs (nums), and the corresponding sub-link. The second one crawls the information on each country's sub-page. Could someone please take a look and help me see what went wrong? The first script is not the problem: it has already stored its data in MySQL. The problem appears in the second script.
import requests
from lxml import etree
import pymysql

# Browser-like request headers sent to bgp.he.net.
# NOTE(review): advertising "br" only works when a brotli decoder is available
# to requests/urllib3; otherwise res.text may be undecodable -- confirm.
headers = {
    "Accept": "text/javascript, text/html, application/xml, text/xml, */*",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "zh-CN,zh;q=0.9,zh-TW;q=0.8,en;q=0.7",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36",
    "Host": "bgp.he.net",
    "Connection": "close",
    "Origin": "https://bgp.he.net",
    "Referer": "https://bgp.he.net/",
}


def _remove_chars(text, chars):
    """Return *text* with every character listed in *chars* removed."""
    return text.translate(str.maketrans('', '', chars))


def main():
    """Scrape the world report and store one row per country in MySQL.

    Fetches https://bgp.he.net/report/world, reads the country names, the
    per-country counts and the per-country links from the ``countries``
    table, and inserts them into ``along(country, nums, url)``.
    """
    res = requests.get('https://bgp.he.net/report/world', headers=headers, timeout=30)
    html = etree.HTML(res.text)
    names = html.xpath("//div[@id='countries']/table/tbody/tr/td/div[@class='down2 floatleft']/text()")
    counts = html.xpath("//div[@id='countries']/table/tbody/tr/td[@class='alignright']/text()")
    hrefs = html.xpath("//div[@id='countries']/table/tbody/tr/td/a/@href")
    # print(hrefs[0])

    # Pair the three columns by position instead of assuming exactly 242 rows,
    # so a change in the number of listed countries cannot raise IndexError.
    rows = [
        (_remove_chars(name, '\t\n '), _remove_chars(count, ',\t\n '), href)
        for name, count, href in zip(names, counts, hrefs)
    ]

    # con = pymysql.connect(host='127.0.0.1', user='root', passwd='123456', db='lab', charset='utf8')
    con = pymysql.connect(host='localhost', user='root', passwd='youpassword', db='ASNS', charset='utf8')
    try:
        with con.cursor() as cursor:
            cursor.executemany('insert into along(country, nums, url) values(%s, %s, %s)', rows)
        # Without an explicit commit the inserted rows are not persisted.
        con.commit()
    finally:
        con.close()


if __name__ == "__main__":
    main()
Here is the second script:
import json
import np
import pymongo
import pymysql
import requests
import time
import numpy
from pymongo import MongoClient
from lxml import etree

# Browser-like request headers sent with every bgp.he.net request.
# NOTE(review): advertising "br" only works when a brotli decoder is available
# to requests/urllib3; otherwise res.text may be undecodable -- confirm.
headers = {
    "Accept": "text/javascript, text/html, application/xml, text/xml, */*",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "zh-CN,zh;q=0.9,zh-TW;q=0.8,en;q=0.7",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36",
    "Host": "bgp.he.net",
    "Connection": "close",
    "Origin": "https://bgp.he.net",
    "Referer": "https://bgp.he.net/cc",
}

# pymongo only accepts connection URIs that start with "mongodb://"
# (or "mongodb+srv://"); a "mongo://" scheme is rejected as an invalid URI.
client = pymongo.MongoClient('mongodb://localhost:27017/')
mydb = client['AS']
# Date suffix for the collection name, so each crawl day gets its own collection.
# NOTE(review): the format string was reconstructed from a garbled paste;
# confirm the intended field order.
DAY = time.strftime("%Y_%m_%d", time.localtime(time.time()))
mycol = mydb['DATAS_' + DAY]
def get_Countries():
    """Return the stored country rows from the ``along`` table in MySQL.

    Returns:
        The result of ``cursor.fetchall()``: one ``(country, nums, url)``
        row per stored country.
    """
    con = pymysql.connect(host='localhost', user='root', passwd='youpassword', db='ASNS', charset='utf8')
    try:
        with con.cursor() as cursor:
            # Name the columns explicitly: the caller indexes each row by
            # position, so the result must not depend on the table's
            # physical column order or on any extra columns.
            cursor.execute("select country, nums, url from along")
            return cursor.fetchall()
    finally:
        # Always release the connection, even if the query fails.
        con.close()
# Keys stored for every ASN, in the order the table cells appear.
_ASN_FIELDS = ("Organization", "Adjacency_V4", "Route_V4", "Adjacency_V6", "Route_V6")


def _build_asn_record(attach_info):
    """Map the text cells of one ASN table row onto the ``_ASN_FIELDS`` keys.

    Args:
        attach_info: list of strings returned by ``./td/text()`` for the row.

    Returns:
        A dict with exactly the keys in ``_ASN_FIELDS``.
    """
    cells = list(attach_info)
    if len(cells) == len(_ASN_FIELDS) - 1:
        # One cell short: treated as a row without an organization text node,
        # so the remaining cells shift one position to the right.
        cells.insert(0, "")
    # Pad any still-missing cells so a short row cannot raise IndexError.
    cells.extend([""] * (len(_ASN_FIELDS) - len(cells)))
    return dict(zip(_ASN_FIELDS, cells))


def get_ASNs():
    """Crawl every country's page and store its ASN table in MongoDB.

    For each row returned by ``get_Countries()`` (name, registered count,
    relative URL) the country page is fetched from bgp.he.net, every table
    row containing an ``alignright`` cell is parsed into a record keyed by
    its ASN link text, and one document per country is inserted into
    ``mycol``.
    """
    for country in get_Countries():
        country_name = country[0]
        asn_number_o = country[1]
        asn_url = country[2]
        res = requests.get('http://bgp.he.net' + asn_url, headers=headers, timeout=30)
        html = etree.HTML(res.text)
        # Select the parent <tr> of every right-aligned cell inside the country table.
        asn_parts = html.xpath("//div[@id='country']//tr/td[@class='alignright']/..")
        asns = {}
        for asn_part in asn_parts:
            asn_links = asn_part.xpath("./td/a/text()")
            if not asn_links:
                # A row without an ASN link has no key to store it under.
                continue
            asns[asn_links[0]] = _build_asn_record(asn_part.xpath("./td/text()"))
        # print(json.dumps(asns, indent=4))
        mycol.insert_one({
            "Country": country_name,
            "ASN_Number_Register": asn_number_o,
            "ASN_Number_Find": len(asn_parts),
            "ASNs": asns,
        })
# Run the crawl only when the file is executed as a script.
if __name__ == "__main__":
    get_ASNs()
Here is the error output:
D:\PyCharm_workplace\PyCharmProfession_workplace\venv\Scripts\python.exe D:/PyCharm_workplace/query_country.py
Traceback (most recent call last):
  File "D:/PyCharm_workplace/query_country.py", line 78, in <module>
    get_ASNs()
  File "D:/PyCharm_workplace/query_country.py", line 40, in get_ASNs
    countries = get_Countries()
  File "D:/PyCharm_workplace/query_country.py", line 31, in get_Countries
    con = pymysql.connect(host='localhost', user='root', passwd='youpassword', db='ASNS', charset='utf8')
nullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnull