The code below
# to get the data of script
The import re
The import requests
The import pandas as pd
The from bs4 import BeautifulSoup
The import LXML
Def GetSchoolRank () :
Print (' is to get the data... ')
The session=requests. The session ()
Base_url='http://www.cdgdc.edu.cn/webrms/pages/Ranking/xkpmGXZJ2016.jsp'
Headers={
'the user-agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64. X64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 '
}
Res=session. Get (=base_url url, headers=headers)
Soup=BeautifulSoup (res) text, '. The HTML parser)
# get professional name + professional code
The subjects=[e. ext for e in soup. Find_all (' a ', running the text=re.com (r '\ d \ d \ d \ d))]
# on a a broad categories to obtain professional name + professional code
For e in soup. Find_all (' a ', {' href ': re.com from running (' \? XKDM=\ d \ d ')}) [1] :
[url=base_url + e 'href'] [16:]
Res=session. Get (url=url, headers=headers)
Soup=BeautifulSoup (res) text, '. The HTML parser)
The subjects. The extend ([ee. The text for ee in soup. Find_all (' a ', running the text=re.com (r '\ d \ d \ d \ d))])
Df=pd DataFrame (columns=[' school code ', 'school', 'professional,' and 'evaluation'])
For e in the subjects:
Major_name=e/4:
Major_code=e [: 4]
Url=base_url + '? Yjxkdm=% s' % major_code
Res=session. Get (url=url, headers=headers)
Soup=BeautifulSoup (res) text, '. The HTML parser)
Table=soup. Find (' table ', dict (bgcolor="# c2d8e5," border="0", cellpadding="0", cellspacing="1",
Width="610 px")). Find_all (" td ")
School_code_regular=re.com running (r) '\ d {5}'
Assessment='
For a row in the table:
Text=row. The text
If school_code_regular. Match (text) :
School_code, school_name=text. The split (')
School_code=school_code. Replace (', ')
School_name=school_name. Replace (', ')
Df=df append ({' school code: school_code. Replace (' \ n ', '). The strip (), 'school name: school_name. Replace (' \ n', '). The strip (), 'professional name: major_name. Replace (' \ n', '). The strip (), 'assessment: assessment, replace (' \ n', '). The strip ()},
Ignore_index=True)
The else:
Assessment=text
Df. To_excel (' schoolRank. XLSX, index=None)
Df. To_csv (' schoolRank. CSV, index=None)
Print (' to get the data complete ')
If __name__=="__main__" :
GetSchoolRank ()
And then complains
ValueError Traceback (the most recent call last)
64
65 if __name__=="__main__" :
- & gt; 66 GetSchoolRank ()Text=47 row. The text
48 if school_code_regular. Match (text) :
- & gt; 49 school_code, school_name=text. The split (' ')
50 school_code=school_code. Replace (', ')
51 school_name=school_name. Replace (', ')
ValueError: not enough values to unpack (expected 2, got 1)
Help!!!!!! Save the children TATTTT