I have this Python 3 code for E2 (Dreambox):
from xml.dom import Node, minidom
from urllib.request import urlopen, Request
# URL of the XML document that downloadxmlpage() requests.
selectedserverurl = 'http://fairbird.liveblog365.com/TSpanel/TSipanel.xml'
def downloadxmlpage():
    """Fetch the document at ``selectedserverurl`` and pass it to ``gotPageLoad``.

    Prints the raw response bytes, then the value returned by
    ``gotPageLoad``. Errors raised by ``urlopen`` or by the XML parser
    propagate to the caller.
    """
    req = Request(selectedserverurl)
    # The context manager closes the response even if read() raises.
    with urlopen(req) as response:
        data = response.read()
    print("data:", data)
    # Parse once and reuse the result: calling gotPageLoad a second time just
    # to print its return value would parse the document and emit its output
    # twice.
    result = gotPageLoad(data)
    print("gotPageLoad(data):", result)
def gotPageLoad(data=None):
    """Print and return the plugin names found in the 'TSpanel' sections.

    Args:
        data: XML document as ``str`` or ``bytes``, or ``None``.

    Returns:
        A list with the ``name`` attribute of every ``<plugin>`` element
        nested inside a ``<plugins>`` element whose ``cont`` attribute
        contains ``'TSpanel'``. The list is empty when ``data`` is ``None``.

    Raises:
        xml.parsers.expat.ExpatError: if ``data`` is not well-formed XML
            (for example, when the server answers with an HTML page).
    """
    names = []
    if data is None:
        return names

    xmlparse = minidom.parseString(data)
    for plugins in xmlparse.getElementsByTagName('plugins'):
        # Only sections whose 'cont' attribute mentions TSpanel are relevant.
        if 'TSpanel' not in plugins.getAttribute('cont'):
            continue
        for plugin in plugins.getElementsByTagName('plugin'):
            tsitem = plugin.getAttribute('name')
            print("tsitem:", tsitem)
            names.append(tsitem)
    return names
# Script entry point: fetch the page and process it.
downloadxmlpage()
I am trying to read this file and extract its content: http://fairbird.liveblog365.com/TSpanel/TSipanel.xml
But I get this error:
data: b'<html><body><script type="text/javascript" src="/aes.js" ></script><script>function toNumbers(d){var e=[];d.replace(/(..)/g,function(d){e.push(parseInt(d,16))});return e}function toHex(){for(var d=[],d=1==arguments.length&&arguments[0].constructor==Array?arguments[0]:arguments,e="",f=0;f<d.length;f )e =(16>d[f]?"0":"") d[f].toString(16);return e.toLowerCase()}var a=toNumbers("f655ba9d09a112d4968c63579db590b4"),b=toNumbers("98344c2eee86c3994890592585b49f80"),c=toNumbers("55cc7e99e3f798b6063f25e8b0f8aa76");document.cookie="__test=" toHex(slowAES.decrypt(c,2,a,b)) "; expires=Thu, 31-Dec-37 23:55:55 GMT; path=/"; location.href="http://fairbird.liveblog365.com/TSpanel/TSipanel.xml?i=1";</script><noscript>This site requires Javascript to work, please enable Javascript in your browser or use a browser with Javascript support</noscript></body></html>'
Traceback (most recent call last):
File "/home/raed/Desktop/test.py", line 24, in <module>
downloadxmlpage()
File "/home/raed/Desktop/test.py", line 11, in downloadxmlpage
gotPageLoad(data)
File "/home/raed/Desktop/test.py", line 16, in gotPageLoad
xmlparse = minidom.parseString(data)
File "/usr/lib/python3.10/xml/dom/minidom.py", line 2000, in parseString
return expatbuilder.parseString(string)
File "/usr/lib/python3.10/xml/dom/expatbuilder.py", line 925, in parseString
return builder.parseString(string)
File "/usr/lib/python3.10/xml/dom/expatbuilder.py", line 223, in parseString
parser.Parse(string, True)
xml.parsers.expat.ExpatError: not well-formed (invalid token): line 1, column 222
So how can I solve this issue?
CodePudding user response:
Your `data` output is HTML, not an XML file, which is why the parser fails.
The HTML redirects to http://fairbird.liveblog365.com/TSpanel/TSipanel.xml?i=1 using JavaScript, as its `<noscript>` message shows: "This site requires Javascript to work".
This is typically done to prevent anyone from scraping the page/server-files.