I have a page that contains a script, and that script defines an array that I need (myDayHash).
<script type="text/javascript">
/**
 * Unchecks every checkbox in the "thePage:SiteTemplate:theForm" form whose
 * name differs from the name of the given element. Checkboxes sharing that
 * name, and all non-checkbox fields, are left untouched.
 *
 * @param {{name: string}} obj - Element whose `name` identifies the
 *     checkbox(es) that must keep their current state.
 */
function toggleCheckBoxes(obj) {
    var theForm = document.getElementById("thePage:SiteTemplate:theForm");
    // Nothing to toggle when the form is not present on the page.
    if (theForm == null) {
        return;
    }
    var fields = theForm.elements;
    for (var i = 0; i < fields.length; i++) {
        var field = fields[i];
        if (field.type === "checkbox" && field.name !== obj.name) {
            field.checked = false;
        }
    }
}
// ATLAS-1089: back & continue buttons showing twice for Reserved Group/Emergency Appointments
/**
 * Hides the second form's continue button when the first form also renders
 * a continue button, so that only one of them is visible. Does nothing when
 * either button is absent from the page.
 *
 * NOTE(review): the ticket title mentions the back button too, but only the
 * continue button is hidden here — confirm whether the second form's back
 * button ("thePage:SiteTemplate:theForm2:form2BackBtn") needs the same
 * treatment.
 */
function checkIfButtonsShowTwice() {
    // the controller logic for rendering the buttons is fragile so... front end solutions ftw
    var continueBtn = document.getElementById("thePage:SiteTemplate:theForm:continueBtn");
    var continueBtnToHide = document.getElementById("thePage:SiteTemplate:theForm2:continueBtn");
    if (continueBtn == null || continueBtnToHide == null) {
        return;
    }
    continueBtnToHide.style.display = "none";
}
// Lookup of dates flagged `true`, keyed by 'D-M-YYYY' strings (no zero padding).
// It is an array used as a keyed map: the keys are not indices, so
// `myDayHash.length` stays 0 and the entries are plain properties.
// `var` is kept so the names stay visible to any other script that reads them.
var myDayHash = [];
myDayHash['14-9-2023'] = true;
myDayHash['4-12-2023'] = true;
myDayHash['31-1-2024'] = true;
myDayHash['1-2-2024'] = true;
myDayHash['27-2-2024'] = true;
myDayHash['28-2-2024'] = true;
myDayHash['4-3-2024'] = true;
myDayHash['5-3-2024'] = true;
myDayHash['6-3-2024'] = true;
myDayHash['7-3-2024'] = true;
myDayHash['11-3-2024'] = true;
myDayHash['12-3-2024'] = true;
myDayHash['13-3-2024'] = true;
myDayHash['14-3-2024'] = true;
myDayHash['18-3-2024'] = true;
myDayHash['19-3-2024'] = true;
myDayHash['20-3-2024'] = true;
myDayHash['21-3-2024'] = true;
myDayHash['25-3-2024'] = true;
myDayHash['26-3-2024'] = true;
myDayHash['27-3-2024'] = true;
// Final value is the empty string (the earlier `null` was overwritten immediately).
var ofcAptDateStr = '';
// Tokens: [weekday, month, day, time, zone, year].
var splitDate = 'Thu Sep 14 00:00:00 GMT 2023'.split(" ");
// "<month> <day> <year>", e.g. 'Sep 14 2023'.
var minApptDate = splitDate[1] + ' ' + splitDate[2] + ' ' + splitDate[5];
}
</script>
So I need to get the myDayHash array out of it.
What I am trying to do:
# NOTE(review): in a regular (non-raw) Python string, '\t' is a tab escape, so this
# literal is <TAB> + 'est.html' — a raw string or a full file:// / http:// URL is
# presumably what was intended; confirm.
driver.get('\test.html')
# NOTE(review): the script text contains no `return` statement, so it presumably
# hands no value back to Python — verify against the Selenium execute_script docs.
element = driver.execute_script("myDayHash")
But it doesn't return anything. I also tried element = driver.execute_script("return myDayHash"), but that returns None.
However, if I open the console in the Chrome browser and type "myDayHash", it prints my whole array.
How can I get this array into Python?
CodePudding user response:
Here is a solution using BeautifulSoup and regex.
Fetch the data
import re

import requests
from bs4 import BeautifulSoup

# Download the page that contains the inline script.
r = requests.get('http://website.com/test.html')
# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed (and to avoid bs4's "no parser specified" warning).
soup = BeautifulSoup(r.content, 'html.parser')
# All <script> elements in the document.
array = soup.select('script')
Get the text from each script tag
# Concatenate the contents of every <script> tag, separated by single spaces.
text = ' '.join(script.text for script in array)
Apply regex to get myDayHash
The regex below returns the myDayHash
entries as a list of (key, value) tuples.
# Capture (key, value) from every `myDayHash['<key>'] = <value>;` assignment.
# Whitespace around `=` is optional so differently formatted scripts still match.
myDayHash = re.findall(r"myDayHash\['([^']+)'\]\s*=\s*([^;]+);", text)
Generating Output :
# Turn the (key, value) tuples into a dict and display it.
day_lookup = dict(myDayHash)
print(day_lookup)
Output
This gives us the expected output. Depending on your requirements, you can now store the key/value pairs in any data structure.
{
'14-9-2023': 'true',
'4-12-2023': 'true',
'31-1-2024': 'true',
'1-2-2024': 'true',
'27-2-2024': 'true',
'28-2-2024': 'true',
'4-3-2024': 'true',
'5-3-2024': 'true',
'6-3-2024': 'true',
'7-3-2024': 'true',
'11-3-2024': 'true',
'12-3-2024': 'true',
'13-3-2024': 'true',
'14-3-2024': 'true',
'18-3-2024': 'true',
'19-3-2024': 'true',
'20-3-2024': 'true',
'21-3-2024': 'true',
'25-3-2024': 'true',
'26-3-2024': 'true',
'27-3-2024': 'true'
}
TL;DR
import re

import requests
from bs4 import BeautifulSoup

# Download the page and parse it; the parser is named explicitly so the result
# does not depend on which optional parsers are installed.
r = requests.get('http://website.com/test.html')
soup = BeautifulSoup(r.content, 'html.parser')

# Join the contents of every <script> tag into one searchable string.
array = soup.select('script')
text = ' '.join(elem.text for elem in array)

# Capture (key, value) from every `myDayHash['<key>'] = <value>;` assignment;
# whitespace around `=` is optional. Values come back as strings (e.g. 'true').
myDayHash = re.findall(r"myDayHash\['([^']+)'\]\s*=\s*([^;]+);", text)
print(dict(myDayHash))
CodePudding user response:
That variable is defined in the function's scope (not global). In other words, you can't access it from outside that function.