How do I scrape a value that is generated by JavaScript?
I have been trying to figure this out for a few days and now I'm stuck. I have the page login stuff working.
The page looks like this in a browser and I want to extract the SoC% value and nothing else. In this example the value is 92.16%
This page auto-updates every 10 minutes.
I can see the part of the JS that returns the value but I don't know how to scrape this value into a variable in my script.
// Append the SoC table cell for system `d`: the reported battery_soc value
// followed by "%", or a dash placeholder when the latest reading has no
// battery_soc property.
if ('battery_soc' in d.last) {
content += "<td>" + d.last.battery_soc + "%</td>";
}
else {
content += "<td class='hidden-xs'>—</td>";
}
Here is the full HTML page, if that helps.
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=9;IE=10;IE=Edge,chrome=1" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="Description" content="Select.Live is web based SCADA for SP Pro inverters and its system" />
<meta name="Author" content="Selctronic Australia Pty Ltd" />
<title>Select.Live Portal | Selectronic Australia</title>
<link href="https://fonts.googleapis.com/css?family=Titillium+Web" rel="stylesheet">
<link href="/css/bootstrap.min.css" rel="stylesheet" />
<link href="/css/zebra_datepicker.min.css" rel="stylesheet" />
<link href="/css/ad-style.css" rel="stylesheet" />
<script src="/js/jquery.min.js"></script>
<script src="/js/bootstrap.min.js"></script>
<script src="/js/zebra_datepicker.min.js"></script>
<script src="/js/user_geolocation.js"></script>
<script type="text/javascript">
// Global geocoding key, initialised to an empty string; nothing in the visible
// page script reads it (presumably consumed by /js/user_geolocation.js — TODO confirm).
var geocodeKey = "";
</script>
</head>
<body>
<div >
<button type="button" data-toggle="open">
<span ></span> <span ></span> <span ></span>
</button>
<div ></div>
<div ></div>
</div>
<div >
<div align="center" >
<ul>
<li><a href="/systems"><span ></span><span >Systems</span></a></li>
<li><a href="/myprofile"><span ></span><span >My Profile</span></a></li>
<li><a href="/logout"><span ></span><span >Logout</span></a></li>
</ul>
</div>
<script type="text/javascript">
// Mark the first <li> of the element with class "side-menu" as the active entry.
$(".side-menu li:nth-child(1)").addClass("active");
</script>
<div >
<div >
<div id="map" style="height: 300px;"></div>
</div>
<!-- My Systems -->
<div >
<div >
<div >
<div >
<h3>My Systems</h3>
</div>
<div >
<table id="ownerSystems" >
<thead>
<tr>
<th>System Name</th>
<th>Status</th>
<th>SoC</th>
<th >Production</th>
<th >Purchased</th>
<th >Consumption</th>
</tr>
</thead>
<tbody>
</tbody>
</table>
<div align="right"> <a href="#" ><span ></span> Add a System</a> </div>
</div>
</div>
</div>
</div>
<!-- Other systems (have installer access to these) 0 -->
<div id="otherSystems" >
<div >
<div >
<div >
<h3>Other Systems</h3>
</div>
<div >
<table id="installerSystems" >
<thead>
<tr>
<th>System Name</th>
<th>Status</th>
<th>SoC</th>
<th >Production</th>
<th >Purchased</th>
<th >Consumption</th>
</tr>
</thead>
<tbody>
</tbody>
</table>
</div>
</div>
</div>
</div>
<div ></div>
</div>
</div>
<div >
<div >
<div >
<div >
<div style="overflow-y: auto;">
<div >
<div > <a href="#" ><span ></span></a> </div>
<div ></div>
</div>
<div >
<h3>Add a new System to your profile</h3>
<p>Connect your Select.Live Device to your SP PRO and set it up so that it is connected to the Internet.</p>
<p>Please find the Device ID and Serial number on the LCD screen of your Select.Live Device as shown in the example,
and copy those details into the form below.
</p>
<img src="images/LCD_claim_Selectronic.png">
<form id="claim_form" style="clear:both;">
<div >
<label for="claim_code">Device ID</label>
<input type="text" id="claim_code" name="devhash" placeholder="id">
</div>
<div >
<label for="claim_serial">Serial</label>
<input type="text" id="claim_serial" name="serialnum" placeholder="serial number">
</div>
<div >
<label for="claim_type">Access Required</label><br>
<label ><input type="radio" name="claim_type" value="owner" checked> Owner</label>
<label ><input type="radio" name="claim_type" value="installer"> Installer</label>
</div>
<button type="button" id="add_claim" value="Add">Add System</button>
</form>
<br /><br /><br />
<div id="claim_failed"></div>
</div>
</div>
</div>
</div>
</div>
</div>
<script type="text/javascript" src="https://maps.googleapis.com/maps/api/js?key=AIzaSyDl070Qq1sR3HnNr3LegChHPV8c7WWjZM4"></script>
<script type="text/javascript">
// Global Google map rendered into <div id="map">; zoom()/zoomout() and the
// per-system markers created in getData() all operate on this instance.
var map = new google.maps.Map(
  document.getElementById('map'),
  {
    center: { lat: -37.7621346, lng: 145.3132782 },
    zoom: 10,
    gestureHandling: 'cooperative',
    fullscreenControl: false,
    streetViewControl: false
  }
);
/**
 * Fetches the current user's systems as JSON and renders one table row per
 * system, plus one Google Maps marker per system.
 *
 * Rows go into table#installerSystems when the response has a truthy
 * `installer` flag, otherwise into table#ownerSystems. Each row holds the
 * system name (linking to /dashboard/<did>), a status cell with an icon and
 * the age of the last reading, the SoC percentage, and three kWh totals.
 *
 * @param {boolean} installer - truthy requests 'systems/list/installer',
 *   falsy requests 'systems/list/owner'.
 */
var getData = function(installer) {
  $.ajax({
    url: 'systems/list' + (installer ? '/installer' : '/owner'),
    type: 'get',
    dataType: 'json', // jQuery parses the body, so `data` is already an object
    cache: false,
    success: function(data) {
      var tableID = data.installer ? 'installerSystems' : 'ownerSystems';
      var redIcon = "<span class='glyphicon glyphicon-remove-sign color-red s-large' style='vertical-align:middle;'></span> ";
      var dashCell = "<td class='hidden-xs'>—</td>";
      if (data.systems.length) {
        var content = "";
        var time_diff;
        data.systems.forEach(function(d) {
          // NOTE(review): d.name / d.did are inserted into HTML unescaped — confirm
          // the server sanitises them.
          content += "<tr onmouseover='zoom(" + d.lat + "," + d.lng + ")' onmouseout='zoomout()'>";
          content += "<td><a href='/dashboard/" + d.did + "'>" + d.name + "</a></td>";
          content += "<td>";
          // Status icon: green by default, yellow when the system reports events,
          // red once the last reading is 1200 or more seconds old.
          var con_stat = "<span class='glyphicon glyphicon-ok-sign color-green s-large' style='vertical-align:middle;'></span> ";
          if (d.events) con_stat = "<span class='glyphicon glyphicon-exclamation-sign color-yellow s-large' style='vertical-align:middle;'></span> ";
          if (d.last) {
            // d.delta_ts is treated as the age of the last reading in seconds.
            time_diff = d.delta_ts;
            console.log("time_diff=" + time_diff);
            if (time_diff < 60) {
              con_stat += "<span>" + Math.round(time_diff) + " seconds ago</span>";
            }
            else if (time_diff < 1200) {
              con_stat += "<span>" + Math.round(time_diff / 60) + " minutes ago</span>";
            }
            else if (time_diff < 3600) {
              con_stat = redIcon;
              con_stat += "<span>" + Math.round(time_diff / 60) + " minutes ago</span>";
            }
            else if (time_diff < 86400) {
              con_stat = redIcon;
              con_stat += "<span>" + Math.round(time_diff / 3600) + " hours ago</span>";
            }
            else {
              con_stat = redIcon;
              con_stat += "<span>" + Math.round(time_diff / 86400) + " days ago</span>";
            }
            content += con_stat;
            content += "</td>";
            // SoC column: the value the page shows as e.g. "92.16%".
            if ('battery_soc' in d.last) {
              content += "<td>" + d.last.battery_soc + "%</td>";
            }
            else {
              content += dashCell;
            }
            // Production, Purchased and Consumption columns, in that order.
            ['solar_wh_total', 'grid_in_wh_total', 'load_wh_total'].forEach(function(key) {
              if (key in d.last) {
                content += "<td class='hidden-xs'>" + d.last[key].toFixed() + " kWh</td>";
              }
              else {
                content += dashCell;
              }
            });
          }
          else {
            // No reading at all: red icon, message, and dashes for every value column.
            content += "<span class='glyphicon glyphicon-remove-sign color-red s-large' style='vertical-align:middle;'></span>";
            content += "<span>No Measurements Recorded</span></td>";
            content += "<td>—</td><td class='hidden-xs'>—</td><td class='hidden-xs'>—</td><td class='hidden-xs'>—</td>";
          }
          content += "</tr>";
          var marker = new google.maps.Marker({position: {lat: d.lat, lng: d.lng}, map: map, title: d.name});
        });
        $('table#' + tableID + ' tbody').html(content);
        if (data.installer) $('div#otherSystems.hidden').removeClass('hidden');
      }
      else if (!data.installer) {
        $('table#' + tableID + ' tbody').html("<tr><td colspan='7'>You don't have any SP Pro Systems</td></tr>");
      }
    }
  });
}
/**
 * Click handler for the "Add System" button: posts the Device ID, serial
 * number and requested access level from the claim form to 'systems/claim'.
 *
 * On success the overlay is hidden, the form and any previous error are
 * cleared, and the relevant systems table is reloaded via getData().
 * On failure an explanatory message chosen from the server's `reason` text is
 * written into div#claim_failed; unrecognised reasons are shown with alert().
 */
var addSystem = function() {
  var serial = $('#claim_serial').val();
  var code = $('#claim_code').val();
  var access = $('input[name=claim_type]:checked').val();
  console.log('Attempting to claim with serial=' + serial + ', code=' + code + ', access=' + access);
  $.ajax({
    url: 'systems/claim',
    type: 'post',
    data: { code: code, serial: serial, access: access },
    dataType: 'json',
    cache: false
  })
  .done(function(data) {
    var is_installer = data.access === 'installer';
    $('.overlay').hide();
    $('div#claim_failed').html('');
    $('form#claim_form input').val('');
    getData(is_installer);
  })
  .fail(function(data) {
    // responseJSON is absent when the response body is not JSON (e.g. a
    // network error), so fall back to an empty reason instead of throwing.
    var reason = (data.responseJSON && data.responseJSON.reason) || '';
    if (/No matching inverter/i.test(reason)) {
      $('div#claim_failed').html('<p> </p><h3 >Could not find a matching inverter</h3>' +
        '<p >Please check the following to fix this error:</p>' +
        '<ul style="padding-left:20px;">' +
        '<li>Check that the Select.Live Device is powered on with text visible on the screen' +
        '<li>Check that the Select.Live Device screen shows "Cloud: OK" and an IP address' +
        '<li>If the Select.Live Device screen shows "Cloud: ERROR" or "Cloud: NO LAN":' +
        '<ul style="padding-left:50px;">' +
        '<li>for WiFi connection, check your WiFi router is operating correctly, and reset it if necessary' +
        '<li>for WiFi connection, check that there is a good WiFi signal at your Select.Live device<br>' +
        '(use your mobile phone or tablet to confirm that the WiFi signal is present)' +
        '<li>for Ethernet connection, check that the cable is plugged in firmly at both ends;' +
        ' also check using another device, e.g. a laptop computer, that the cable is working.' +
        '</ul></ul>' +
        '<p >If you have checked all the above and still get this error when you ' +
        'attempt to add the system, you will need to reset your Select.Live device and start the ' +
        'setup process again. To reset your Select.Live Device, press and hold the black reset button ' +
        'for 10 seconds.</p>');
    }
    else if (/Access Denied/i.test(reason)) {
      $('div#claim_failed').html('<p> </p><h3 >Access denied by owner</h3>' +
        '<p >The owner of this SP PRO has not given permission for you to have access.</p>' +
        '<p >Please check you entered the correct <b>Device ID</b> and <b>Serial number</b>.' +
        ' If you think they are correct, you will need to ask the owner of this SP PRO to grant access.</p>');
    }
    else if (/No Owner/i.test(reason)) {
      $('div#claim_failed').html('<p> </p><h3 >Access Denied</h3>' +
        '<p >Installer access to this SP PRO is not allowed.</p>');
    }
    else {
      $('.overlay').hide();
      // Show the server's reason when there is one, else the HTTP status text.
      alert(reason || data.statusText);
    }
  });
};
// Overlay: opened by the "Add a System" link, closed by the overlay's close link.
$('a.add_system').on('click', function () {
  $('.overlay').show();
});
$('a.overlay_close').on('click', function () {
  $('.overlay').hide();
});

// Submit the claim form.
$('#add_claim').on('click', addSystem);

// Side-menu toggle: the button's "openned" class and the menu's "in" class are
// always added or removed together.
$('button.side-toggle').on('click', function () {
  var classOp = $(this).hasClass("openned") ? "removeClass" : "addClass";
  $(this)[classOp]("openned");
  $(".side-menu")[classOp]("in");
});
// Centre the map on the given point and zoom in to level 18.
// x is used as the latitude and y as the longitude.
var zoom = function (x, y) {
  var target = { lat: x, lng: y };
  map.panTo(target);
  map.setZoom(18);
};
// Return the map to zoom level 10, the level it is created with.
var zoomout = function () {
  var overviewZoom = 10;
  map.setZoom(overviewZoom);
};
// Load the owner's systems table when the ready handler fires.
$(window).ready(function () {
  getData(false);
});

// A resize handler is registered, but its body is currently empty.
$(window).on('resize', function () {});
</script>
</body>
</html>
Here is my script so far
/**
 * Logs in to the Select.Live portal with Puppeteer and reads the SoC column
 * of the "My Systems" table.
 *
 * The table body is empty in the served HTML and is filled in by the page's
 * own AJAX call, so the function waits for a rendered row before reading.
 * Chrome is always closed before returning, whether the run succeeds or fails.
 *
 * @returns {Promise<string[]|undefined>} text of the SoC cell of every row
 *   (e.g. ["92.16%"]), or undefined when the tab could not be opened, the site
 *   could not be visited, or the login button could not be clicked.
 */
const scrapePersons = async () => {
  // import launchChrome from the browser.js file in the same directory
  const { launchChrome } = require("./browser");

  const emailSelector = "input[name=email]";
  const pwdSelector = "input[name=pwd]";
  const btnSelector = ".btn";
  // Rows of the owner's systems table; SoC is the third cell of each row.
  const socRowSelector = "#ownerSystems tbody tr";
  const url = "https://select.live/";

  // Flow 1 => Launching chrome and opening a new tab/page
  const [newPage, exitChrome] = await launchChrome();
  try {
    const [page] = await newPage();
    // exit the function if the tab is not properly opened
    if (!page) return undefined;

    // Flow 2 => Visiting a website's home page
    console.log("Opening " + url);
    try {
      await page.goto(url, {
        waitUntil: "networkidle0", // wait till all network requests have been processed
      });
    } catch (e) {
      console.error("Unable to visit " + url, e);
      return undefined;
    }

    // Perform the login
    await page.waitForSelector(emailSelector);
    console.log('40 Found name="email" on page');
    await page.waitForSelector(pwdSelector);
    console.log('50 Found name="pwd" on page');
    await page.waitForSelector(btnSelector);
    console.log('55 Found the button with class namne btn');
    await page.type(emailSelector, 'Username Goes Here');
    await page.type(pwdSelector, 'Password Goes Here');
    console.log('60 Entered email and password');

    // Click the login button
    try {
      await page.click(btnSelector);
      console.log('70 Clicked the Login Button');
    } catch (e) {
      console.error('Unable to click the login button' + btnSelector + ' ', e);
      return undefined;
    }

    // Find the power percentage value: wait until the systems table has a
    // visible row, then collect the third cell (SoC) of every row.
    await page.waitForSelector(socRowSelector, { visible: true });
    const socValues = await page.$$eval(
      `${socRowSelector} td:nth-child(3)`,
      (cells) => cells.map((cell) => cell.textContent)
    );
    console.log(socValues);
    return socValues;
  } finally {
    // Close Chrome on every path so a failed run does not leave it running.
    await exitChrome();
    console.log('900 Exited Chrome');
  }
};
module.exports = scrapePersons;
CodePudding user response:
Try waiting for the table cell to be rendered with page.waitForSelector:
// Rows of the owner's systems table. The <tbody> is empty in the served HTML
// and is filled in by the page's own script, so rows appear only after it runs.
const socSelector = '#ownerSystems tbody tr';
// Wait until at least one row is rendered and visible.
await page.waitForSelector(socSelector, { visible: true });
// Collect the text of the third cell (the SoC column) of every row.
const socVal = await page.$$eval(
  `${socSelector} td:nth-child(3)`,
  (cells) => cells.map((el) => el.textContent)
);
console.log(socVal);