I am scraping a webpage and need to return the data from a <table> in JSON format.
There's an enclosing <table> that nests multiple <table> elements (in the code below I only pasted 2).
From these nested tables I want to return a JSON array with the cell data. There are always 4 cells with different class names. The first contains a date string and the other three contain strings within a <div>. Below I put my desired output in JSON.
I am not familiar with JavaScript, so this is all I have come up with so far:
// Log the text of every <div> found inside the cells of each nested details table.
const t = document.querySelectorAll('table.mydetails');
t.forEach((table) => {
  // Declared with `const`: a bare `datas = ...` assignment creates an implicit
  // global, which throws a ReferenceError in strict mode and in ES modules.
  const datas = table.querySelectorAll('td');
  datas.forEach((cell) => {
    // In the sample markup the date cell holds plain text with no <div>,
    // so it contributes nothing to the log.
    const divs = cell.querySelectorAll('div');
    divs.forEach((div) => {
      console.log(div.textContent);
    });
  });
});
I am sure there are better ways to do this, and I would appreciate it if someone could give me a little help here.
This is my webpage:
<!-- Sample page: an outer table.grid_table with one row per date; each row nests a table.mydetails holding four cells. -->
<html>
<table class="grid_table">
<tbody>
<!-- First date row: the nested table.mydetails carries a plain-text date cell followed by three cells whose text sits inside a <div>. -->
<tr class="grid_row_1 grid_row_pr_2021-11-01" role="row">
<td role="gridcell">
<div class="classCell">
<div class="classMultiLine">
<table class="mydetails">
<tr class="">
<td class="td-e">01.11.2021</td>
<td class="td-d Target">
<div class="">Foo1</div>
</td>
<td class="td-d Mine">
<div class="">Bar1</div>
</td>
<td class="td-d His">
<div class="">FooBar1</div>
</td>
</tr>
</table>
</div>
<div class="classMultiLine">empty</div>
<div class="classMultiLine">empty</div>
</div>
</td>
</tr>
<!-- Second date row, same layout. NOTE(review): the last cell's class is "Hi" here but "His" in the first row; possibly a typo, so confirm before deriving keys from class names. -->
<tr class="grid_row_2 grid_row_pr_2021-11-02" role="row">
<td role="gridcell">
<div class="classCell">
<div class="classMultiLine">
<table class="mydetails">
<tr class="">
<td class="td-e">02.11.2021</td>
<td class="td-d Target">
<div class="">Foo2</div>
</td>
<td class="td-d Mine">
<div class="">Bar2</div>
</td>
<td class="td-d Hi">
<div class="">FooBar2</div>
</td>
</tr>
</table>
</div>
<div class="classMultiLine">empty</div>
<div class="classMultiLine">empty</div>
</div>
</td>
</tr>
</tbody>
</table>
<!-- NOTE(review): the script element below is never closed in this excerpt; the page source appears to be cut off here. -->
<script type="text/javascript">
This is my desired output:
[
{
"date": "01.11.2021",
"Target": "Foo1",
"Mine": "Bar1",
"His": "FooBar1"
},
{
"date": "02.11.2021",
"Target": "Foo2",
"Mine": "Bar2",
"His": "FooBar2"
}
]
CodePudding user response:
Try this answer. It includes code with a working demo. Just change the last line at the bottom of that demo from console.dir(chartData); to console.log(chartData); to see the results in the console.
CodePudding user response:
Try the following JS code.
// Build one record per nested `table.mydetails` and print the records as a JSON array.
const t = document.querySelectorAll('table.mydetails');
const mainArray = [];

// The value cells wrap their text in a child element (a <div> in the sample markup);
// fall back to the cell itself when that wrapper is absent.
const readWrappedText = (cell) => (cell.children[0] || cell).innerText;

t.forEach((table) => {
  // `const` keeps the cell list local instead of leaking an implicit global.
  const datas = table.querySelectorAll('td');

  // Skip tables that do not have the expected four cells rather than
  // throwing a TypeError on a missing datas[n].
  if (datas.length < 4) {
    return;
  }

  mainArray.push({
    date: datas[0].innerText,
    Target: readWrappedText(datas[1]),
    Mine: readWrappedText(datas[2]),
    His: readWrappedText(datas[3]),
  });
});

console.log(JSON.stringify(mainArray));
CodePudding user response:
Here, Array.prototype methods can be used to create the arrays, and I used a constructor function to create an object for each mydetails table. The map method creates a new array; the forEach method doesn't. At the end of the code, you can find 2 results (in JSON or in JS format).
/**
 * Copies an array-like or iterable collection (e.g. a NodeList) into a real array.
 *
 * @param {ArrayLike<*>|Iterable<*>} t - Collection to copy.
 * @returns {Array<*>} A new array holding the collection's items in order.
 */
const list = (t) => Array.from(t);
/**
 * Record holding the four cell values of one `mydetails` table.
 *
 * Note: the keys are stored in lowercase (`target`, `mine`, `his`), whereas the
 * desired output shown in the question capitalizes them.
 *
 * @constructor
 * @param {string} date - Text of the date cell.
 * @param {string} target - Text of the "Target" cell.
 * @param {string} mine - Text of the "Mine" cell.
 * @param {string} his - Text of the "His" cell.
 */
function Table(date, target, mine, his) {
  Object.assign(this, { date, target, mine, his });
}
// Find every nested details table and copy the NodeList into an array via list().
const t = document.querySelectorAll('table.mydetails');
const dataArr = list(t);

// One array of <td> elements per table.
const separate = dataArr.map((table) => list(table.querySelectorAll('td')));

// Swap each cell for its innerText.
const finalArray = separate.map((cells) => list(cells).map((cell) => cell.innerText));

// Wrap the first four strings of each row in a Table record.
const result = finalArray.map((row) => new Table(row[0], row[1], row[2], row[3]));
console.log(result);

// Same data serialized as a JSON string.
const resultFinal = JSON.stringify(result);
console.log('Scrapped results', resultFinal);