Home > Software design >  Parse html table data to JSON array in javascript
Parse html table data to JSON array in javascript

Time:11-11

I am scraping a webpage and need to return the data from a <table> in JSON format. There's an enclosing <table> that nests multiple <table> elements (in the code below I pasted only 2). From these nested tables I want to return a JSON array with the cell data. There are always 4 cells with different class names: the first contains a date string, and the other three contain strings wrapped in a <div>. My desired output in JSON is shown below. I am not familiar with JavaScript, so this is all I have come up with so far:

// Log the text of every <div> found inside the cells of each nested
// `table.mydetails`.
const detailTables = document.querySelectorAll('table.mydetails');
detailTables.forEach(function (table) {
    // Keep `cells` block-scoped: assigning to an undeclared name would create
    // a global in sloppy mode and throws a ReferenceError in strict mode.
    const cells = table.querySelectorAll('td');
    cells.forEach(function (cell) {
        cell.querySelectorAll('div').forEach(function (div) {
            console.log(div.textContent);
        });
    });
});

I am sure there are better ways to do this, and I would appreciate it if someone could give me a little help here.

This is my webpage:

<html>
<table class="grid_table">
    <tbody>
        <tr class="grid_row_1 grid_row_pr_2021-11-01" role="row">
            <td role="gridcell">
                <div class="classCell">
                    <div class="classMultiLine">
                        <table class="mydetails">
                            <tr class="">
                                <td class="td-e">01.11.2021</td>
                                <td class="td-d Target">
                                    <div class="">Foo1</div>
                                </td>
                                <td class="td-d Mine">
                                    <div class="">Bar1</div>
                                </td>
                                <td class="td-d His">
                                    <div class="">FooBar1</div>
                                </td>
                            </tr>
                        </table>
                    </div>
                    <div class="classMultiLine">empty</div>
                    <div class="classMultiLine">empty</div>
                </div>
            </td>
        </tr>
        <tr class="grid_row_2 grid_row_pr_2021-11-02" role="row">
            <td role="gridcell">
                <div class="classCell">
                    <div class="classMultiLine">
                        <table class="mydetails">
                            <tr class="">
                                <td class="td-e">02.11.2021</td>
                                <td class="td-d Target">
                                    <div class="">Foo2</div>
                                </td>
                                <td class="td-d Mine">
                                    <div class="">Bar2</div>
                                </td>
                                <td class="td-d His">
                                    <div class="">FooBar2</div>
                                </td>
                            </tr>
                        </table>
                    </div>
                    <div class="classMultiLine">empty</div>
                    <div class="classMultiLine">empty</div>
                </div>
            </td>
        </tr>
    </tbody>
</table>
<script type="text/javascript">

This is my desired output:

[
  {
    "date": "01.11.2021",
    "Target": "Foo1",
    "Mine": "Bar1",
    "His": "FooBar1"
  },
  {
    "date": "02.11.2021",
    "Target": "Foo2",
    "Mine": "Bar2",
    "His": "FooBar2"
  }
]

CodePudding user response:

Try this answer. It includes code with a working demo. In that demo, just change the last line at the bottom from console.dir(chartData); to console.log(chartData); to see the results in the console.

CodePudding user response:

Try the following JS code.

/**
 * Read the text of a cell's first child element (the wrapping <div> in this
 * markup). Falls back to the cell's own text when the cell has no child
 * element, instead of throwing a TypeError.
 *
 * @param {Element} cell - A <td> element.
 * @returns {string} The text read via `innerText`.
 */
function firstChildText(cell) {
    return (cell.children[0] || cell).innerText;
}

/**
 * Build one record per `table.mydetails` found under `root`.
 *
 * Each table is expected to hold four cells in this order: date, Target,
 * Mine, His. A table with fewer cells makes this function throw.
 *
 * @param {ParentNode} root - Node to search, e.g. `document`.
 * @returns {Array<{date: string, Target: string, Mine: string, His: string}>}
 *     One object per table, in document order.
 */
function detailTablesToRecords(root) {
    return Array.from(root.querySelectorAll('table.mydetails'), function (table) {
        const cells = table.querySelectorAll('td');
        return {
            date: cells[0].innerText,
            Target: firstChildText(cells[1]),
            Mine: firstChildText(cells[2]),
            His: firstChildText(cells[3]),
        };
    });
}

// Guarded so the helpers above can also be loaded where no DOM exists
// (for example in a unit test); in a browser this scans the whole document.
const mainArray = typeof document === 'undefined' ? [] : detailTablesToRecords(document);
console.log(JSON.stringify(mainArray));

CodePudding user response:

Here a built-in Array method is used to convert the NodeLists into real arrays, and a constructor function creates one object for each mydetails table. The map method returns a new array; the forEach method does not. At the end of the code, two results are logged: the plain JavaScript array of objects and the same data as a JSON string.

/**
 * Convert an array-like or iterable collection (for example a NodeList)
 * into a real array, so array methods such as `map` can be used on it.
 *
 * @param {ArrayLike<*>|Iterable<*>} t - Collection to copy.
 * @returns {Array<*>} A new array holding the collection's items in order.
 */
const list = (t) => Array.from(t);

    /**
     * Record for one `mydetails` table.
     *
     * The property names are the keys of the requested JSON output
     * ("date", "Target", "Mine", "His"), so `JSON.stringify` on an instance
     * yields exactly that shape.
     *
     * @constructor
     * @param {string} date - Text of the date cell.
     * @param {string} target - Text of the "Target" cell.
     * @param {string} mine - Text of the "Mine" cell.
     * @param {string} his - Text of the "His" cell.
     */
    function Table(date,target,mine,his){
        this.date = date;
        this.Target = target;
        this.Mine = mine;
        this.His = his;
    }

    // Find every nested detail table and turn each one into a Table record
    // built from the text of its cells, in a single pass.
    const t = document.querySelectorAll('table.mydetails');
    const result = list(t).map((table) => {
        const cellTexts = list(table.querySelectorAll('td')).map((cell) => cell.innerText);
        const [date, target, mine, his] = cellTexts;
        return new Table(date, target, mine, his);
    });

    // Log the plain JavaScript objects first, then the same data as JSON.
    console.log(result);
    const resultFinal = JSON.stringify(result);
    console.log('Scrapped results', resultFinal);

 


    
  • Related