Home > Enterprise >  unable to extract the table using bs4
unable to extract the table using bs4

Time:09-18

Hi, I am trying to extract the table from this site:

Target Site for Scrape

I tried using bs4 as well as pd.read_html. Despite seeing the table tags in the "soup", I've not been able to make either of them find the table. My guess is that the data is loaded in afterwards by JavaScript. I would ideally like to do this without using Selenium. I've also noticed that the page has a dict-like object called "var model". Is there a way to access this through requests? I've copied it from the page source and posted it below.

Cheers

var model = {"CurrentInterbankRate":74.72140399,"CurrentInverseInterbankRate":0.01338305,"Average":15.14101240,"HistoricalPoints":[{"PointInTime":978307199999,"InterbankRate":0.99973015,"InverseInterbankRate":1.00027002},{"PointInTime":1009843199999,"InterbankRate":1.00000857,"InverseInterbankRate":0.99999155},{"PointInTime":1041379199999,"InterbankRate":2.63289734,"InverseInterbankRate":0.43403613},{"PointInTime":1072915199999,"InterbankRate":2.94796721,"InverseInterbankRate":0.34009351},{"PointInTime":1104537599999,"InterbankRate":2.94124008,"InverseInterbankRate":0.34012553},{"PointInTime":1136073599999,"InterbankRate":2.91477610,"InverseInterbankRate":0.54410527},{"PointInTime":1167609599999,"InterbankRate":3.07339545,"InverseInterbankRate":0.32538532},{"PointInTime":1199145599999,"InterbankRate":3.11525349,"InverseInterbankRate":0.32103202},{"PointInTime":1230767999999,"InterbankRate":3.16700443,"InverseInterbankRate":0.31621115},{"PointInTime":1262303999999,"InterbankRate":3.72898339,"InverseInterbankRate":0.26848343},{"PointInTime":1293839999999,"InterbankRate":3.91137986,"InverseInterbankRate":0.25571003},{"PointInTime":1325375999999,"InterbankRate":4.12870331,"InverseInterbankRate":0.24233673},{"PointInTime":1356998399999,"InterbankRate":4.54916212,"InverseInterbankRate":0.22016157},{"PointInTime":1388534399999,"InterbankRate":5.47453082,"InverseInterbankRate":0.18368720},{"PointInTime":1420070399999,"InterbankRate":8.11288453,"InverseInterbankRate":0.12367665},{"PointInTime":1451606399999,"InterbankRate":9.24612693,"InverseInterbankRate":0.10878823},{"PointInTime":1483228799999,"InterbankRate":14.75736653,"InverseInterbankRate":0.06790187},{"PointInTime":1514764799999,"InterbankRate":16.55924652,"InverseInterbankRate":0.06057628},{"PointInTime":1546300799999,"InterbankRate":28.14054973,"InverseInterbankRate":0.03813937},{"PointInTime":1577836799999,"InterbankRate":48.19220455,"InverseInterbankRate":0.02137447},{"PointInTime":1609459199999,"InterbankRate":70.63534275,"InverseInterbankRate":0.01430140},{"PointInTime":1631282400000,"InterbankRate":92.87351896,"InverseInterbankRate":0.01078530}]},
    txt = $$('TEXTAREA')[0],
    humanize = true && location.hash.indexOf('dehumanize') == -1,

CodePudding user response:

You don't need to scrape the table, because the data is already available in JSON format (among others). Simply append &format=json to the end of the URL like so:

https://api.ofx.com/PublicSite.ApiService/SpotRateHistory/allTime/USD/ARS?DecimalPlaces=8&ReportingInterval=yearly&format=json

and you get the following result:

{
    "CurrentInterbankRate": 74.72140399,
    "CurrentInverseInterbankRate": 0.01338305,
    "Average": 15.14101240,
    "HistoricalPoints": [{
        "PointInTime": 978307199999,
        "InterbankRate": 0.99973015,
        "InverseInterbankRate": 1.00027002
    }, {
        "PointInTime": 1009843199999,
        "InterbankRate": 1.00000857,
        "InverseInterbankRate": 0.99999155
    }, {
        "PointInTime": 1041379199999,
        "InterbankRate": 2.63289734,
        "InverseInterbankRate": 0.43403613
    }, {
        "PointInTime": 1072915199999,
        "InterbankRate": 2.94796721,
        "InverseInterbankRate": 0.34009351
    }, {
        "PointInTime": 1104537599999,
        "InterbankRate": 2.94124008,
        "InverseInterbankRate": 0.34012553
    }, {
        "PointInTime": 1136073599999,
        "InterbankRate": 2.91477610,
        "InverseInterbankRate": 0.54410527
    }, {
        "PointInTime": 1167609599999,
        "InterbankRate": 3.07339545,
        "InverseInterbankRate": 0.32538532
    }, {
        "PointInTime": 1199145599999,
        "InterbankRate": 3.11525349,
        "InverseInterbankRate": 0.32103202
    }, {
        "PointInTime": 1230767999999,
        "InterbankRate": 3.16700443,
        "InverseInterbankRate": 0.31621115
    }, {
        "PointInTime": 1262303999999,
        "InterbankRate": 3.72898339,
        "InverseInterbankRate": 0.26848343
    }, {
        "PointInTime": 1293839999999,
        "InterbankRate": 3.91137986,
        "InverseInterbankRate": 0.25571003
    }, {
        "PointInTime": 1325375999999,
        "InterbankRate": 4.12870331,
        "InverseInterbankRate": 0.24233673
    }, {
        "PointInTime": 1356998399999,
        "InterbankRate": 4.54916212,
        "InverseInterbankRate": 0.22016157
    }, {
        "PointInTime": 1388534399999,
        "InterbankRate": 5.47453082,
        "InverseInterbankRate": 0.18368720
    }, {
        "PointInTime": 1420070399999,
        "InterbankRate": 8.11288453,
        "InverseInterbankRate": 0.12367665
    }, {
        "PointInTime": 1451606399999,
        "InterbankRate": 9.24612693,
        "InverseInterbankRate": 0.10878823
    }, {
        "PointInTime": 1483228799999,
        "InterbankRate": 14.75736653,
        "InverseInterbankRate": 0.06790187
    }, {
        "PointInTime": 1514764799999,
        "InterbankRate": 16.55924652,
        "InverseInterbankRate": 0.06057628
    }, {
        "PointInTime": 1546300799999,
        "InterbankRate": 28.14054973,
        "InverseInterbankRate": 0.03813937
    }, {
        "PointInTime": 1577836799999,
        "InterbankRate": 48.19220455,
        "InverseInterbankRate": 0.02137447
    }, {
        "PointInTime": 1609459199999,
        "InterbankRate": 70.63534275,
        "InverseInterbankRate": 0.01430140
    }, {
        "PointInTime": 1631282400000,
        "InterbankRate": 92.87351896,
        "InverseInterbankRate": 0.01078530
    }]
}

(with a little bit of formatting).

  • Related