Hi, I am trying to extract the table from this site:
I tried using bs4 as well as pd.read_html. Despite seeing the table tags in the "soup", I have not been able to make either of them find the table. My guess is that the data is loaded in after the initial page load. I would ideally like to do this without using Selenium. I've also noticed that the page has a dict-like object called "var model"; is there a way to access this through requests? I've copied it from the page source and posted it below.
Cheers
var model = {"CurrentInterbankRate":74.72140399,"CurrentInverseInterbankRate":0.01338305,"Average":15.14101240,"HistoricalPoints":[{"PointInTime":978307199999,"InterbankRate":0.99973015,"InverseInterbankRate":1.00027002},{"PointInTime":1009843199999,"InterbankRate":1.00000857,"InverseInterbankRate":0.99999155},{"PointInTime":1041379199999,"InterbankRate":2.63289734,"InverseInterbankRate":0.43403613},{"PointInTime":1072915199999,"InterbankRate":2.94796721,"InverseInterbankRate":0.34009351},{"PointInTime":1104537599999,"InterbankRate":2.94124008,"InverseInterbankRate":0.34012553},{"PointInTime":1136073599999,"InterbankRate":2.91477610,"InverseInterbankRate":0.54410527},{"PointInTime":1167609599999,"InterbankRate":3.07339545,"InverseInterbankRate":0.32538532},{"PointInTime":1199145599999,"InterbankRate":3.11525349,"InverseInterbankRate":0.32103202},{"PointInTime":1230767999999,"InterbankRate":3.16700443,"InverseInterbankRate":0.31621115},{"PointInTime":1262303999999,"InterbankRate":3.72898339,"InverseInterbankRate":0.26848343},{"PointInTime":1293839999999,"InterbankRate":3.91137986,"InverseInterbankRate":0.25571003},{"PointInTime":1325375999999,"InterbankRate":4.12870331,"InverseInterbankRate":0.24233673},{"PointInTime":1356998399999,"InterbankRate":4.54916212,"InverseInterbankRate":0.22016157},{"PointInTime":1388534399999,"InterbankRate":5.47453082,"InverseInterbankRate":0.18368720},{"PointInTime":1420070399999,"InterbankRate":8.11288453,"InverseInterbankRate":0.12367665},{"PointInTime":1451606399999,"InterbankRate":9.24612693,"InverseInterbankRate":0.10878823},{"PointInTime":1483228799999,"InterbankRate":14.75736653,"InverseInterbankRate":0.06790187},{"PointInTime":1514764799999,"InterbankRate":16.55924652,"InverseInterbankRate":0.06057628},{"PointInTime":1546300799999,"InterbankRate":28.14054973,"InverseInterbankRate":0.03813937},{"PointInTime":1577836799999,"InterbankRate":48.19220455,"InverseInterbankRate":0.02137447},{"PointInTime":1609459199999,"InterbankRate":7
0.63534275,"InverseInterbankRate":0.01430140},{"PointInTime":1631282400000,"InterbankRate":92.87351896,"InverseInterbankRate":0.01078530}]},
txt = $$('TEXTAREA')[0],
humanize = true && location.hash.indexOf('dehumanize') == -1,
CodePudding user response:
You don't need to scrape the table, because the data is already available in JSON format (among others). Simply append `&format=json`
to the end of the URL, like so:
and you get the following result:
{
"CurrentInterbankRate": 74.72140399,
"CurrentInverseInterbankRate": 0.01338305,
"Average": 15.14101240,
"HistoricalPoints": [{
"PointInTime": 978307199999,
"InterbankRate": 0.99973015,
"InverseInterbankRate": 1.00027002
}, {
"PointInTime": 1009843199999,
"InterbankRate": 1.00000857,
"InverseInterbankRate": 0.99999155
}, {
"PointInTime": 1041379199999,
"InterbankRate": 2.63289734,
"InverseInterbankRate": 0.43403613
}, {
"PointInTime": 1072915199999,
"InterbankRate": 2.94796721,
"InverseInterbankRate": 0.34009351
}, {
"PointInTime": 1104537599999,
"InterbankRate": 2.94124008,
"InverseInterbankRate": 0.34012553
}, {
"PointInTime": 1136073599999,
"InterbankRate": 2.91477610,
"InverseInterbankRate": 0.54410527
}, {
"PointInTime": 1167609599999,
"InterbankRate": 3.07339545,
"InverseInterbankRate": 0.32538532
}, {
"PointInTime": 1199145599999,
"InterbankRate": 3.11525349,
"InverseInterbankRate": 0.32103202
}, {
"PointInTime": 1230767999999,
"InterbankRate": 3.16700443,
"InverseInterbankRate": 0.31621115
}, {
"PointInTime": 1262303999999,
"InterbankRate": 3.72898339,
"InverseInterbankRate": 0.26848343
}, {
"PointInTime": 1293839999999,
"InterbankRate": 3.91137986,
"InverseInterbankRate": 0.25571003
}, {
"PointInTime": 1325375999999,
"InterbankRate": 4.12870331,
"InverseInterbankRate": 0.24233673
}, {
"PointInTime": 1356998399999,
"InterbankRate": 4.54916212,
"InverseInterbankRate": 0.22016157
}, {
"PointInTime": 1388534399999,
"InterbankRate": 5.47453082,
"InverseInterbankRate": 0.18368720
}, {
"PointInTime": 1420070399999,
"InterbankRate": 8.11288453,
"InverseInterbankRate": 0.12367665
}, {
"PointInTime": 1451606399999,
"InterbankRate": 9.24612693,
"InverseInterbankRate": 0.10878823
}, {
"PointInTime": 1483228799999,
"InterbankRate": 14.75736653,
"InverseInterbankRate": 0.06790187
}, {
"PointInTime": 1514764799999,
"InterbankRate": 16.55924652,
"InverseInterbankRate": 0.06057628
}, {
"PointInTime": 1546300799999,
"InterbankRate": 28.14054973,
"InverseInterbankRate": 0.03813937
}, {
"PointInTime": 1577836799999,
"InterbankRate": 48.19220455,
"InverseInterbankRate": 0.02137447
}, {
"PointInTime": 1609459199999,
"InterbankRate": 70.63534275,
"InverseInterbankRate": 0.01430140
}, {
"PointInTime": 1631282400000,
"InterbankRate": 92.87351896,
"InverseInterbankRate": 0.01078530
}]
}
(with a little bit of formatting).