I am trying to scrape the historical prices from this URL with Python.
CodePudding user response:
Below is an example of how to grab the required script tag from the API response; the rest of the task is left to you.
import pandas as pd
import requests
from bs4 import BeautifulSoup
# Scrape the market-stats chart script for one dotproperty.co.th project page.
#
# Flow: fetch the project page, read its CSRF token from the <meta> tag, call
# the market-stats endpoint with that token, then pull the first <script> tag
# out of the HTML fragment returned under the JSON key 'msg'.

url = 'https://www.dotproperty.co.th/en/condo/2945/nai-harn-beach-condominium'
api_url = 'https://www.dotproperty.co.th/en/market-stats/project-page/condo/?key=2945&pv_id=sea_th_pv_4df81b83-00a9-4453-97b3-80c92eea45b9'

# Single definition of the browser-like user agent used by both requests.
user_agent = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36'
)
# requests never times out by default, so an unresponsive server would hang
# the script forever without an explicit limit (seconds).
request_timeout = 30

headers = {'user-agent': user_agent}

# Placeholder kept from the original snippet; nothing below fills it.
data = []

# A Session carries any cookies set by the page request over to the API
# request. NOTE(review): presumably the CSRF token is bound to such a cookie;
# confirm against the site's behaviour.
with requests.Session() as session:
    page = session.get(url, headers=headers, timeout=request_timeout)
    page.raise_for_status()  # fail loudly on 4xx/5xx instead of parsing an error page
    soup = BeautifulSoup(page.text, 'html.parser')

    token_tag = soup.select_one('meta[name="csrf-token"]')
    if token_tag is None or not token_tag.get('content'):
        raise RuntimeError('csrf-token meta tag not found on ' + url)
    token = token_tag.get('content')

    header = {
        'user-agent': user_agent,
        'x-csrf-token': token,
        'x-requested-with': 'XMLHttpRequest',
    }
    api_response = session.get(api_url, headers=header, timeout=request_timeout)
    api_response.raise_for_status()
    # The endpoint answers with JSON; the HTML fragment lives under 'msg'.
    res = api_response.json()['msg']

soup = BeautifulSoup(res, 'html.parser')
# print(soup.prettify())
script_tag = soup.find('script')
if script_tag is None:
    raise RuntimeError('no <script> tag in the market-stats response')
script = script_tag.get_text(strip=True)
print(script)
Output:
/**
 * Return the hard-coded chart configuration for one chart type.
 *
 * Each entry holds a `data.dataSets` object with a 'left' series (this
 * project) and a 'right' series (the surrounding area), plus the `yAxes`
 * tick range (min / max / stepSize) to use for that chart.
 *
 * Depends on htmlDecode(), which is not defined in this snippet.
 *
 * @param {string} type - 'sale' or 'sqmSale'.
 * @returns {Object|undefined} The entry for `type`, or undefined for any
 *     other key.
 */
function getChartDataSets(type) {
    var chartData = {
        'sale': {
            data: {
                dataSets: {
                    'left': {
                        radius: 0,
                        data: [4850000, 4962500, 4770833],
                        borderColor: "rgb(79,190,75)",
                        backgroundColor: "rgb(79,190,75)",
                        fill: false,
                        showLine: true,
                        label: 'Median sale price'
                    },
                    'right': {
                        radius: 0,
                        data: [4917801, 5167798, 5124463],
                        borderColor: "rgb(204,51,51)",
                        backgroundColor: "rgb(204,51,51)",
                        fill: false,
                        showLine: true,
                        // Area name and suffix are concatenated; the '+' was
                        // missing, which made this line a syntax error.
                        label: 'Mueang Phuket' + ' Median sale price'
                    }
                }
            },
            yAxes: {
                min: 4450000,
                max: 5450000,
                stepSize: 250000,
            }
        },
        'sqmSale': {
            data: {
                dataSets: {
                    'left': {
                        radius: 0,
                        data: [88867, 94915, 94594],
                        borderColor: "rgb(79,190,75)",
                        backgroundColor: "rgb(79,190,75)",
                        fill: false,
                        showLine: true,
                        label: 'Median sale price/sqm.'
                    },
                    'right': {
                        radius: 0,
                        data: [97669, 91350, 92066],
                        borderColor: "rgb(204,51,51)",
                        backgroundColor: "rgb(204,51,51)",
                        fill: false,
                        showLine: true,
                        label: htmlDecode('Mueang Phuket' + ' Median sale price/sqm.')
                    }
                }
            },
            yAxes: {
                min: 83000,
                max: 103000,
                stepSize: 5000,
            }
        },
    };
    return chartData[type];
}
/**
 * Build the Chart.js `data` object (labels + datasets) for a chart type.
 *
 * The 'left' series is always included; the 'right' series is appended only
 * when getChartDataSets() provides one.
 *
 * @param {string} chartType - Key understood by getChartDataSets().
 * @returns {{labels: string[], datasets: Object[]}} Data for `new Chart(...)`.
 */
function getDataSets(chartType) {
    var series = getChartDataSets(chartType).data.dataSets;

    // Declared locally: the original assigned to an undeclared `datasets`,
    // which silently created a global variable.
    var datasets = series.right ? [series.left, series.right] : [series.left];

    return {
        labels: ["Sep 2022", "Oct 2022", "Nov 2022"],
        datasets: datasets
    };
}
/**
 * Build the Chart.js options object for one chart type.
 *
 * The y-axis tick range (min / max / stepSize) comes from
 * getChartDataSets(chartType).yAxes; the remaining settings are static.
 * Depends on addCommas(), which is not defined in this snippet, and on
 * updateChart() and nFormatter().
 *
 * @param {string} chartType - Key understood by getChartDataSets().
 * @returns {Object} Options to pass as `options` to `new Chart(...)`.
 */
function getOptions(chartType) {
    var dataSetsArr = getChartDataSets(chartType);
    var options = {
        // The original listed maintainAspectRatio twice with the same value;
        // one entry is enough.
        maintainAspectRatio: false,
        responsive: true,
        legend: {
            display: true,
            position: 'bottom',
            // Custom legend click: isolate the clicked dataset, or restore a
            // hidden one, instead of the plain per-item toggle.
            onClick: function(e, legendItem) {
                var index = legendItem.datasetIndex;
                var ci = this.chart;
                var alreadyHidden = (ci.getDatasetMeta(index).hidden === null) ? false : ci.getDatasetMeta(index).hidden;
                var anyOthersAlreadyHidden = false;
                var allOthersHidden = true;

                // figure out the current state of the labels
                ci.data.datasets.forEach(function(e, i) {
                    var meta = ci.getDatasetMeta(i);
                    if (i !== index) {
                        if (meta.hidden) {
                            anyOthersAlreadyHidden = true;
                        } else {
                            allOthersHidden = false;
                        }
                    }
                });

                // if the label we clicked is already hidden
                // then we now want to unhide (with any others already unhidden)
                if (alreadyHidden) {
                    ci.getDatasetMeta(index).hidden = null;
                } else {
                    // otherwise, lets figure out how to toggle visibility based upon the current state
                    ci.data.datasets.forEach(function(e, i) {
                        var meta = ci.getDatasetMeta(i);
                        if (i !== index) {
                            // handles logic when we click on visible hidden label and there is currently at least
                            // one other label that is visible and at least one other label already hidden
                            // (we want to keep those already hidden still hidden)
                            if (anyOthersAlreadyHidden && !allOthersHidden) {
                                meta.hidden = true;
                            } else {
                                // toggle visibility
                                meta.hidden = meta.hidden === null ? !meta.hidden : null;
                            }
                        } else {
                            meta.hidden = null;
                        }
                    });
                }

                if (!allOthersHidden && !anyOthersAlreadyHidden && !alreadyHidden) {
                    // Every dataset was visible and one has just been
                    // isolated: drop the fixed tick range, presumably so
                    // Chart.js rescales the axis to the remaining series.
                    delete ci.options.scales.yAxes[0].ticks.min;
                    delete ci.options.scales.yAxes[0].ticks.max;
                    delete ci.options.scales.yAxes[0].ticks.stepSize;
                } else {
                    // Any other state: updateChart() restores the tick range
                    // and datasets and redraws, so skip ci.update() below.
                    updateChart();
                    return;
                }
                ci.update();
            },
        },
        hover: {
            mode: 'nearest',
            intersect: false,
        },
        title: {
            display: false,
            text: 'Median Sale'
        },
        tooltips: {
            intersect: false,
            mode: 'index',
            callbacks: {
                // Show the hovered data point as a baht amount.
                label: function(tooltipItem, data) {
                    var label = data.datasets[tooltipItem.datasetIndex].data[tooltipItem.index];
                    // The '+' was missing here, which was a syntax error.
                    return ' ฿' + addCommas(label);
                }
            },
        },
        scales: {
            xAxes: [{
                display: true,
                gridLines: {
                    display: false
                }
            }],
            yAxes: [{
                gridLines: {
                    display: true
                },
                ticks: {
                    fontColor: "rgb(79,190,75)",
                    min: dataSetsArr.yAxes.min,
                    max: dataSetsArr.yAxes.max,
                    stepSize: dataSetsArr.yAxes.stepSize,
                    fontSize: 10,
                    beginAtZero: false,
                    // Abbreviate tick values via nFormatter() with a baht prefix.
                    userCallback: function(value, index, values) {
                        value = value.toString();
                        return '฿' + nFormatter(value);
                    }
                },
            }]
        }
    };
    return options;
}
// Create the line chart on the #market-stats-chart canvas, using the chart
// type currently held by the #market-stats-type control.
var ctx = document.getElementById("market-stats-chart").getContext('2d');
var chartType = $('#market-stats-type').val();
// Assigned without var/let/const, so marketStatsChart is an implicit global;
// updateChart() reads and mutates it later.
marketStatsChart = new Chart(ctx, {
type: 'line',
data: getDataSets(chartType),
options: getOptions(chartType)
});
// document.getElementById('indicator-graph-panel').style.display = 'none';
/**
 * Re-point the existing chart at the currently selected chart type.
 *
 * Reads the type from #market-stats-type, then replaces the y-axis tick
 * range and the datasets on the global marketStatsChart and redraws it.
 */
function updateChart() {
    var newType = $('#market-stats-type').val();
    var newDataSets = getChartDataSets(newType);
    var series = newDataSets.data.dataSets;

    // Declared locally: the original assigned to an undeclared `datasets`
    // (leaking a global) and left an unused `var dataSets` behind.
    var datasets = series.right ? [series.left, series.right] : [series.left];

    var ticks = marketStatsChart.options.scales.yAxes[0].ticks;
    ticks.min = newDataSets.yAxes.min;
    ticks.max = newDataSets.yAxes.max;
    ticks.stepSize = newDataSets.yAxes.stepSize;

    marketStatsChart.data.datasets = datasets;
    marketStatsChart.update();
}
// Redraw the chart whenever the #market-stats-type control fires 'change'.
$('#market-stats-type').on('change', updateChart);
function nFormatter(num) {
if (num >= 1000000000) {
return ( Math.round( ((num / 1000000000) * 100)) / 100).toFixed(1) 'B';
// return (num / 1000000000).toFixed(1).replace(/\.0$/, '') 'G';
}
if (num >= 1000000) {
return (Math.round( ((num / 1000000) * 100) ) / 100 ).toFixed(1) 'M';
// return (num / 1000000).toFixed(1).replace(/\.0$/, '') 'M';
}
if (num >= 1000) {
return (Math.round( ((num / 1000) * 100) ) / 100).toFixed(1) 'K';
// return (num / 1000).toFixed(1).replace(/\.0$/, '') 'K';
}
return num;
}