Home > Back-end >  <script> can not be found under <div id="xxxxx">
<script> can not be found under <div id="xxxxx">

Time:11-27

I am trying to scrape the historical price data from this URL with Python.

Elements from developer tools showing the information exists

CodePudding user response:

Below is an example of how to grab the required script tag from the API response; the rest of the task is left to you.

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Fetch the market-stats chart script for one dotproperty.co.th condo project.
#
# Flow: load the project page, read its CSRF token from the <meta> tag, then
# call the market-stats endpoint with that token and print the text of the
# first <script> tag found in the HTML fragment returned under the 'msg' key.

# Browser-like User-Agent sent with both requests (defined once so the two
# header dicts cannot drift apart).
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36'
# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30

url = 'https://www.dotproperty.co.th/en/condo/2945/nai-harn-beach-condominium'
headers = {
    'user-agent': USER_AGENT
    }

# Step 1: load the project page and extract the CSRF token.
page = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
page.raise_for_status()  # fail loudly instead of parsing an error page
soup = BeautifulSoup(page.text, 'html.parser')
token_tag = soup.select_one('meta[name="csrf-token"]')
if token_tag is None:
    raise RuntimeError('csrf-token <meta> tag not found on ' + url)
token = token_tag.get('content')

# Step 2: call the market-stats endpoint as an XMLHttpRequest carrying the token.
header = {
    'user-agent': USER_AGENT,
    'x-csrf-token': token,
    'x-requested-with': 'XMLHttpRequest'

}
api_url = 'https://www.dotproperty.co.th/en/market-stats/project-page/condo/?key=2945&pv_id=sea_th_pv_4df81b83-00a9-4453-97b3-80c92eea45b9'

# Not used in this snippet; presumably a placeholder for rows parsed later.
data = []

response = requests.get(api_url, headers=header, timeout=REQUEST_TIMEOUT)
response.raise_for_status()
# The JSON payload carries an HTML fragment under the 'msg' key.
res = response.json()['msg']
soup = BeautifulSoup(res, 'html.parser')
#print(soup.prettify())
script_tag = soup.find('script')
if script_tag is None:
    raise RuntimeError('no <script> tag found in the market-stats response')
script = script_tag.get_text(strip=True)
print(script)

Output:

function getChartDataSets(type) {
            var chartData = {
                                'sale': {
                    data : {
                        dataSets : {
                            'left':{
                                // yAxisID: "y-axis-0",
                                radius: 0,
                                data:[4850000,4962500,4770833],
                                borderColor: "rgb(79,190,75)",
                                backgroundColor: "rgb(79,190,75)",
                                fill:false,
                                showLine:true,
                                label:'Median sale price'
                            },
                                                        'right': {
                                // yAxisID: "y-axis-1",
                                radius: 0,
                                data:[4917801,5167798,5124463],
                                borderColor: "rgb(204,51,51)",
                                backgroundColor: "rgb(204,51,51)",
                                fill:false,
                                showLine:true,
                                label: 'Mueang Phuket' + ' Median sale price'
                            }
                                                    }
                    },
                    yAxes : {
                        min: 4450000,
                        max: 5450000,
                        stepSize: 250000,
                    }

                },
                                                'sqmSale': {
                    data : {
                        dataSets : {
                            'left':{
                                // yAxisID: "y-axis-0",
                                radius: 0,
                                data:[88867,94915,94594],
                                borderColor: "rgb(79,190,75)",
                                backgroundColor: "rgb(79,190,75)",
                                fill:false,
                                showLine:true,
                                label:'Median sale price/sqm.'
                            },
                                                                                            'right': {
                                    // yAxisID: "y-axis-1",
                                    radius: 0,
                                    data:[97669,91350,92066],
                                    borderColor: "rgb(204,51,51)",
                                    backgroundColor: "rgb(204,51,51)",
                                    fill:false,
                                    showLine:true,
                                    label: htmlDecode('Mueang Phuket' + ' Median sale price/sqm.')
                                }
                                                                                    }
                    },
                    yAxes : {
                        min: 83000,
                        max: 103000,
                        stepSize: 5000,
                    }

                },
                                                            }
            return chartData[type];
        }
        function getDataSets(chartType){
            var dataSetsAr = getChartDataSets(chartType);
            var dataSets;
            if (dataSetsAr.data.dataSets.right) {
                datasets = [dataSetsAr.data.dataSets.left, dataSetsAr.data.dataSets.right];      
            }else{
                datasets = [dataSetsAr.data.dataSets.left];
            }
            var dataSets = {
                labels: ["Sep 2022","Oct 2022","Nov 2022"],
                datasets: datasets
            }
            return dataSets;
        }
        function getOptions(chartType){
            var dataSetsArr = getChartDataSets(chartType);
            // console.log(getChartOptionsDataSets(chartType).scales.left.ticks.max);
            var options = {
                maintainAspectRatio: false,
                responsive: true,
                maintainAspectRatio: false,
                legend: {
                    display: true,
                    position: 'bottom',
                    onClick: function(e, legendItem) {
                      var index = legendItem.datasetIndex;
                      var ci = this.chart;
                      var alreadyHidden = (ci.getDatasetMeta(index).hidden === null) ? false : ci.getDatasetMeta(index).hidden;
                      var anyOthersAlreadyHidden = false;
                      var allOthersHidden = true;

                      // figure out the current state of the labels
                      ci.data.datasets.forEach(function(e, i) {
                        var meta = ci.getDatasetMeta(i);

                        if (i !== index) {
                          if (meta.hidden) {
                            anyOthersAlreadyHidden = true;
                          } else {
                            allOthersHidden = false;
                          }
                        }
                      });

                      // if the label we clicked is already hidden
                      // then we now want to unhide (with any others already unhidden)
                      if (alreadyHidden) {
                        ci.getDatasetMeta(index).hidden = null;
                      } else {
                        // otherwise, lets figure out how to toggle visibility based upon the current state
                        ci.data.datasets.forEach(function(e, i) {
                          var meta = ci.getDatasetMeta(i);

                          if (i !== index) {
                            // handles logic when we click on visible hidden label and there is currently at least
                            // one other label that is visible and at least one other label already hidden
                            // (we want to keep those already hidden still hidden)
                            if (anyOthersAlreadyHidden && !allOthersHidden) {
                              meta.hidden = true;
                            } else {
                              // toggle visibility
                              meta.hidden = meta.hidden === null ? !meta.hidden : null;
                            }
                          } else {
                            meta.hidden = null;
                          }
                        });
                      }
                      if (!allOthersHidden && !anyOthersAlreadyHidden && !alreadyHidden) {       
                         delete ci.options.scales.yAxes[0].ticks.min
                         delete ci.options.scales.yAxes[0].ticks.max
                         delete ci.options.scales.yAxes[0].ticks.stepSize
                      }else{
                         updateChart();
                         return;
                      }
                      ci.update();
                    },
                },
                hover: {
                  mode: 'nearest',
                  intersect: false,
                },
                title:{
                    display: false,
                    text: 'Median Sale'
                },
                tooltips: {
                    intersect: false,
                    mode: 'index',
                    callbacks: {
                        label: function(tooltipItem, data) {
                            var label = data.datasets[tooltipItem.datasetIndex].data[tooltipItem.index];
                            return ' ฿' + addCommas(label);
                        }
                    },
                //     custom: function(tooltip) {
                        //         if (!tooltip) return;
                        //         // disable displaying the color box;
                        //         tooltip.displayColors = false;
                        //     },
                },

                scales: {
                    xAxes: [{
                        // padding: 100,
                        display: true,

                        gridLines : {
                           display : false
                        }
                    }],
                    yAxes: [{
                        gridLines : {
                           display : true
                        },
                        ticks: {
                            // beginAtZero:true,
                            fontColor: "rgb(79,190,75)",
                            min:dataSetsArr.yAxes.min,
                            max: dataSetsArr.yAxes.max,
                            stepSize: dataSetsArr.yAxes.stepSize,
                            fontSize: 10,
                            beginAtZero: false,
                            userCallback: function(value, index, values) {
                                value = value.toString();
                                return '฿' + nFormatter(value);
                            }
                        },
                    }]
                }
            }

            return options;
        }
        var ctx = document.getElementById("market-stats-chart").getContext('2d');
        var chartType = $('#market-stats-type').val();

        marketStatsChart = new Chart(ctx, {
           type: 'line',
           data: getDataSets(chartType),
           options: getOptions(chartType)
       });
    //    document.getElementById('indicator-graph-panel').style.display = 'none';
       function updateChart(){
           var newType = $('#market-stats-type').val();
           var newDataSets =  getChartDataSets(newType);


           var dataSets;
           if (newDataSets.data.dataSets.right) {
               datasets = [newDataSets.data.dataSets.left, newDataSets.data.dataSets.right];     
           }else{
               datasets = [newDataSets.data.dataSets.left];
           }

           marketStatsChart.options.scales.yAxes[0].ticks.min = newDataSets.yAxes.min;
           marketStatsChart.options.scales.yAxes[0].ticks.max = newDataSets.yAxes.max;
           marketStatsChart.options.scales.yAxes[0].ticks.stepSize = newDataSets.yAxes.stepSize; 

           marketStatsChart.data.datasets = datasets;
           marketStatsChart.update();
       }
       $('#market-stats-type').on('change', updateChart);

       function nFormatter(num) {
            if (num >= 1000000000) {
                return ( Math.round( ((num / 1000000000) * 100)) / 100).toFixed(1) + 'B';
            //    return (num / 1000000000).toFixed(1).replace(/\.0$/, '') + 'G';
            }
            if (num >= 1000000) {
                return (Math.round( ((num / 1000000) * 100) )  / 100 ).toFixed(1) + 'M';
            //    return (num / 1000000).toFixed(1).replace(/\.0$/, '') + 'M';
            }
            if (num >= 1000) {
                return (Math.round( ((num / 1000) * 100) )  / 100).toFixed(1) + 'K';
            //    return (num / 1000).toFixed(1).replace(/\.0$/, '') + 'K';
            }
            return num;
       }
  • Related