Home > Mobile > Web scraping: no data shown in Scrapy
Web scraping: no data shown in Scrapy

Time:11-15

I am trying to crawl this website with Scrapy, but I am getting an empty response. I tried printing the result of the XPath query, but I get an empty array. I thought this was a straightforward task, but I am unable to get the data from the table. Here is my code.

import scrapy
from scrapy import Request


class ShareInfoSpider(scrapy.Spider):
    """Spider that requests the IPO calendar page and prints matching tables."""

    name = 'share'

    def start_requests(self):
        """Yield the single request for the IPO calendar page."""
        yield Request(
            "https://nepsealpha.com/investment-calandar/ipo",
            callback=self.parse,
        )

    def parse(self, response):
        """Print every node matched by the ``DataTables_Table_0`` XPath query."""
        matches = response.xpath("//table[@id='DataTables_Table_0']")
        for table in matches:
            print(table)

CodePudding user response:

The data you see is loaded by a separate request made via JavaScript, so it is not present in the HTML that Scrapy downloads; you need another way to get it. For example, using requests/json:

import json
import requests
import pandas as pd


# Columns requested from the endpoint, in the order they appear in the query.
COLUMNS = (
    "symbol",
    "units",
    "opening_date",
    "closing_date",
    "issue_manager",
    "status",
    "view",
)


def build_params(columns=COLUMNS, start=0, length=10, draw=1):
    """Build the DataTables-style query parameters for the IPO endpoint.

    Args:
        columns: Column names; each one is expanded into the six
            ``columns[i][...]`` entries.
        start: Value sent as ``start``.
        length: Value sent as ``length``.
        draw: Value sent as ``draw``.

    Returns:
        A dict of string keys to string values, ordered as ``draw``, the
        per-column entries, then ``start``, ``length`` and the two global
        ``search`` entries.
    """
    query = {"draw": str(draw)}
    for index, column in enumerate(columns):
        prefix = f"columns[{index}]"
        query[f"{prefix}[data]"] = column
        query[f"{prefix}[name]"] = column
        query[f"{prefix}[searchable]"] = "true"
        query[f"{prefix}[orderable]"] = "true"
        query[f"{prefix}[search][value]"] = ""
        query[f"{prefix}[search][regex]"] = "false"
    query["start"] = str(start)
    query["length"] = str(length)
    query["search[value]"] = ""
    query["search[regex]"] = "false"
    return query


params = build_params()

api_url = "https://nepsealpha.com/investment-calandar/ipo"
headers = {"X-Requested-With": "XMLHttpRequest"}

if __name__ == "__main__":
    # Without a timeout, requests may wait forever on an unresponsive server.
    response = requests.get(api_url, params=params, headers=headers, timeout=30)
    # Fail loudly on HTTP errors instead of trying to decode an error page.
    response.raise_for_status()
    data = response.json()

    # uncomment to print all data
    # print(json.dumps(data, indent=4))

    df = pd.DataFrame(data["data"])
    print(df)

Prints:

     id                                                                                               symbol                        company_name    units                             opening_date                             closing_date                 issue_manager                   created_at                   updated_at price last_closing_date      type                                                                                                                                                                                                         url                                                                               status                                                                                                                                                                                                                                                                       view  DT_RowIndex
0  3033                      <a href='/stocks/BARAHI (Local)/info' target='_blank'><b>BARAHI (Local)</b></a>    Barahi Hydropower Public Limited   250000  <span class='no-wrap'>2022-10-21</span>  <span class='no-wrap'>2022-11-20</span>      NIBL Ace Capital Limited  2022-11-04T03:37:01.000000Z  2022-11-04T03:37:01.000000Z   100              None  Ordinary                                                                                                                                                                                                        None                          <span text-align: center" >Open</span>                                                                                                                                                                                                                                                                       None            1
1  3034                                    <a href='/stocks/PEOPLES/info' target='_blank'><b>PEOPLES</b></a>  Peoples Hydropower Company Limited  5606390  <span class='no-wrap'>2022-11-11</span>  <span class='no-wrap'>2022-11-15</span>        Sanima Capital Limited  2022-11-04T06:05:01.000000Z  2022-11-04T06:05:01.000000Z   100              None  Ordinary  https://nepsealpha.com/announcement/peoples-hydropower-company-limited-issued-a-letter-of-invitation-to-the-general-public-to-issue-56-lakh-6-thousand-390-shares-of-rs-100-face-value-from-25th-of-kartik                          <span text-align: center" >Open</span>  <a href='https://nepsealpha.com/announcement/peoples-hydropower-company-limited-issued-a-letter-of-invitation-to-the-general-public-to-issue-56-lakh-6-thousand-390-shares-of-rs-100-face-value-from-25th-of-kartik' target='_blank'><i class='fa  fa-file-text'></i></a>            2
2  3031  <a href='/stocks/TJVCL (Final Call Money)/info' target='_blank'><b>TJVCL (Final Call Money)</b></a>                                None  3705000  <span class='no-wrap'>2022-09-20</span>  <span class='no-wrap'>2022-11-03</span>    Global IME Capital Limited  2022-09-22T10:32:00.000000Z  2022-09-22T10:34:43.000000Z    87              None  Ordinary                                                                                                                                                                                                        None  <span style="color: #E81E62;text-align: center" >Closed</span>                                                                                                                                                                                                                                                                       None            3
3  3032                                    <a href='/stocks/EASTERN/info' target='_blank'><b>EASTERN</b></a>                                None   670310  <span class='no-wrap'>2022-10-14</span>  <span class='no-wrap'>2022-10-19</span>           NMB Capital Limited  2022-09-29T11:02:00.000000Z  2022-10-18T04:27:53.000000Z   100              None  Ordinary                                                                                                                                                                                                        None  <span style="color: #E81E62;text-align: center" >Closed</span>                                                                                                                                                                                                                                                                       None            4
4  3030                    <a href='/stocks/PEOPLES (Local)/info' target='_blank'><b>PEOPLES (Local)</b></a>                                None  3200000  <span class='no-wrap'>2022-09-16</span>  <span class='no-wrap'>2022-10-16</span>        Sanima Capital Limited  2022-09-08T05:14:00.000000Z  2022-09-30T04:52:52.000000Z   100              None  Ordinary                https://nepsealpha.com/announcement/peoples-hydropower-company-limited-has-issued-an-invitation-letter-to-local-residents-affected-by-the-project-to-issue-32-lakh-ordinary-shares-of-rs-100  <span style="color: #E81E62;text-align: center" >Closed</span>                <a href='https://nepsealpha.com/announcement/peoples-hydropower-company-limited-has-issued-an-invitation-letter-to-local-residents-affected-by-the-project-to-issue-32-lakh-ordinary-shares-of-rs-100' target='_blank'><i class='fa  fa-file-text'></i></a>            5
5  3025                          <a href='/stocks/SJCL (Local)/info' target='_blank'><b>SJCL (Local)</b></a>                                None  3650000  <span class='no-wrap'>2022-08-21</span>  <span class='no-wrap'>2022-10-16</span>      Citizen Investment Trust  2022-08-09T06:56:00.000000Z  2022-09-22T07:34:50.000000Z   100              None  Ordinary                         https://nepsealpha.com/announcement/invitation-letter-regarding-the-issuance-and-sale-of-36-lakh-50-thousand-shares-of-sanjen-jalvidyut-company-limited-with-a-face-value-of-rs-100  <span style="color: #E81E62;text-align: center" >Closed</span>                         <a href='https://nepsealpha.com/announcement/invitation-letter-regarding-the-issuance-and-sale-of-36-lakh-50-thousand-shares-of-sanjen-jalvidyut-company-limited-with-a-face-value-of-rs-100' target='_blank'><i class='fa  fa-file-text'></i></a>            6
6  3029                                      <a href='/stocks/SIKLES/info' target='_blank'><b>SIKLES</b></a>                                None  1058000  <span class='no-wrap'>2022-09-14</span>  <span class='no-wrap'>2022-09-18</span>  B.O.K Capital Market Limited  2022-09-06T06:55:28.000000Z  2022-09-06T06:55:28.000000Z   100              None  Ordinary                                                                               https://nepsealpha.com/announcement/10-lakh-58-thousand-ordinary-shares-of-siklesvhydropower-limited-with-face-value-of-rs100  <span style="color: #E81E62;text-align: center" >Closed</span>                                                                               <a href='https://nepsealpha.com/announcement/10-lakh-58-thousand-ordinary-shares-of-siklesvhydropower-limited-with-face-value-of-rs100' target='_blank'><i class='fa  fa-file-text'></i></a>            7
7  3026                    <a href='/stocks/EASTERN (Local)/info' target='_blank'><b>EASTERN (Local)</b></a>          Eastern Hydropower Limited   620000  <span class='no-wrap'>2022-08-23</span>  <span class='no-wrap'>2022-09-13</span>           NMB Capital Limited  2022-08-14T06:40:00.000000Z  2022-09-06T06:47:21.000000Z   100              None  Ordinary                                                      https://nepsealpha.com/announcement/eastern-hydropower-limited-issued-a-letter-of-invitation-to-issue-620000-shares-for-the-locals-of-bhojpur-district  <span style="color: #E81E62;text-align: center" >Closed</span>                                                      <a href='https://nepsealpha.com/announcement/eastern-hydropower-limited-issued-a-letter-of-invitation-to-issue-620000-shares-for-the-locals-of-bhojpur-district' target='_blank'><i class='fa  fa-file-text'></i></a>            8
8  3028                                  <a href='/stocks/SRIJNSIL/info' target='_blank'><b>SRIJNSIL</b></a>                                None   393750  <span class='no-wrap'>2022-08-31</span>  <span class='no-wrap'>2022-09-04</span>       Sunrise Capital Limited  2022-08-23T04:39:00.000000Z  2022-08-25T08:07:41.000000Z   100              None  Ordinary                                                                                                  https://nepsealpha.com/announcement/sirjansheel-laghubitta-to-float-ipo-from-15th-bhadra-to-general-public  <span style="color: #E81E62;text-align: center" >Closed</span>                                                                                                  <a href='https://nepsealpha.com/announcement/sirjansheel-laghubitta-to-float-ipo-from-15th-bhadra-to-general-public' target='_blank'><i class='fa  fa-file-text'></i></a>            9
9  3027                                    <a href='/stocks/KHAPTAD/info' target='_blank'><b>KHAPTAD</b></a>                                None   132000  <span class='no-wrap'>2022-08-24</span>  <span class='no-wrap'>2022-08-28</span>        Prabhu Capital Limited  2022-08-16T01:17:10.000000Z  2022-08-16T01:17:10.000000Z   100              None  Ordinary                                                                                                                                                                                                        None  <span style="color: #E81E62;text-align: center" >Closed</span>                                                                                                                                                                                                                                                                       None           10

CodePudding user response:

Implementation using Scrapy

Example:

from scrapy.crawler import CrawlerProcess
import scrapy
import json
from bs4 import BeautifulSoup
from urllib.parse import urlencode
 
class ShareSpider(scrapy.Spider):
    """Spider that reads IPO rows from the site's DataTables JSON endpoint.

    Each request asks for one page of ``page_size`` rows; ``page_count``
    consecutive pages are requested, starting at row 0.
    """

    name = "market"

    custom_settings = {
        'USER_AGENT' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36'
    }

    # Endpoint that serves the table data when called as an XHR request.
    api_url = "https://nepsealpha.com/investment-calandar/ipo"
    # Columns requested from the endpoint, in query order.
    columns = (
        "symbol",
        "units",
        "opening_date",
        "closing_date",
        "issue_manager",
        "status",
        "view",
    )
    page_size = 10   # rows per request (sent as ``length``)
    page_count = 2   # number of consecutive pages to request

    def _build_params(self, draw, start):
        """Return the DataTables query parameters for one page.

        ``draw`` is only a request counter in the DataTables protocol;
        ``start`` is the row offset that actually selects the page.
        """
        query = {"draw": str(draw)}
        for index, column in enumerate(self.columns):
            prefix = f"columns[{index}]"
            query[f"{prefix}[data]"] = column
            query[f"{prefix}[name]"] = column
            query[f"{prefix}[searchable]"] = "true"
            query[f"{prefix}[orderable]"] = "true"
            query[f"{prefix}[search][value]"] = ""
            query[f"{prefix}[search][regex]"] = "false"
        query["start"] = str(start)
        query["length"] = str(self.page_size)
        query["search[value]"] = ""
        query["search[regex]"] = "false"
        return query

    def start_requests(self):
        """Yield one XHR-style GET request per page of results."""
        headers = {"X-Requested-With": "XMLHttpRequest"}

        for page in range(self.page_count):
            # Advance ``start`` so each request returns a different page;
            # changing ``draw`` alone would fetch the first page repeatedly.
            params = self._build_params(
                draw=page + 1,
                start=page * self.page_size,
            )
            yield scrapy.Request(
                url=f"{self.api_url}?{urlencode(params)}",
                method="GET",
                callback=self.parse,
                headers=headers,
            )

    def parse(self, response):
        """Yield one item per row with the symbol's HTML markup stripped."""
        rows = json.loads(response.body)["data"]
        self.logger.debug("Received %d rows from %s", len(rows), response.url)
        for row in rows:
            # The ``symbol`` field is an HTML anchor; keep only its text.
            symbol = BeautifulSoup(row["symbol"], 'html.parser').get_text(strip=True)
            yield {"symbol": symbol}
if __name__ == "__main__":
    # CrawlerProcess takes settings (not a spider) as its first argument;
    # the spider class is scheduled through crawl().
    process = CrawlerProcess()
    process.crawl(ShareSpider)
    process.start()  # blocks until the crawl has finished

Output:

{'symbol': 'KHAPTAD'}
2022-11-14 23:11:45 [scrapy.core.engine] INFO: Closing spider (finished)
2022-11-14 23:11:45 [scrapy.statscollectors] INFO: Dumping Scrapy stats:
{'downloader/request_bytes': 3948,
 'downloader/request_count': 2,
 'downloader/request_method_count/GET': 2,
 'downloader/response_bytes': 17666,
 'downloader/response_count': 2,
 'downloader/response_status_count/200': 2,
 'elapsed_time_seconds': 1.153155,
 'finish_reason': 'finished',
 'finish_time': datetime.datetime(2022, 11, 14, 17, 11, 45, 921830),
 'item_scraped_count': 20,
  • Related