Home > Enterprise >  Selenium data scraping from a table
Selenium data scraping from a table

Time:11-23

I'm trying to get the company names in the table from this website but when I try to, I get a

TypeError: 'str' object is not callable

referring to the XPATH. Is there a way around this?

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select

# Start Chrome through an explicitly located chromedriver executable.
# NOTE(review): this is a non-raw string containing backslashes; a raw string
# (r"...") would avoid invalid-escape-sequence warnings on newer Pythons.
s = Service("C:\Program Files (x86)\chromedriver.exe")
browser = webdriver.Chrome(service=s)

# Open the stock-coverage page and click "Accept" on the pop-up.
url = "https://www.dbs.com.sg/treasures/aics/stock-coverage/index.html"
browser.get(url)
# Wait up to 10 seconds for the Accept button to become clickable, then click it.
WebDriverWait(browser, 10).until(EC.element_to_be_clickable((By.XPATH, "//button[text()='Accept']"))).click()


# getting data from table
# count no of rows

# NOTE: this is the line that raises "TypeError: 'str' object is not callable".
# By.XPATH is a plain string constant, so By.XPATH("...") attempts to call a str.
# find_elements takes the locator strategy and the expression as two separate
# arguments: find_elements(By.XPATH, "...").
rows = len(browser.find_elements(By.XPATH("//*[@id='buyholdsell']/div/div/div/div/div[1]/div/div/div/div/table/tbody/tr")))

print(rows)

CodePudding user response:

This error

TypeError: 'str' object is not callable

implies that this

find_elements(By.XPATH("//"))

should be

find_elements(By.XPATH, "")

So your effective code block will be:

Code:

# Load the stock-coverage page, dismiss the "Accept" pop-up if it shows up,
# and count the rows of the buy/hold/sell table.
# NOTE: assumes `driver` is an already-created WebDriver instance.
from selenium.common.exceptions import TimeoutException

wait = WebDriverWait(driver, 30)
url = "https://www.dbs.com.sg/treasures/aics/stock-coverage/index.html"
driver.get(url)
try:
    wait.until(EC.element_to_be_clickable((By.XPATH, "//button[text()='Accept']"))).click()
except TimeoutException:
    # No Accept button became clickable within 30 seconds (presumably the
    # pop-up was not shown this time); carry on without dismissing it.
    # Only the timeout is ignored, so real errors and Ctrl-C still surface.
    pass


# getting data from table
# count no of rows

# The locator strategy and the XPath expression are two separate arguments.
rows = len(driver.find_elements(By.XPATH, "//*[@id='buyholdsell']/div/div/div/div/div[1]/div/div/div/div/table/tbody/tr"))

print(rows)

Imports:

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

Output:

10

CodePudding user response:

Selenium is not needed here. You can fetch the table through POST requests:

import requests
import pandas as pd


# Page through the search endpoint behind the stock-coverage table and collect
# one dict per hit until a page comes back with no hits; the collected rows
# are then loaded into a DataFrame.
page = 0
data_present = True
url = 'https://www.dbs.com.sg/contentapi/dbsstore_main_www_global-ia_en_article_equity/search'

# The filter and sort clauses are identical for every page, so build them once.
filters = [
    {"range": {"date_sort.PublishedDate": {"lte": "now/d"}}},
    {"range": {"date_sort.ExpiryDate": {"gte": "now/d"}}},
    {"term": {"results_data.QuickViews.POWType.raw": "Equity"}},
    {"term": {"meta.Country": "sg"}},
    {"terms": {"results_data.Industry.raw": [
        "Industrials", "BasicMaterials", "ConsumerGoods", "ConsumerServices",
        "Financials", "HealthCare", "OilGas", "RealEstate", "Technology",
        "Telecommunications", "Utilities",
    ]}},
    {"terms": {"results_data.QuickViews.Recommendation.raw": ["BUY", "HOLD", "SELL", "FV"]}},
]
sort_order = {"results_data.Title.raw": {"order": "asc"}}

rows = []
while data_present:
    # "from" is the offset of the first hit to return.
    # NOTE(review): page * 10 assumes the endpoint returns 10 hits per page - confirm.
    payload = {
        "query": {"bool": {"filter": filters}},
        "sort": sort_order,
        "from": page * 10,
    }

    # A timeout keeps the script from hanging forever on a stalled connection.
    jsonData = requests.post(url, json=payload, timeout=30).json()
    hits = jsonData['hits']['hits']

    if hits:
        # Advance to the next page. The previous `page =1` pinned the offset
        # at 10, so the same page was fetched repeatedly and the loop never ended.
        page += 1
        print(f'Page: {page}')
        for hit in hits:
            results_data = hit['_source']['results_data']
            quick_views = results_data['QuickViews']
            rows.append({
                'Cell': quick_views['Recommendation'],
                'Company Name': results_data['CompanyName'],
                'Stock Code': results_data['BloombergReutersCode'],
                'Sector': results_data['Industry'],
                'Market Capital': quick_views['MarketCap'],
                'Target Price': quick_views['TargetPrice'],
                'Time': quick_views['Time'],
            })
    else:
        print('No more data.')
        data_present = False


results = pd.DataFrame(rows)

Output:

print(results)
     Cell              Company Name  ...        Target Price       Time
0     BUY          AEM Holdings Ltd  ...                4.98  12 months
1     BUY            AIMS APAC REIT  ...    1.60140929562329  12 months
2    HOLD               APAC Realty  ...   0.877765749701783  12 months
3     BUY  ARA US Hospitality Trust  ...   0.752986472516872  12 months
4     BUY      Ascendas India Trust  ...                 1.8  12 months
..    ...                       ...  ...                 ...        ...
98   HOLD                 Vicom Ltd  ...                2.13  12 months
99    BUY      Wilmar International  ...                6.67  12 months
100   BUY  Yangzijiang Shipbuilding  ...                1.95  12 months
101   BUY         iFAST Corporation  ...               12.93  12 months
102  HOLD                  mm2 Asia  ...  0.0673190399546146  12 months

[103 rows x 7 columns]
  • Related