Home > Software engineering >  Turning pages with a parser
Turning pages with a parser

Time:10-15

I need to write a loop so that the parser collects data from all pages, but my version only collects data from the first page. How could I implement it differently?

import time 

import pandas as pd
from selenium.webdriver import Chrome
from datetime import datetime


# Scrape the product tiles from one Rozetka search-results page and save them
# as a dated CSV file (date, product name, price, availability text).

# Filesystem path of the chromedriver executable used to launch Chrome.
webdriver = r"C:\Users\К.Бояр (Второй)\source\repos\RozetaParcer\chromedriver.exe"

driver = Chrome(webdriver)
# Implicit wait: element lookups keep polling for up to 10 seconds before
# giving up, which gives the dynamically rendered tiles time to appear.
driver.implicitly_wait(10)

total = []
# Date stamp stored in every row and reused in the output file name.
cur_date = datetime.now().strftime("%d_%m_%Y")
try:
    # NOTE: only this single URL is loaded, so only the tiles present on
    # this one results page are collected.
    driver.get("https://rozetka.com.ua/search/?producer=gazer&seller=rozetka&text=Gazer")
    items = driver.find_elements_by_css_selector(".goods-tile.ng-star-inserted")
    for item in items:
        t_name = item.find_element_by_css_selector('.goods-tile__title').text
        t_price = item.find_element_by_css_selector('.goods-tile__price-value').text
        t_nal = item.find_element_by_css_selector('.goods-tile__availability').text
        row = cur_date, t_name, t_price, t_nal
        total.append(row)
finally:
    # quit() ends the whole WebDriver session (browser and chromedriver
    # process); running it in `finally` releases them even when the page
    # load or a tile lookup raises.
    driver.quit()

df = pd.DataFrame(total, columns=['Date','Name', 'Price', 'Nal'])
df.to_csv(f'Rozetka_parcer_{cur_date}.csv')

CodePudding user response:

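Here is the code. It requests each results page in turn by putting the page number into the `page` query parameter of the URL, and collects the items from every page into one list: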

# Collect the product tiles from every search-results page and save them as
# one dated CSV file. Relies on `driver`, `datetime` and `pd` already being
# set up as in the question's script.

# I think it has 13 pages -- this is a guess, adjust it to the real count.
last_page = 13

total = []
# Taken once so that every row and the output file name share the same date,
# and so the name is defined even if no page is visited.
cur_date = datetime.now().strftime("%d_%m_%Y")
# The implicit wait applies to the whole driver session, so it is set a
# single time here instead of on every loop iteration.
driver.implicitly_wait(10)
try:
    for page in range(1, last_page + 1):
        # The `page` query parameter selects which results page is loaded.
        driver.get(f"https://rozetka.com.ua/search/?page={page}&producer=gazer&seller=rozetka&text=Gazer")
        items = driver.find_elements_by_css_selector(".goods-tile.ng-star-inserted")
        for item in items:
            t_name = item.find_element_by_css_selector('.goods-tile__title').text
            t_price = item.find_element_by_css_selector('.goods-tile__price-value').text
            t_nal = item.find_element_by_css_selector('.goods-tile__availability').text
            row = cur_date, t_name, t_price, t_nal
            total.append(row)
finally:
    # quit() ends the whole WebDriver session (browser and chromedriver
    # process); running it in `finally` releases them even when a page load
    # or a tile lookup raises part-way through.
    driver.quit()

df = pd.DataFrame(total, columns=['Date','Name', 'Price', 'Nal'])
df.to_csv(f'Rozetka_parcer_{cur_date}.csv')
  • Related