Home > Enterprise >  Write data in csv file but data are overwritten
Write data in csv file but data are overwritten

Time:12-05

Try to scrape the data but data are overwrite and they will give the data of only 2 page in the csv file kindly recommend any solution for that I an waiting for your response How can I fix this? is there any way then suggest me I think due to for loop they overwrite data Thank you. these is the page link https://www.askgamblers.com/online-casinos/countries/ca/

from selenium import webdriver
import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from csv import writer


options = webdriver.ChromeOptions()
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920x1080")
options.add_argument("--disable-extensions")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
wait = WebDriverWait(driver, 20) 
for page in range(1,3):             
    URL = 'https://www.askgamblers.com/online-casinos/countries/ca/{page}'.format(page=page)
    driver.get(URL)
    time.sleep(2)

    urls= []
    data = []
    page_links =driver.find_elements(By.XPATH, "//div[@class='card__desc']//a[starts-with(@href, '/online')]")
    for link in page_links:
        href=link.get_attribute("href")
        urls.append(href)    
       
  
    with open('product.csv', 'w',newline='',encoding='utf-8') as csvfile:
        thewriter=writer(csvfile)
        header=['name','url','website_link','company','rating']
        thewriter.writerow(header)
        
        
        for url in urls:
            driver.get(url)
            time.sleep(1)
            
            try:
                name=driver.find_element(By.CSS_SELECTOR,"h1.review-intro__title").text   
            except:
                pass
            
            try:
                company=driver.find_element(By.XPATH,"//p[span[contains(.,'Company')]]/following-sibling::div").text   
            except:
                pass
            try:
                link=driver.find_element(By.XPATH,"//p[span[contains(.,'Website')]]/following-sibling::div").text   
            except:
                pass
            
            try:
                rate=driver.find_element(By.CSS_SELECTOR,"span.rating-ring__number").text
                
            except:
                pass
            
            jobinfo=[name,url,link,company,rate]
            thewriter.writerow(jobinfo)

CodePudding user response:

You open the same file for (over)writing with 'w' each time but loop over 3 pages. Use a different name or use 'a' (append) instead, but you will get the header three times as well with the current configuration.

Better would be to open the file for writing outside the for page loop, write the header, then inside for page write the rows.

Basically:

with open('product.csv', 'w',newline='',encoding='utf-8') as csvfile:
    thewriter=writer(csvfile)
    header=['name','url','website_link','company','rating']
    thewriter.writerow(header)

    for page in range(1,3):             
        ... # compute the row info
        jobinfo=[name,url,link,company,rate]
        thewriter.writerow(jobinfo)
  • Related