Home > Back-end >  Data are overwritten in DataFrame
Data are overwritten in DataFrame

Time:12-08

Try to scrape the data but data are overwrite and they will give the data of only 2 page in the csv file kindly recommend any solution for that I an waiting for your response How can I fix this? is there any way then suggest me I think due to for loop they overwrite data Thank you.these is the page link https://www.askgamblers.com/online-casinos/countries/uk/

from selenium import webdriver
import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from csv import writer


options = webdriver.ChromeOptions()
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920x1080")
options.add_argument("--disable-extensions")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
wait = WebDriverWait(driver, 20) 


for page in range(1,3):             
    URL = 'https://www.askgamblers.com/online-casinos/countries/uk/{page}'.format(page=page)
    driver.get(URL)
    time.sleep(2)

    urls= []
    data = []


    page_links =driver.find_elements(By.XPATH, "//div[@class='card__desc']//a[starts-with(@href, '/online')]")
    for link in page_links:
        href=link.get_attribute("href")
        urls.append(href)
        
    product=[]
  
     
    for url in urls:
        wev={}
        driver.get(url)
        time.sleep(1)


        try:
            title=driver.find_element(By.CSS_SELECTOR,"h1.review-intro__title").text   
        except:
            pass
        
        wev['Title']=title

        soup = BeautifulSoup(driver.page_source,"lxml")

        pays=soup.select("div#tabPayments")

        for pay in pays:
            
            try:
                t1=pay.select_one(".review-details-wrapper:nth-child(1) .review-details__item:nth-child(1) .review-details__text").get_text(' ',strip=True)
            except:
                pass
            
            wev['deposit_method']=t1
            
            
            try:
                t2=pay.select_one(".review-details-wrapper:nth-child(1) .review-details__item  .review-details__item .review-details__text").get_text(' ',strip=True)
                
            except:
                pass
            
            wev['curriences']=t2
            
            try:
                t3=pay.select_one(" .review-details-wrapper  .review-details-wrapper .review-details__item:nth-child(1) .review-details__text").get_text(' ',strip=True)
                
            except:
                pass
            
            wev['with_drawl method']=t3
            
            try:
                t4 = pay.select_one(" .review-details-wrapper  .review-details-wrapper .review-details__item:nth-child(2) .review-details__text")
                t4 = [i.replace("\n", "") for i in t4 if i.text]
                
            except:
                pass
            
            wev['with_drawl_time']=t4
            
            product.append(wev)
            
    df=pd.DataFrame(product)
    df.to_csv('casino.csv') 

CodePudding user response:

All result in 1 file :

from selenium import webdriver
import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from csv import writer


options = webdriver.ChromeOptions()
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920x1080")
options.add_argument("--disable-extensions")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
wait = WebDriverWait(driver, 20) 

product=[]
for page in range(1,4):             
    URL = 'https://www.askgamblers.com/online-casinos/countries/uk/{page}'.format(page=page)
    driver.get(URL)
    time.sleep(2)

    urls= []
    data = []


    page_links =driver.find_elements(By.XPATH, "//div[@class='card__desc']//a[starts-with(@href, '/online')]")
    for link in page_links:
        href=link.get_attribute("href")
        urls.append(href)
        
    
  
     
    for url in urls:
        wev={}
        driver.get(url)
        time.sleep(1)


        try:
            title=driver.find_element(By.CSS_SELECTOR,"h1.review-intro__title").text   
        except:
            pass
        
        wev['Title']=title

        soup = BeautifulSoup(driver.page_source,"lxml")

        pays=soup.select("div#tabPayments")

        for pay in pays:
            
            try:
                t1=pay.select_one(".review-details-wrapper:nth-child(1) .review-details__item:nth-child(1) .review-details__text").get_text(' ',strip=True)
            except:
                pass
            
            wev['deposit_method']=t1
            
            
            try:
                t2=pay.select_one(".review-details-wrapper:nth-child(1) .review-details__item  .review-details__item .review-details__text").get_text(' ',strip=True)
                
            except:
                pass
            
            wev['curriences']=t2
            
            try:
                t3=pay.select_one(" .review-details-wrapper  .review-details-wrapper .review-details__item:nth-child(1) .review-details__text").get_text(' ',strip=True)
                
            except:
                pass
            
            wev['with_drawl method']=t3
            
            try:
                t4 = pay.select_one(" .review-details-wrapper  .review-details-wrapper .review-details__item:nth-child(2) .review-details__text")
                t4 = [i.replace("\n", "") for i in t4 if i.text]
                
            except:
                pass
            
            wev['with_drawl_time']=t4
            
            product.append(wev)
            
df=pd.DataFrame(product)
df.to_csv('casino.csv')

CodePudding user response:

  1. In first loop its running only 2 times : Change it to 1,4 as below then it will give you [1,2,3]:
for page in range(1,4):
  1. Then data getting overwritten because output file name is same: change file name as below:
df.to_csv(f'casino_{page}.csv')
  • Related