Data are overwritten in pandas how to resolve these problem...... How I get out of for loop
to
solve these problem kindly recommend any solution for that However, with every iteration through the loop, the previously extracted data is overwritten. How can I solve this problem?
from selenium import webdriver
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support.select import Select
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
# url='https://www.amazon.com/dp/B00M0DWQYI?th=1'
# url='https://www.amazon.com/dp/B010RWD4GM?th=1'
PATH="C:\Program Files (x86)\chromedriver.exe"
driver =webdriver.Chrome(PATH)
df_urls = pd.read_csv('D:/selenium/inputs/amazone-asin.csv',encoding='utf-8')
list_dicts_urls =df_urls.to_dict('records')
item=dict()
product=[]
for url in list_dicts_urls:
product_url = 'https://' url['MARKETPLACE'] '/dp/' url['ASIN']
driver.get(product_url)
try:
item['title'] = driver.find_element(By.CSS_SELECTOR,'span#productTitle').text
except:
item['title'] = ''
try:
item['brand'] = driver.find_element(By.CSS_SELECTOR,'a#bylineInfo').text.replace('Visit the','').replace('Store','').strip()
except:
item['brand'] = ''
try:
rating = driver.find_element(By.CSS_SELECTOR,'span#acrCustomerReviewText').text.replace('ratings','').strip()
rating = int(rating.replace(',', ''))
item['rating'] = rating
except:
item['rating'] = ''
time.sleep(2)
try:
p1=driver.find_element(By.XPATH, '//span[@]').text
p2= driver.find_element(By.XPATH, '//span[@]').text
item['price']=p1 p2
except:
item['price']=''
product.append(item)
df=pd.DataFrame(product)
df.to_csv("ama.csv")
CodePudding user response:
I think you need to define item=dict()
inside the for
loop. Otherwise this is the same, single item
object used in all the loop iterations.
Try this:
from selenium import webdriver
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support.select import Select
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
# url='https://www.amazon.com/dp/B00M0DWQYI?th=1'
# url='https://www.amazon.com/dp/B010RWD4GM?th=1'
PATH="C:\Program Files (x86)\chromedriver.exe"
driver =webdriver.Chrome(PATH)
df_urls = pd.read_csv('D:/selenium/inputs/amazone-asin.csv',encoding='utf-8')
list_dicts_urls =df_urls.to_dict('records')
product=[]
for url in list_dicts_urls:
item=dict()
product_url = 'https://' url['MARKETPLACE'] '/dp/' url['ASIN']
driver.get(product_url)
try:
item['title'] = driver.find_element(By.CSS_SELECTOR,'span#productTitle').text
except:
item['title'] = ''
try:
item['brand'] = driver.find_element(By.CSS_SELECTOR,'a#bylineInfo').text.replace('Visit the','').replace('Store','').strip()
except:
item['brand'] = ''
try:
rating = driver.find_element(By.CSS_SELECTOR,'span#acrCustomerReviewText').text.replace('ratings','').strip()
rating = int(rating.replace(',', ''))
item['rating'] = rating
except:
item['rating'] = ''
time.sleep(2)
try:
p1=driver.find_element(By.XPATH, '//span[@]').text
p2= driver.find_element(By.XPATH, '//span[@]').text
item['price']=p1 p2
except:
item['price']=''
product.append(item)
df=pd.DataFrame(product)
df.to_csv("ama.csv")
CodePudding user response:
Try:
product=[]
for url in list_dicts_urls:
product_url = 'https://' url['MARKETPLACE'] '/dp/' url['ASIN']
driver.get(product_url)
try:
title = driver.find_element(By.CSS_SELECTOR,'span#productTitle').text
except:
title = ''
try:
brand = driver.find_element(By.CSS_SELECTOR,'a#bylineInfo').text.replace('Visit the','').replace('Store','').strip()
except:
brand= ''
try:
rating = driver.find_element(By.CSS_SELECTOR,'span#acrCustomerReviewText').text.replace('ratings','').strip()
rating = int(rating.replace(',', ''))
rating = rating
except:
rating = ''
time.sleep(2)
try:
p1=driver.find_element(By.XPATH, '//span[@]').text
p2= driver.find_element(By.XPATH, '//span[@]').text
price=p1 p2
except:
price=''
product.append({
'title':title,
'brand':brand,
'rating':rating,
'price':price
})
df=pd.DataFrame(product)
df.to_csv("ama.csv")