Home > Back-end > Selenium web scraping is giving me the wrong rows; I don't know where I went wrong
Selenium web scraping is giving me the wrong rows; I don't know where I went wrong

Time:07-07

Hello, I am web scraping a site.

Here is my code:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time


# Start a Chrome session; the driver path comes from ChromeDriverManager().install().
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

search_url = "https://dealt.ae/collections/laptops"
driver.get(search_url)
# Fixed 3-second pause after loading the page, before any element is queried.
time.sleep(3)

# ALL_ITEMS collects one row per loop iteration.
ALL_ITEMS = []
# Reusable four-slot row: [grade, description, old price, special price].
item = ["", "", "", ""]
# NOTE(review): both XPath expressions below contain '[@]', which looks like an
# attribute predicate that was lost when the snippet was published - confirm
# against the original before running.
all_laptops = driver.find_elements(By.XPATH, '//div[@]')
grades = all_laptops[0].find_elements(By.XPATH, '//div[@]/span[@]')
# Row i is assembled from the i-th entry of four independently collected lists
# (grades, titles, old prices, special prices), all taken from the first
# container as a whole rather than from one product at a time.
# NOTE(review): the lists only line up if every product contributes exactly one
# element to each of them; a product without, say, an 'old-price' element would
# shift every later entry of that list - this is presumably the source of the
# mixed-up rows described in the question.
for i in range(len(grades)):
    item[0] = grades[i].text
    # The three lookups below are repeated on every iteration although they do
    # not depend on i.
    description = all_laptops[0].find_elements(By.CLASS_NAME, 'product-title')
    item[1] = description[i].text
    old_price= all_laptops[0].find_elements(By.CLASS_NAME, 'old-price')
    item[2] = old_price[i].text
    special_price= all_laptops[0].find_elements(By.CLASS_NAME, 'special-price')
    item[3] = special_price[i].text
    print(item)
    # Store a copy, because `item` itself is overwritten on the next iteration.
    ALL_ITEMS.append(item.copy())    
                
# Echo every collected row once more after the loop.
for item in ALL_ITEMS:
    print(item)

# Write the rows to CSV without an index column and without a header line.
my_df = pd.DataFrame(ALL_ITEMS)
my_df.to_csv('laptop.csv', index=False, header=False)

It is giving me wrong output rows: each row mixes the grade of one product with the price of another. Please tell me where I went wrong.

Thanks

CodePudding user response:

It is because you are not selecting the right class name: the one that is common to all laptops and whose element contains all the details of that particular laptop.

Here is the full working code:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import NoSuchElementException
import pandas as pd
import time


# Page of the collection to scrape. The original author notes that the pages
# run up to 14 at 24 items per page. The output file name is derived from this
# value so the URL and the file name cannot drift apart.
PAGE = 1
search_url = f"https://dealt.ae/collections/laptops?page={PAGE}"

# Start a Chrome session; the driver path comes from ChromeDriverManager().install().
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

# One [grade, description, old price, special price] row per product.
ALL_ITEMS = []
try:
    driver.get(search_url)
    # Fixed 3-second pause after loading the page, before any element is queried.
    time.sleep(3)

    # NOTE(review): '[@]' looks like an attribute predicate that was lost when
    # the snippet was published - restore the original container selector
    # before running.
    all_laptops = driver.find_elements(By.XPATH, '//div[@]')
    # If the container selector matches nothing, scrape zero rows instead of
    # failing with IndexError on all_laptops[0].
    all_grids = (
        all_laptops[0].find_elements(By.CLASS_NAME, 'grid-item')
        if all_laptops else []
    )
    print(f"Getting {len(all_grids)} laptops")

    # Every field is looked up inside its own 'grid-item', so one row can only
    # contain values that belong to the same product. The containers are
    # queried once; the page is not navigated inside the loop.
    for grid in all_grids:
        grade = grid.find_element(By.CLASS_NAME, 'product-metafild').text
        description = grid.find_element(By.CLASS_NAME, 'product-title').text
        try:
            old_price = grid.find_element(By.CLASS_NAME, 'old-price').text
            special_price = grid.find_element(By.CLASS_NAME, 'special-price').text
        except NoSuchElementException:
            # No old/special price pair in this card (presumably a product that
            # is not discounted - confirm): fall back to the regular price and
            # leave the special-price column empty.
            old_price = grid.find_element(By.CLASS_NAME, 'price-regular').text
            special_price = ""

        # A fresh list per product, so no copy is needed before storing it.
        item = [grade, description, old_price, special_price]
        print(item)
        ALL_ITEMS.append(item)
finally:
    # Always close the browser, even when a lookup above raises.
    driver.quit()

# Echo every collected row once more after the loop.
for item in ALL_ITEMS:
    print(item)

# Write the rows to CSV without an index column and without a header line.
my_df = pd.DataFrame(ALL_ITEMS)
my_df.to_csv(f'laptop_{PAGE}.csv', index=False, header=False)





CodePudding user response:

You're not pulling the information for each laptop within the loop. Here's a cleaner way of doing it:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
# Required by the webdriver.Chrome(...) call below; without this import the
# script stops with NameError before the browser is started.
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time


def _first_text(container, by, selector):
    """Return the stripped text of the first match inside *container*, or ''.

    Uses find_elements (plural), which yields an empty list when nothing
    matches, so a product that lacks the element produces '' instead of
    raising NoSuchElementException.
    """
    matches = container.find_elements(by, selector)
    return matches[0].text.strip() if matches else ''


driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
search_url = "https://dealt.ae/collections/laptops"

all_items = []

try:
    driver.get(search_url)
    # Fixed 3-second pause after loading the page, before any element is queried.
    time.sleep(3)

    # Find all laptop containers: one element per product card.
    all_laptops = driver.find_elements(By.CSS_SELECTOR, '.grid-item.col-6.col-md-4.col-lg-3')

    for row in all_laptops:
        # Look up every field inside this product's own container, so a row
        # never mixes values from different laptops.
        all_items.append({
            'Grade': _first_text(row, By.CSS_SELECTOR, 'span.product-metafild'),
            'Description': _first_text(row, By.CLASS_NAME, 'product-title'),
            'Old Price': _first_text(row, By.CSS_SELECTOR, '.old-price'),
            'Special Price': _first_text(row, By.CLASS_NAME, 'special-price'),
        })
finally:
    # Always close the browser, even when scraping fails part-way through.
    driver.quit()

# header=False means the dictionary keys above are not written to the file;
# the CSV contains data rows only.
my_df = pd.DataFrame(all_items)
my_df.to_csv('laptop.csv', index=False, header=False)

CodePudding user response:

You can scrape the desired data statically by requesting the page URL directly, which also makes pagination easy:

import requests
from bs4 import BeautifulSoup
import pandas as pd
url = "https://dealt.ae/collections/laptops?page=1"

# A timeout keeps the script from hanging indefinitely on an unresponsive server.
req = requests.get(url, timeout=30)
print(req)
# Fail loudly on a 4xx/5xx response instead of parsing an error page.
req.raise_for_status()
soup = BeautifulSoup(req.content, "lxml")


def _column(selector):
    """Return the stripped text of the selected elements, thinned by [2::2].

    The slice drops the first two matches and then keeps every second one.
    NOTE(review): presumably the page markup contains leading and duplicated
    matches that this skips - confirm against the live HTML.
    """
    return [x.get_text().strip() for x in soup.select(selector)][2::2]


description = _column('.product-title span')

old_price = _column('.old-price')

special_price = _column('.special-price')

# NOTE(review): zip pairs the three lists purely by position and stops at the
# shortest one. If any product lacks one of these elements, later rows would
# combine values from different products - confirm every product has all
# three, or select the fields per product container instead.
df = pd.DataFrame(data=list(zip(description, old_price, special_price)))
print(df)

Output:

BLACK and GOLD PARTY DECORATIONS Perfect Adult...  Dhs. 1,675.00    Dhs. 815.00
1   Lenovo L450 Thinkpad Laptop  - Intel Core i5-5...  Dhs. 1,630.00    Dhs. 760.00
2   Dell Latitude E7470 Laptop - Intel Core I5 6th...  Dhs. 1,700.00    Dhs. 830.00
3   Dell Latitude 3160 11.6 Inch Touchscreen Displ...  Dhs. 2,900.00  Dhs. 1,599.00
4   Lenovo T450 Thinkpad Laptop  - Intel Core i5-4...  Dhs. 2,900.00  Dhs. 1,550.00
5   Hp Elitebook Folio 9480m 14.1" Display Ci5-4th...  Dhs. 2,725.00  Dhs. 1,865.00
6   Macbook Air A1466 (2017) Laptop With 13.3-Inch...  Dhs. 3,314.00  Dhs. 1,465.00
7   Macbook Air A1466 (2017) Laptop With 13.3-Inch...  Dhs. 2,624.00  Dhs. 1,165.00
8   Dell Precision 7510 Laptop - Intel Core I7 6th...  Dhs. 2,509.00  Dhs. 1,115.00
9   Dell Precision 3510  Laptop - Intel Core I5 6t...  Dhs. 2,854.00  Dhs. 1,265.00
10  Dell Latitude E7490 Laptop - Intel Core I5 7th...  Dhs. 2,739.00  Dhs. 1,215.00
11  Dell Latitude E7490 Laptop - Intel Core I5 7th...  Dhs. 3,084.00  Dhs. 1,365.00
12  Dell Latitude E7480 Touch Screen Laptop - Inte...  Dhs. 2,969.00  Dhs. 1,315.00
13  Dell Latitude E7480 Touch Screen Laptop - Inte...  Dhs. 3,084.00  Dhs. 1,365.00
14  Dell Latitude E7480 Laptop - Intel Core I7 6th...  Dhs. 2,969.00  Dhs. 1,315.00
15  Dell Latitude E7480 Laptop - Intel Core I7 6th...  Dhs. 2,739.00  Dhs. 1,215.00
16  Dell Latitude E7480 Laptop - Intel Core I5 7th...  Dhs. 2,624.00  Dhs. 1,165.00
17  Dell Latitude E7480 Laptop - Intel Core I5 7th...  Dhs. 2,739.00  Dhs. 1,215.00
18  Dell Latitude E7480 Laptop - Intel Core I5 6th...  Dhs. 2,325.00  Dhs. 1,035.00
19  Dell Latitude E7480 Laptop - Intel Core I5 6th...  Dhs. 2,210.00    Dhs. 985.00
20  Dell Latitude E7470 Laptop - Intel Core I7 6th...  Dhs. 2,049.00    Dhs. 915.00
    
    
  • Related