I'm trying to export som data to excel. I'm a total beginner, so i apologise for any dumb questions.
I',m practicising scraping from a demosite
CodePudding user response:
This is not that much hard for solve just use this code you just have to add urls and text in lists then change it into a pandas dataframe and then make a new excel file.
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
from pprint import pprint
url ="https://webscraper.io/test-sites/e-commerce/allinone/computers/laptops"
r = requests.get(url)
html = r.text
soup = BeautifulSoup(html)
css_selector = {"class": "col-sm-4 col-lg-4 col-md-4"}
laptops = soup.find_all("div", attrs=css_selector)
laptop_name = []
laptop_url = []
for laptop in laptops:
laptop_link = laptop.find('a')
text = laptop_link.get_text()
href = laptop_link['href']
full_url = f"https://webscraper.io{href}"
print(text)
//appending name of laptops
laptop_name.append(text)
print (full_url)
//appending urls
laptop_url.append(full_url)
//changing it into dataframe
new_df = pd.DataFrame({'Laptop Name':laptop_name,'Laptop url':laptop_url})
print(new_df)
// defining excel file
file_name = 'laptop.xlsx'
new_df.to_excel(file_name)
CodePudding user response:
CodePudding user response:
Try this. Remeber to import pandas And try not to run the code to many times you are sending a new request to the website each time
html = r.text
soup = BeautifulSoup(html)
css_selector = {"class": "col-sm-4 col-lg-4 col-md-4"}
laptops = soup.find_all("div", attrs=css_selector)
data = []
for laptop in laptops:
laptop_link = laptop.find('a')
text = laptop_link.get_text()
href = laptop_link['href']
full_url = f"https://webscraper.io{href}"
data.append([text,full_url])
df = pd.DataFrame(data, columns = ["laptop name","Url"])
df.to_csv("name")