Here's my script:
import warnings
warnings.filterwarnings("ignore")
import re
import json
import requests
from requests import get
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
# Product pages to scrape.
URLs = ['https://www.frayssinet-joaillier.fr/fr/p/montre-the-longines-legend-diver-l37744302-bdc2']

# One list per output column; entry i of every list describes the i-th URL.
TypeVendor = []
NameVendor = []
Marques = []
Brands = []
Refs = []
Prices = []
# Carts = []
# Links = []
Links = []
# df = pd.read_csv('testlink4.csv')
n = 1

for url in URLs:
    # Fetch the page and parse the returned HTML.
    results = requests.get(url)
    soup = BeautifulSoup(results.text, "html.parser")

    # Vendor type, vendor name and brand are fixed values for this site.
    TypeVendor.append('Distributeur')
    NameVendor.append('Frayssinet')
    Marques.append('Longines')

    # NOTE(review): soup.find() returns None when no element matches, in which
    # case `.text` raises AttributeError -- verify that the fetched HTML
    # actually contains these spans.
    Brands.append(soup.find('span', class_='main-detail__name').text)
    Refs.append(soup.find('span', class_='main-detail__ref').text)
    Prices.append(soup.find('span', class_='prix').text)
    Links.append(url)
I understand why it doesn't work: `.text` isn't suited to dynamic content. But I cannot figure out how to scrape this kind of content. I know that if you find where the JSON data is stored, you can work with it and scrape the data.
But I checked the Network tab in the Google Chrome developer tools and I didn't find anything.
CodePudding user response:
Set `headers` on your request and store your information in a more structured way.
Example
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Send a browser-like User-Agent with every request.
headers = {'User-Agent': 'Mozilla/5.0'}

# Product pages to scrape.
URLs = ['https://www.frayssinet-joaillier.fr/fr/p/montre-the-longines-legend-diver-l37744302-bdc2']

# One dict per product page; each dict becomes one row of the DataFrame.
data = []

for url in URLs:
    results = requests.get(url, headers=headers)
    soup = BeautifulSoup(results.text, "html.parser")

    data.append({
        # get_text(strip=True) drops the surrounding whitespace of each span.
        'name': soup.find('span', class_='main-detail__name').get_text(strip=True),
        'brand': soup.find('span', class_='main-detail__marque').get_text(strip=True),
        'ref': soup.find('span', class_='main-detail__ref').get_text(strip=True),
        # The price is read from the `content` attribute of the
        # itemprop="price" span rather than from its visible text.
        'price': soup.find('span', {'itemprop': 'price'}).get('content'),
        'url': url,
    })

# Bare expression: builds the table from the collected rows (displayed when
# run as the last statement of a notebook cell or in a REPL).
pd.DataFrame(data)
Output
name | brand | ref | price | url |
---|---|---|---|---|
Montre The Longines Legend Diver L3.774.4.30.2 | Longines | Référence : L3.774.4.30.2 | 2240 | https://www.frayssinet-joaillier.fr/fr/p/montre-the-longines-legend-diver-l37744302-bdc2 |