Home > Software engineering > Scrape the link from an <a> tag inside a <div> using Python BeautifulSoup
Scrape the link from an <a> tag inside a <div> using Python BeautifulSoup

Time:12-22

I want to scrape the link from the <a> tag that sits inside the <div> tag.


This is my code:

from bs4 import BeautifulSoup
import requests


# Script: download one megapc.tn listing page and look up product titles,
# prices and links in the parsed HTML by tag name / CSS class.

# NOTE(review): never read or written anywhere in the visible script.
ProductUrl = {}

url = "https://megapc.tn/shop/ORDINATEURS/PC GAMER?selection=true"

# Browser-like User-Agent sent with the request
# (presumably so the site serves the regular page -- confirm).
header = {"User-Agent":"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:77.0) Gecko/20190101 Firefox/77.0"}

# Fetch the page and parse the raw response bytes with the lxml parser.
req = requests.get(url, headers=header)
soup = BeautifulSoup(req.content, 'lxml')

# Find the product titles: every <p> element with class "title-prod".
showName = soup.find_all('p',{'class':'title-prod'})

#print(showName)

# Find the product prices: every <div> element with class "new-price".
showPrice = soup.find_all('div',{'class':'new-price'})

#print(showPrice)

# Find the product links: every <a> nested inside a <div class="card">.
# Each match is a whole element, so print() shows the full tag markup rather
# than only the link target.
# NOTE(review): the URL itself would be the tag's "href" attribute
# (e.g. urlItem.get("href")) -- confirm the anchors are present in the
# downloaded HTML and not added later by JavaScript.
for urlItem in soup.select("div.card a"):
    print(urlItem)

This is the result I want :

https://megapc.tn/shop/product/ORDINATEURS/PC GAMER/GX-7---RYZEN--3-1200---GTX-1650-D6-OC---8-GB
https://megapc.tn/shop/product/ORDINATEURS/PC GAMER/GX-8---i3-10105F---GTX-1650-D6-OC---8GB
https://megapc.tn/shop/product/ORDINATEURS/FULL SETUP/GX-9---RYZEN-3-1200---GT-1030-OC---8GB
https://megapc.tn/shop/product/ORDINATEURS/FULL SETUP/GX-10---i3-10105F---GT-1030-AERO-OC---8GB
https://megapc.tn/shop/product/ORDINATEURS/PC GAMER/pc-gamer-GX-11-GTX-1650-OC-8GB
https://megapc.tn/shop/product/ORDINATEURS/PC GAMER/pc-gamer-GX-12-10400F-BOX-GTX-1650-D6-OC
...

enter image description here

CodePudding user response:

Call the API directly to not hurt the back-end server.

import requests
from pprint import pp


def main(url):
    """POST a product-listing query to *url* and pretty-print the first result.

    The JSON body asks for category "ORDINATEURS" / sub-category "PC GAMER"
    (presumably page 0 with 12 records per page and a 0-20000 price range --
    the field meanings are inferred from their names, confirm against the API).

    Args:
        url: Endpoint that accepts the JSON query via POST and answers with
            an iterable JSON document (expected: a list of product dicts).

    Returns:
        None. The first element of the decoded response is printed with
        ``pprint.pp``; nothing is printed when the response is empty.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or when the
            server does not answer within the timeout.
    """
    payload = {
        "brand": [],
        "categorie": {
            "titre": "ORDINATEURS"
        },
        "filscateg": {
            "titre": "PC GAMER"
        },
        "pageNumber": 0,
        "price": {
            "$gte": 0,
            "$lte": 20000
        },
        # NOTE(review): sent as the literal string 'null', not JSON null
        # (None) -- kept as-is; confirm what the API expects.
        "query": 'null',
        "recordByPage": 12,
        "valeurAttribute1": []
    }
    with requests.Session() as session:
        # requests never times out by default; bound the wait so a stalled
        # server cannot hang the script forever.
        response = session.post(url, json=payload, timeout=30)
        # Fail loudly on an HTTP error instead of pretty-printing the
        # error body as if it were product data.
        response.raise_for_status()
        products = response.json()

    for product in products:
        pp(product)
        # Only the first product is shown as a sample. Leave the loop with
        # `break` rather than the site-provided exit(), which would terminate
        # the whole interpreter and is unavailable under `python -S`.
        break


# Run the query against the API endpoint used in this answer
# (presumably the store's paginated product-listing service -- confirm).
main('https://apiclient.mega-pc.net/produit/byPaginationNew')

Output:

{'prixEnPromo': 1850,
 '_id': '61b85eaee7a18e14c185d20f',
 'title_fr': 'GX 8 | i3-10105F | GTX 1650 D6 OC | 8GB',
 'notreSelection': True,
 'price': 2050,
 'devis': False,
 'stock': 10,
 'new': False,
 'sale': True,
 'lien': 'GX-8---i3-10105F---GTX-1650-D6-OC---8GB',
 'attributes': [{'cle': 'PROCESSEUR',
                 'valeur': 'intel core i3-10105F ',
                 'enable': True,
                 '_id': '619cb2e196e44e766cac025c'},
                {'cle': 'FRÉQUENCE PROCESSEUR',
                 'valeur': '4.40 GHz',
                 'enable': True,
                 '_id': '619cb2e196e44e766cac025d'},
                {'cle': 'CHIPSET GRAPHIQUE',
                 'valeur': 'GTX 1650 D6 OC',
                 'enable': True,
                 '_id': '619cb2e196e44e766cac025e'},
                {'cle': 'TAILLE MÉMOIRE VIDÉO',
                 'valeur': '4 GB',
                 'enable': True,
                 '_id': '619cb2e196e44e766cac025f'},
                {'cle': 'CARTE MÈRE',
                 'valeur': 'GIGABYTE H410M S2H V3',
                 'enable': True,
                 '_id': '619cb2e196e44e766cac0260'},
                {'cle': 'BARETTE MÉMOIRE',
                 'valeur': '8GB DDR4 3000 MHZ',
                 'enable': True,
                 '_id': '619cb2e196e44e766cac0261'},
                {'cle': 'NOMBRE DE BARRETTES MÉMOIRE',
                 'valeur': '1 BARRETTE MEMOIRE',
                 'enable': True,
                 '_id': '619cb2e196e44e766cac0262'},
                {'cle': 'TYPE DE STOCKAGE',
                 'valeur': 'SSD',
                 'enable': True,
                 '_id': '619cb2e196e44e766cac0263'},
                {'cle': 'CAPACITÉ DE STOCKAGE',
                 'valeur': '256 GB',
                 'enable': True,
                 '_id': '619cb2e196e44e766cac0264'},
                {'cle': "BLOC D'ALIMENTATION",
                 'valeur': 'AEROCOOL LUX 550W 80  BRONZE',
                 'enable': True,
                 '_id': '619cb2e196e44e766cac0265'},
                {'cle': 'BOITIER',
                 'valeur': 'WHITE SHARK CASE GCC-2103 PANZER / 1 FAN RGB',
                 'enable': True,
                 '_id': '619cb2e196e44e766cac0266'}],
 'enArrivage': False,
 'discount': 9.75609756097561,
 'commande48H': False,
 'title': 'GX 8 | i3-10105F | GTX 1650 D6 OC | 8GB',
 'marque': {'_id': '5fc5ffc00c10517079547a46',
            'titre': 'CONFIG PC INTEL',
            'description': 'CONFIG PC Intel',
            '__v': 0,
            'urlPhoto': '/uploads/marque/1606811592727.webp'},
 'filscateg': {'_id': '5ea23237a4815052c4d1a415',
               'titre': 'PC GAMER',
               'categorie': '5e907aa91c9a7315fc2fc033',
               '__v': 0,
               'urlPhoto2': '/uploads/souscateg/1623073866853.webp',
               'order': 0,
               'descriptionSEO': '<p>Achat PC Gamer Tunisie, PC de bureau '
                                 'Gamer sur mesure. Ordinateur Gamer '
                                 'Processeur intel, Ryzen, Carte graphique '
                                 'RTX. Pc gamer tunisie 1000 dt Prix.</p>',
               'titreSEO': 'PC Gamer Tunisie - Achat PC Gamer sur mesure - '
                           'Intel | RYZEN -MEGA PC',
               'create_date': '2020-04-24T00:26:31.000Z',
               'update_date': '2021-12-15T10:41:08.763Z',
               'visible': True},
 'gallerie': {'_id': '61b8ca88a14a1b547a12db30',
              'titre': 'GX 8 | I3-10105F | GTX 1650 D6 OC | 8GB',
              'urlPhoto': ['/uploads/gallerie/1640088610360.webp'],
              'update_date': '2021-12-21T12:10:11.853Z',
              'create_date': '2021-12-14T16:47:04.398Z',
              '__v': 2},
 'categorie': {'_id': '5e907aa91c9a7315fc2fc033',
               'order': 0,
               'titre': 'ORDINATEURS',
               '__v': 0,
               'description': '<h1>pc gamer tunisie<br></h1><p>Retrouvez le '
                              'meilleur <strong>Pc GAMER</strong> en Tunisie '
                              'sur Megapc.tn . Puissance de calcul, <a '
                              'href="https://megapc.tn/shop/COMPOSANTS/CARTE GRAPHIQUE">carte '
                              'graphique</a>, ou mémoire vive, sélectionnez le '
                              '<strong>PC Gaming</strong> adapté à vos '
                              'besoins.</p><p>Du <strong>PC de bureau</strong> '
                              "sur mesure à l'ordinateur portable gamer, nos "
                              'experts alimentent régulièrement les gammes '
                              "d'ordinateurs en nouveautés pour satisfaire aux "
                              'exigences des logiciels &amp; derniers jeux '
                              'Vidéo.</p><p>Trouvez le PC Gamer de vos rêves '
                              'chez MEGA PC. Config PC Gamer sur mesure; PC '
                              'Gamer fixe, PC gaming complet!</p>',
               'urlPhoto': '/uploads/categorie/1596623778969.jpg',
               'urlPhoto2': '/uploads/categorie/1620205162725.webp',
               'create_date': '2020-04-10T13:54:49.000Z',
               'update_date': '2021-12-16T11:33:51.893Z',
               'visible': True},
 'nFilsCategs': ['PC GAMER', 'Pc En PROMO']}
  • Related