How can I use the json module to extract the price from a page that provides the data in JSON
format in an inline script?
I tried to extract the price from https://glomark.lk/top-crust-bread/p/13676, but I couldn't get the price value.
Please help me solve this.
import requests
import json
import sys

# Put the bundled bs4.zip first on the import path so that bs4 is imported from it.
sys.path.insert(0, 'bs4.zip')
from bs4 import BeautifulSoup

# Send a browser-like User-agent header with the page request.
user_agent = {
    'User-agent': 'Mozilla/5.0 Chrome/35.0.1916.47'
}
headers = user_agent
url = 'https://glomark.lk/top-crust-bread/p/13676'

# Download the product page and parse the returned HTML.
req = requests.get(url, headers=headers)
soup = BeautifulSoup(req.content, 'html.parser')

# The class string must stay on a single line: a plain quoted string literal
# cannot contain a raw line break.
products = soup.find_all(
    "div",
    class_="details col-12 col-sm-12 col-md-6 col-lg-5 col-xl-5",
)

for product in products:
    product_name = product.h1.text
    # NOTE(review): this element appears to be filled in by JavaScript after the
    # page loads, so its text is presumably empty in the HTML fetched here.
    product_price = product.find(id='product-promotion-price').text
    print(product_name)
    print(product_price)
CodePudding user response:
You can grab the JSON data (price) from the hidden API using only the requests
module. The product name, however, is not dynamic (it is present in the static HTML).
import requests

# Headers sent with the API call: a JSON content type plus the
# X-Requested-With marker.
headers = {
    'content-type': 'application/json',
    'x-requested-with': 'XMLHttpRequest',
}
api_url = "https://glomark.lk/product-page/variation-detail/13676"

# POST to the variation-detail endpoint. The timeout stops the script from
# hanging indefinitely, and raise_for_status() turns an HTTP error response
# into an exception instead of a confusing JSON decoding failure.
response = requests.post(api_url, headers=headers, timeout=10)
response.raise_for_status()
jsonData = response.json()

# The decoded JSON object exposes the price under the 'price' key.
price = jsonData['price']
print(price)
Output:
95
Full working code:
from bs4 import BeautifulSoup
import requests

# Headers sent with the API call: a JSON content type plus the
# X-Requested-With marker.
headers = {
    'content-type': 'application/json',
    'x-requested-with': 'XMLHttpRequest',
}
api_url = "https://glomark.lk/product-page/variation-detail/13676"

# Price: fetched from the variation-detail endpoint. The timeout prevents an
# indefinite hang and raise_for_status() surfaces HTTP errors explicitly.
api_response = requests.post(api_url, headers=headers, timeout=10)
api_response.raise_for_status()
jsonData = api_response.json()
price = jsonData['price']

# Product name: not dynamic, so it is read from the product page's HTML.
url = 'https://glomark.lk/top-crust-bread/p/13676'
req = requests.get(url, timeout=10)
req.raise_for_status()
soup = BeautifulSoup(req.content, 'html.parser')
title = soup.select_one('.product-title h1').text

print(title)
print(price)
Output:
Top Crust Bread
95
CodePudding user response:
As mentioned, the content is provided dynamically by JavaScript,
so one approach is to grab the data directly from the script tag, which you already figured out in your question.
# Select the JSON-LD <script> tag (its type is "application/ld+json", with a
# plus sign) and decode its text into a Python dict.
data = json.loads(soup.select_one('[type="application/ld+json"]').text)
will give you a dict with product information:
{'@context': 'https://schema.org', '@type': 'Product', 'productID': '13676', 'name': 'Top Crust Bread', 'description': 'Top Crust Bread', 'url': '/top-crust-bread/p/13676', 'image': 'https://objectstorage.ap-mumbai-1.oraclecloud.com/n/softlogicbicloud/b/cdn/o/products/350001--01--1555692328.jpeg', 'brand': 'GLOMARK', 'offers': [{'@type': 'Offer', 'price': '95', 'priceCurrency': 'LKR', 'itemCondition': 'https://schema.org/NewCondition', 'availability': 'https://schema.org/InStock'}]}
Simply pick the information you need, such as the price:
# 'offers' is a list of offer dicts; the first entry holds the price as a string ('95' in the dict shown above).
data['offers'][0]['price']
Example
import json

import requests
from bs4 import BeautifulSoup
import pandas as pd

url = 'https://glomark.lk/top-crust-bread/p/13676'

# Download the product page. The timeout prevents an indefinite hang and
# raise_for_status() surfaces HTTP errors explicitly.
response = requests.get(url, timeout=10)
response.raise_for_status()

# Name the parser explicitly so Beautiful Soup does not have to guess one.
soup = BeautifulSoup(response.content, 'html.parser')

# The product data sits in the JSON-LD <script> tag, whose type attribute is
# "application/ld+json" (with a plus sign); decode its text into a dict.
data = json.loads(soup.select_one('[type="application/ld+json"]').text)

# 'offers' is a list of offer dicts; the first entry carries the price.
product_price = data['offers'][0]['price']
product_name = data['name']
product_image = data['image']

print(product_name)
print(product_price)
print(product_image)
Output
Top Crust Bread
95
https://objectstorage.ap-mumbai-1.oraclecloud.com/n/softlogicbicloud/b/cdn/o/products/350001--01--1555692328.jpeg