I am trying to fetch the images from this website, but the response comes back like this.
The website is powered by WP Rocket, if that helps.
from bs4 import BeautifulSoup
import requests
# Browser-like request headers for fetching the chapter page.
#
# Two headers from a copied browser request are intentionally NOT sent:
#   * 'br' in accept-encoding -- requests only decodes Brotli bodies when an
#     optional Brotli package is installed; without it, response.text is
#     unreadable compressed data.
#   * 'if-modified-since' -- it turns the request into a conditional one, so
#     the server may reply "304 Not Modified" with an empty body.
headers = {
    # The MIME type is 'application/xhtml+xml'; the '+' must be present.
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
    'accept-encoding': 'gzip, deflate',
    'accept-language': 'en-US,en;q=0.9',
    'referer': 'https://swatmanga.me/1369130/my-school-life-pretending-to-be-a-worthless-person-04/',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-user': '?1',
    'sec-gpc': '1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.102 Safari/537.36',
}
link = 'https://swatmanga.me/1369130/my-school-life-pretending-to-be-a-worthless-person-04/'
video = requests.get(link, headers=headers, timeout=3)
# Fail loudly on 4xx/5xx instead of printing an error page as if it were the chapter.
video.raise_for_status()
print(video.text)
CodePudding user response:
You need to understand how the page gets these images. In this case, all the image URLs are already available in a script tag of the initial HTML.
import requests
from bs4 import BeautifulSoup
import json
# Browser-like request headers. 'br' is not advertised in accept-encoding
# because requests only decodes Brotli bodies when an optional Brotli package
# is installed; gzip/deflate are always decoded transparently.
headers = {
    # The MIME type is 'application/xhtml+xml'; the '+' must be present.
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
    'accept-encoding': 'gzip, deflate',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.102 Safari/537.36',
}
# requests has no default timeout, so set one to avoid hanging indefinitely.
response = requests.get(
    'https://swatmanga.me/1369517/my-school-life-pretending-to-be-a-worthless-person-13/',
    headers=headers,
    timeout=10,
)
# Stop here on 4xx/5xx rather than trying to parse an error page.
response.raise_for_status()
soup = BeautifulSoup(response.text, 'lxml')

# The image list is embedded in the first <script> whose text mentions 'post_id'.
script_tag = next(
    (tag for tag in soup.find_all('script') if 'post_id' in tag.get_text()),
    None,
)
if script_tag is None:
    raise ValueError("no <script> tag containing 'post_id' was found in the page")

script_block = str(script_tag)
# Keep only the text between the first '(' and the last ')': that span is
# parsed as JSON below (presumably the argument of a JavaScript call).
script_block = script_block[script_block.find('(') + 1:script_block.rfind(')')]

# Print every image URL of the first entry in 'sources'.
for image in json.loads(script_block)['sources'][0]['images']:
    print(image)
OUTPUT:
https://i0.wp.com/swatmanga.me/wp-content/uploads/2022/08/a_001-30.jpg
https://i3.wp.com/swatmanga.me/wp-content/uploads/2022/08/a_002-30.jpg
https://i1.wp.com/swatmanga.me/wp-content/uploads/2022/08/a_003-30.jpg
https://i0.wp.com/swatmanga.me/wp-content/uploads/2022/08/a_004-30.jpg
https://i2.wp.com/swatmanga.me/wp-content/uploads/2022/08/a_005-30.jpg
https://i1.wp.com/swatmanga.me/wp-content/uploads/2022/08/a_006-30.jpg
https://i3.wp.com/swatmanga.me/wp-content/uploads/2022/08/a_007-30.jpg
https://i3.wp.com/swatmanga.me/wp-content/uploads/2022/08/a_008-30.jpg
https://i1.wp.com/swatmanga.me/wp-content/uploads/2022/08/a_009-29.jpg
https://i1.wp.com/swatmanga.me/wp-content/uploads/2022/08/a_010-29.jpg
https://i0.wp.com/swatmanga.me/wp-content/uploads/2022/08/b_001-30.jpg
https://i3.wp.com/swatmanga.me/wp-content/uploads/2022/08/b_002-30.jpg
https://i1.wp.com/swatmanga.me/wp-content/uploads/2022/08/b_003-30.jpg
https://i0.wp.com/swatmanga.me/wp-content/uploads/2022/08/b_004-30.jpg
https://i2.wp.com/swatmanga.me/wp-content/uploads/2022/08/b_005-30.jpg
https://i1.wp.com/swatmanga.me/wp-content/uploads/2022/08/b_006-30.jpg
https://i3.wp.com/swatmanga.me/wp-content/uploads/2022/08/b_007-30.jpg
https://i3.wp.com/swatmanga.me/wp-content/uploads/2022/08/b_008-30.jpg
https://i1.wp.com/swatmanga.me/wp-content/uploads/2022/08/b_009-29.jpg
https://i1.wp.com/swatmanga.me/wp-content/uploads/2022/08/b_010-29.jpg