import requests
from bs4 import BeautifulSoup
import pandas as pd
# Scrape the denim collection page, follow every product link found there,
# and print the thumbnail image URL(s) listed in each product's slideshow nav.
baseurl = 'https://twillmkt.com'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'
}

r = requests.get('https://twillmkt.com/collections/denim')
soup = BeautifulSoup(r.content, 'html.parser')
tra = soup.find_all('div', class_='ProductItem__Wrapper')

# Build absolute product URLs from the relative hrefs inside each product wrapper.
productlinks = []
for links in tra:
    for link in links.find_all('a', href=True):
        comp = baseurl + link['href']
        productlinks.append(comp)

temp = []
for link in productlinks:
    r = requests.get(link, headers=headers)
    soup = BeautifulSoup(r.content, 'html.parser')
    up = soup.find('div', class_='Product__SlideshowNavScroller')
    if up is None:
        # No slideshow navigation on this page; nothing to print.
        continue
    # Iterate direct child *tags* only: iterating the element itself also
    # yields text nodes, on which .find('img') is str.find and returns an int.
    for pro in up.find_all(True, recursive=False):
        img = pro.find('img')
        if img is None:
            continue
        t = img.get('src')
        print(t)
The code works fine and gives me the image links, but I want to name them image1, image2, and so on, to get output like the one shown in the picture.
CodePudding user response:
Note: The main issue is that the number of images is not the same on every page, and that you are requesting the product pages multiple times because there are duplicates in your link list. The latter can be avoided by applying set() to the list.
One approach could be to append your data to a list of dicts and create a dataframe from it.
# One record per image: id is the text after the last '=' in the URL, e is the image's index.
data.append({'id': t.split('=')[-1], 'image': 'Image ' + str(e) + ' UI', 'link': t})
To get the layout you want, use pivot() to transform the data and fillna() to produce empty cells where there is no image source.
# Reshape to one row per id and one column per image label; missing cells become ''.
(
    df.pivot(index='id', columns='image', values='link')
    .reset_index()
    .fillna('')
)
Example
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Scrape the denim collection page, visit each distinct product page once,
# collect the slideshow thumbnail URLs, and pivot them into a table with one
# row per id and one "Image N UI" column per thumbnail position.
baseurl = 'https://twillmkt.com'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'
}

r = requests.get('https://twillmkt.com/collections/denim')
soup = BeautifulSoup(r.content, 'html.parser')
tra = soup.find_all('div', class_='ProductItem__Wrapper')

# Build absolute product URLs from the relative hrefs inside each product wrapper.
productlinks = []
for links in tra:
    for link in links.find_all('a', href=True):
        comp = baseurl + link['href']
        productlinks.append(comp)

data = []
# set() drops duplicate links so each product page is requested only once.
for link in set(productlinks):
    r = requests.get(link, headers=headers)
    soup = BeautifulSoup(r.content, 'html.parser')
    up = soup.find('div', class_='Product__SlideshowNavScroller')
    if up is None:
        # No slideshow navigation on this page; it contributes no rows.
        continue
    # Enumerate direct child *tags* only: iterating the element itself also
    # yields text nodes, on which .find('img') is str.find and returns an int.
    for e, pro in enumerate(up.find_all(True, recursive=False)):
        img = pro.find('img')
        t = img.get('src') if img is not None else None
        if not t:
            # Child without an <img src=...>; skip rather than crash on split().
            continue
        # id is whatever follows the last '=' in the image URL.
        data.append({'id': t.split('=')[-1], 'image': 'Image ' + str(e) + ' UI', 'link': t})

df = pd.DataFrame(data)
# Ordered categorical keeps the image columns in first-seen order after the pivot.
df['image'] = pd.Categorical(df['image'], categories=df['image'].unique(), ordered=True)
# NOTE(review): pivot raises if two rows share the same (id, image) pair — confirm ids are unique per product.
df = df.pivot(index='id', columns='image', values='link').reset_index().fillna('')
Output
id | Image 0 UI | Image 1 UI | Image 2 UI | ... |
---|---|---|---|---|
1631812617 | //cdn.shopify.com/s/files/1/0089/7912/0206/products/Blue-Ripped-Knee-Distressed-Skinny-Denim_160x.jpg?v=1631812617 | //cdn.shopify.com/s/files/1/0089/7912/0206/products/Blue-Ripped-Knee-Distressed-Skinny-Denim-2_160x.jpg?v=1631812617 | //cdn.shopify.com/s/files/1/0089/7912/0206/products/Blue-Ripped-Knee-Distressed-Skinny-Denim-3_160x.jpg?v=1631812617 | |
1631826938 | //cdn.shopify.com/s/files/1/0089/7912/0206/products/Light-Blue-Patch-Work-Stacked-Straight-Leg-Denim_160x.jpg?v=1631826938 | //cdn.shopify.com/s/files/1/0089/7912/0206/products/Light-Blue-Patch-Work-Stacked-Straight-Leg-Denim-2_160x.jpg?v=1631826938 | //cdn.shopify.com/s/files/1/0089/7912/0206/products/Light-Blue-Patch-Work-Stacked-Straight-Leg-Denim-3_160x.jpg?v=1631826938 | |
1631829399 | //cdn.shopify.com/s/files/1/0089/7912/0206/products/Khaki-Patch-Work-Stacked-Straight-Leg-Denim_160x.jpg?v=1631829399 | //cdn.shopify.com/s/files/1/0089/7912/0206/products/Khaki-Patch-Work-Stacked-Straight-Leg-Denim-2_160x.jpg?v=1631829399 | //cdn.shopify.com/s/files/1/0089/7912/0206/products/Khaki-Patch-Work-Stacked-Straight-Leg-Denim-3_160x.jpg?v=1631829399 | |
... |