I've been struggling with this for an hour, but apparently I'm misunderstanding something. The task is to write a scraper that is driven from the Django admin panel, and the scraping part works fine. Now I need to save all the data to the database, and here is the problem: only one photo appears in the Django admin panel, even though all of the photos are downloaded into the media folder.
# models.py
from django.db import models
class Apartment(models.Model):
    """An apartment listing; every attribute is stored as text."""

    # Admin label: "number of rooms".
    rooms = models.CharField(verbose_name='кол-во комнат', max_length=64)
    # Admin label: "price".
    price = models.CharField(verbose_name='цена', max_length=10)
    # Admin label: "address".
    address = models.CharField(verbose_name='Адрес', max_length=256)
    # Admin label: "description".
    desc = models.TextField(verbose_name='описание')
    # Admin label: "floor".
    floor = models.CharField(verbose_name='этаж', max_length=5)

    class Meta:
        # Singular / plural display names: "apartment" / "apartments".
        verbose_name = 'квартира'
        verbose_name_plural = 'квартиры'

    def __str__(self):
        """Return the address as the human-readable label of the listing."""
        return self.address
class Image(models.Model):
    """A photo belonging to one apartment; removed when the apartment is deleted."""

    # CASCADE: deleting the apartment deletes its photos as well.
    apartment = models.ForeignKey(to=Apartment, on_delete=models.CASCADE)
    # Files are stored under the 'media/' prefix of the configured storage.
    # NOTE(review): if MEDIA_ROOT already ends in 'media', this nests a second
    # 'media/' directory - confirm against the project settings.
    img = models.ImageField(upload_to='media/')

    class Meta:
        # Singular and plural display names are both "photo".
        verbose_name = 'фото'
        verbose_name_plural = 'фото'
class Url(models.Model):
    """A submitted link, timestamped on creation and listed newest first."""

    # Set once, automatically, when the row is first inserted.
    created = models.DateTimeField(auto_now_add=True)
    # Admin label: "link".
    url = models.URLField(verbose_name='ссылка')

    class Meta:
        # Singular / plural display names: "link" / "links".
        verbose_name = 'ссылка'
        verbose_name_plural = 'ссылки'
        # Most recently created links come first.
        ordering = ['-created']

    def __str__(self):
        """Return the raw URL as the display label."""
        return self.url
#scraper
def _find_value(xpath, attribute=None):
    """Read one value from the page currently loaded in ``driver``.

    Args:
        xpath: XPath of the element to look up.
        attribute: When given, return this attribute of the element instead
            of its visible text.

    Returns:
        The text (or attribute value) of the first matching element, or an
        empty string when the lookup fails or the value is missing.
    """
    try:
        element = driver.find_element(By.XPATH, xpath)
        value = element.text if attribute is None else element.get_attribute(attribute)
    except Exception:
        # Best-effort scraping: a missing element must not abort the listing.
        # ``Exception`` (rather than a bare ``except``) lets KeyboardInterrupt
        # and SystemExit propagate.
        return ''
    # get_attribute() returns None for an absent attribute; the text columns
    # of Apartment are not nullable, so normalise to ''.
    return value or ''


def _find_photo_urls():
    """Return the ``src`` of every 'fotorama__img' element on the current page.

    Returns:
        A list of non-empty URL strings; empty when nothing was found or the
        lookup failed.
    """
    try:
        sources = [
            img.get_attribute('src')
            for img in driver.find_elements(By.CLASS_NAME, 'fotorama__img')
        ]
    except Exception:
        return []
    # Drop elements without a usable src so the consumer never sees None.
    return [src for src in sources if src]


@receiver(post_save, sender=Url)
def saved_url(instance, created, **kwargs):
    """Scrape every listing linked from a newly created ``Url`` and store it.

    Runs on ``post_save`` of ``Url``. Nothing happens when an existing row is
    merely updated.

    Args:
        instance: The ``Url`` row that was saved.
        created: True only when the row was inserted by this save.
        **kwargs: Remaining signal arguments (unused).
    """
    if not created:
        return

    soup = make_request(instance.url)
    apartments = []
    for link in soup.find_all('div', attrs={'data-name': 'LinkArea'}):
        anchor = link.find('a')
        offer_url = anchor.get('href') if anchor is not None else None
        if not offer_url:
            # A link area without a usable <a href> cannot be visited.
            continue

        driver.get(offer_url)
        # Fixed pause after navigation.
        # NOTE(review): presumably gives the page time to render - an
        # explicit wait would be more reliable; confirm before changing.
        time.sleep(1)

        apartments.append(
            {
                'rooms': _find_value('//div[1][@data-name="OfferTitle"]/h1'),
                'price': _find_value('//div[1]/div/span/span[1][@itemprop="price"]'),
                'address': _find_value(
                    '//section/div/div[1]/div[2]/span[@itemprop="name"]',
                    attribute='content',
                ),
                'desc': _find_value('//div/span/p[@itemprop="description"]'),
                'floor': _find_value(
                    '//div/div[4]/div[1][@data-testid="object-summary-description-value"]'
                ),
                'photos': _find_photo_urls(),
            }
        )

    save_data(apartments)
#save function
def save_data(apartments_list):
    """Store scraped apartments and attach one ``Image`` row per photo.

    Args:
        apartments_list: Iterable of dicts with the keys 'rooms', 'price',
            'address', 'desc', 'floor' and 'photos' (a list of image URLs,
            or None when no photos were found).

    On the first error the exception is printed and processing stops; rows
    created before the failure are kept.
    """
    import os
    import posixpath
    from urllib.parse import urlsplit

    for ap in apartments_list:
        try:
            apartment = Apartment.objects.create(
                rooms=ap['rooms'],
                price=ap['price'],
                address=ap['address'],
                desc=ap['desc'],
                floor=ap['floor'],
            )
            for photo_url in ap['photos'] or ():
                # Without a target name urlretrieve() downloads to a temporary
                # file, so nothing is left in the working directory.
                tmp_path, _headers = urllib.request.urlretrieve(photo_url)
                try:
                    # Name the stored file after the last path segment of the
                    # URL, ignoring any query string.
                    file_name = posixpath.basename(urlsplit(photo_url).path) or 'photo'
                    # A fresh Image per photo; the apartment must be set
                    # before img.save(), because save=True also saves the row.
                    photo = Image(apartment=apartment)
                    with open(tmp_path, 'rb') as fh:
                        photo.img.save(file_name, File(fh), save=True)
                finally:
                    os.remove(tmp_path)
        except Exception as e:
            print(e)
            break
CodePudding user response:
Your problem is that you are not actually saving a separate `Image` object for each photo.
Try changing this:
def save_data(apartments_list):
    """Create an ``Apartment`` per dict and feed its photos through one ``Image``.

    Args:
        apartments_list: Iterable of dicts with the keys 'rooms', 'price',
            'address', 'desc', 'floor' and 'photos' (iterable of image URLs).

    NOTE(review): this is the version the surrounding text says to change. A
    single ``Image`` instance is shared by all photos of an apartment, the
    result of ``img.save()`` is assigned back to ``img``, and ``apartment``
    is set only after that call.
    """
    text_keys = ('rooms', 'price', 'address', 'desc', 'floor')
    for listing in apartments_list:
        # One Image object per apartment, created before the photo loop.
        photo = Image()
        try:
            flat = Apartment.objects.create(
                **{key: listing[key] for key in text_keys}
            )
            for src in listing['photos']:
                # Download into the working directory, named after the last
                # URL segment.
                target_name = src.split('/')[-1]
                saved_path, _headers = urllib.request.urlretrieve(src, target_name)
                photo.img = photo.img.save(saved_path, File(open(saved_path, 'rb')))
                photo.apartment = flat
        except Exception as err:
            # Report the problem and stop processing the remaining listings.
            print(err)
            break
to
def save_data(apartments_list):
    """Save each scraped apartment together with one ``Image`` row per photo.

    Args:
        apartments_list: Iterable of dicts with the keys 'rooms', 'price',
            'address', 'desc', 'floor' and 'photos' (a list of image URLs,
            or None when there are no photos).

    The first exception is printed and ends processing; anything stored
    before it stays in the database.
    """
    import os
    import posixpath
    from urllib.parse import urlsplit

    for ap in apartments_list:
        try:
            apartment = Apartment.objects.create(
                rooms=ap['rooms'],
                price=ap['price'],
                address=ap['address'],
                desc=ap['desc'],
                floor=ap['floor'],
            )
            for image in ap['photos'] or ():
                # urlretrieve() without a target writes to a temporary file.
                pic, _headers = urllib.request.urlretrieve(image)
                try:
                    # Use only the last path segment of the URL as the stored
                    # file name; the full URL is not a valid file name.
                    name = posixpath.basename(urlsplit(image).path) or 'photo'
                    # New Image for every photo, linked to its apartment
                    # *before* img.save(): with save=True that call also
                    # saves the model instance, which needs the foreign key.
                    im = Image(apartment=apartment)
                    with open(pic, 'rb') as fh:
                        im.img.save(name, File(fh), save=True)
                finally:
                    # The content has been copied into storage; drop the
                    # temporary download.
                    os.remove(pic)
        except Exception as e:
            print(e)
            break