I'm new to Scrapy and I have a small problem. Here is my code:
import scrapy
class SubcategoriasSpider(scrapy.Spider):
    """Crawl the pages linked from the bodegas menu and scrape their products.

    ``parse`` follows every link found in the ``#menu-bodegas`` menu of the
    start page; ``parse_items`` yields one item per product link found on
    each followed page.
    """

    name = 'subCategorias'
    start_urls = ['https://donvino.com.ar/categoria-producto/bodegas']

    def parse(self, response):
        """Schedule a request for each link in the bodegas menu."""
        bodegas = response.css('ul.menu#menu-bodegas li')
        for bodega in bodegas:
            url = bodega.css("a::attr(href)").get()
            if url is None:
                # Menu entry without a link: there is nothing to follow.
                continue
            # The bodega name is available here through
            # bodega.css('a::text').get(), but it is not forwarded to
            # parse_items in this version of the spider.
            yield response.follow(url, callback=self.parse_items)

    def parse_items(self, response):
        """Yield the name and price of every product on the page."""
        # Several classes on the same element must be chained with dots;
        # a space between them would select a descendant element instead.
        vinos = response.css(
            'a.woocommerce-LoopProduct-link.woocommerce-loop-product__link'
        )
        for vino in vinos:
            precio = vino.css(
                'span.woocommerce-Price-amount.amount::text'
            ).get()
            yield {
                'nombre': vino.css('h2::text').get(),
                # Remove commas from the price text. A missing price stays
                # None instead of becoming the literal string "None".
                'precio': (
                    precio.replace(",", "") if precio is not None else None
                ),
            }
I need to include `name_bodega = bodega.css('a::text').get()` in the items yielded by `parse_items()`, something like this:
yield {
'name_bodega' : ............
'nombre' : vino.css('h2::text').get(),
'precio' : str(vino.css('span.woocommerce-Price-amount.amount::text').get()).replace(",", "")
}
The problem is that `name_bodega` is not in `vino`, so I don't know how to solve it. Is there any way to add the bodega name? Thanks in advance!
CodePudding user response:
You can use the request's `meta` dictionary to pass data from one callback function to the next:
class SubcategoriasSpider(scrapy.Spider):
    """Crawl the pages linked from the bodegas menu and scrape their products.

    ``parse`` follows every link found in the ``#menu-bodegas`` menu of the
    start page and attaches the link text (the bodega name) to the request
    through ``meta``; ``parse_items`` reads it back and adds it to every
    product item it yields.
    """

    name = 'subCategorias'
    start_urls = ['https://donvino.com.ar/categoria-producto/bodegas']

    def parse(self, response):
        """Schedule a request per bodega link, carrying the bodega name."""
        bodegas = response.css('ul.menu#menu-bodegas li')
        for bodega in bodegas:
            url = bodega.css("a::attr(href)").get()
            if url is None:
                # Menu entry without a link: there is nothing to follow.
                continue
            name_bodega = bodega.css('a::text').get()
            # Whatever is stored in ``meta`` is available again as
            # ``response.meta`` inside the callback. (Recent Scrapy versions
            # also offer ``cb_kwargs`` for passing user data to callbacks.)
            yield response.follow(
                url,
                callback=self.parse_items,
                meta={'item': {'name_bodega': name_bodega}},
            )

    def parse_items(self, response):
        """Yield bodega name, product name and price for each product."""
        name_bodega = response.meta['item']['name_bodega']
        # Several classes on the same element must be chained with dots;
        # a space between them would select a descendant element instead.
        vinos = response.css(
            'a.woocommerce-LoopProduct-link.woocommerce-loop-product__link'
        )
        for vino in vinos:
            precio = vino.css(
                'span.woocommerce-Price-amount.amount::text'
            ).get()
            yield {
                'name_bodega': name_bodega,
                'nombre': vino.css('h2::text').get(),
                # Remove commas from the price text. A missing price stays
                # None instead of becoming the literal string "None".
                'precio': (
                    precio.replace(",", "") if precio is not None else None
                ),
            }
See the details here: "Scrapy: understanding how do items and requests work between callbacks".