I'm trying to search for a key wherever it appears inside a huge nested dict, but I'm getting this error and I can't figure out why. I checked the type of the dictionary, and it is `dict`. Maybe it's due to some part of the dictionary? It's too big for me to check manually.
import scrapy
# Spider from the question: pulls the JSON assigned to window.__PRELOADED_STATE__
# out of the page's <script> tag and tries to collect 'itemPrice' from it.
class MlSpider(scrapy.Spider):
name = 'detalhador'
start_urls=['https://produto.mercadolivre.com.br/MLB-1304118411-sandalia-feminina-anabela-confortavel-pingente-mac-cod-133-_JM?attributes=COLOR_SECONDARY_COLOR:UHJldGE=,SIZE:MzU=&quantity=1']
def parse(self, response,**kwargs):
import json
# NOTE(review): the capture group `(. ?\});` looks garbled by extraction
# (presumably `(.*?\});` in the original post) -- confirm before reusing.
d = response.xpath('//script[contains(., "window.__PRELOADED_STATE__")]/text()').re_first(r'(?s)window.__PRELOADED_STATE__ = (. ?\});')
data = json.loads(d)
temp='itemPrice'
# Only the top-level values of `data` are inspected here; nested levels are
# never visited. `temp in val` is evaluated for every top-level value, and the
# traceback below shows a bool reaching that test, which raises TypeError.
res = [val[temp] for key, val in data.items() if temp in val]
Output:
Traceback (most recent call last):
File "/home/deborador/anaconda3/envs/mapsmaps/lib/python3.10/site-packages/scrapy/utils/defer.py", line 132, in iter_errback
yield next(it)
File "/home/deborador/anaconda3/envs/mapsmaps/lib/python3.10/site-packages/scrapy/utils/python.py", line 354, in __next__
return next(self.data)
File "/home/deborador/anaconda3/envs/mapsmaps/lib/python3.10/site-packages/scrapy/utils/python.py", line 354, in __next__
return next(self.data)
File "/home/deborador/anaconda3/envs/mapsmaps/lib/python3.10/site-packages/scrapy/core/spidermw.py", line 66, in _evaluate_iterable
for r in iterable:
File "/home/deborador/anaconda3/envs/mapsmaps/lib/python3.10/site-packages/scrapy/spidermiddlewares/offsite.py", line 29, in process_spider_output
for x in result:
File "/home/deborador/anaconda3/envs/mapsmaps/lib/python3.10/site-packages/scrapy/core/spidermw.py", line 66, in _evaluate_iterable
for r in iterable:
File "/home/deborador/anaconda3/envs/mapsmaps/lib/python3.10/site-packages/scrapy/spidermiddlewares/referer.py", line 342, in <genexpr>
return (_set_referer(r) for r in result or ())
File "/home/deborador/anaconda3/envs/mapsmaps/lib/python3.10/site-packages/scrapy/core/spidermw.py", line 66, in _evaluate_iterable
for r in iterable:
File "/home/deborador/anaconda3/envs/mapsmaps/lib/python3.10/site-packages/scrapy/spidermiddlewares/urllength.py", line 40, in <genexpr>
return (r for r in result or () if _filter(r))
File "/home/deborador/anaconda3/envs/mapsmaps/lib/python3.10/site-packages/scrapy/core/spidermw.py", line 66, in _evaluate_iterable
for r in iterable:
File "/home/deborador/anaconda3/envs/mapsmaps/lib/python3.10/site-packages/scrapy/spidermiddlewares/depth.py", line 58, in <genexpr>
return (r for r in result or () if _filter(r))
File "/home/deborador/anaconda3/envs/mapsmaps/lib/python3.10/site-packages/scrapy/core/spidermw.py", line 66, in _evaluate_iterable
for r in iterable:
File "/home/deborador/Documentos/coding/mercadolivre/mercadolivre/mercadolivre/spiders/detalhador.py", line 22, in parse
res = [val[temp] for key, val in data.items() if temp in val]
File "/home/deborador/Documentos/coding/mercadolivre/mercadolivre/mercadolivre/spiders/detalhador.py", line 22, in <listcomp>
res = [val[temp] for key, val in data.items() if temp in val]
TypeError: argument of type 'bool' is not iterable
Piece of the dictionary
{"translations": {}, "initialState": {"id": "MLB1304118411", "variation_id": "46176185176", "layout": "vip-core", "vertical": "core", "components_locations": {"variations": "short_description"}, "components": {"head": [{"id": "compats_feedback", "type": "ui_message", "state": "HIDDEN", "closeable": false}, {"id": "related_searches", "type": "related_searches", "state": "VISIBLE", "title": {"text": "Voc\u00ea tamb\u00e9m pode gostar"}, "related_searches": [{"target": "https://lista.mercadolivre.com.br/chinelo-comfortflex#topkeyword", "timeout": 0, "duration": 0, "label": {"text": "chinelo comfortflex"}}, {"target": "https://lista.mercadolivre.com.br/cal\u00e7ados-andacco#topkeyword", "timeout": 0, "duration": 0, "label": {"text": "calcados andacco"}}, {"target": "https://lista.mercadolivre.com.br/chinelo-usaflex#topkeyword", "timeout": 0, "duration": 0, "label": {"text": "chinelo usaflex"}}, {"target": "https://lista.mercadolivre.com.br/chinelo-ortopedico#topkeyword", "timeout": 0, "duration": 0, "label": {"text": "chinelo ortopedico"}}, {"target": "https://lista.mercadolivre.com.br/chinelo-crocs-feminino#topkeyword", "timeout": 0, "duration": 0, "label": {"text": "chinelo crocs feminino"}}, {"target": "https://lista.mercadolivre.com.br/ramarim-sandalia#topkeyword", "timeout": 0, "duration": 0, "label": {"text": "ramarim sandalia"}}, {"target": "https://lista.mercadolivre.com.br/melissa-oficial#topkeyword", "timeout": 0, "duration": 0, "label": {"text": "melissa oficial"}}]}, {"id": "carousel_cheaper", "type": "carousel", "state": "VISIBLE", "carousel": {}, "carousel_config": {"site_id": "MLB", "item_id": "MLB1304118411", "category_id": "MLB273770", "client": "similar
CodePudding user response:
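The error occurs because at least one of the top-level values in the parsed JSON is a Boolean. The condition `temp in val` is a membership test, which only works on containers and iterables; `bool` is a subclass of `int`, so it supports neither membership tests nor key lookup (`val['itemPrice']`). That is why Python raises `TypeError: argument of type 'bool' is not iterable`.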
The solution is to use a better algorithm for traversing the dictionary.
For example:
import scrapy
import json
def findkeys(data, temp):
    """Yield every value stored under the key ``temp`` anywhere in ``data``.

    ``data`` is walked recursively: dicts are searched for ``temp`` and then
    each of their values is descended into; lists are descended into element
    by element.  Anything else (str, int, float, bool, None, ...) is a leaf
    and yields nothing, so scalars such as ``True`` never cause a
    ``TypeError``.

    Args:
        data: The object to search, e.g. the result of ``json.loads``.
        temp: The dictionary key to look for.

    Yields:
        The value of each ``temp`` entry, parents before their descendants,
        in container iteration order.  If a matching value is itself a
        dict or list it is yielded and then searched as well.
    """
    if isinstance(data, dict):
        if temp in data:
            yield data[temp]
        children = data.values()
    elif isinstance(data, list):
        # JSON arrays can hold further objects (e.g. a list of component
        # dicts), so skipping lists would silently miss nested matches.
        children = data
    else:
        # Leaf value: nothing to search.
        return
    for child in children:
        yield from findkeys(child, temp)
class MlSpider(scrapy.Spider):
    """Spider that prints every ``itemPrice`` found in a product page's state.

    The page embeds its data as a JSON object assigned to
    ``window.__PRELOADED_STATE__`` inside a ``<script>`` tag; ``parse``
    extracts that object and searches it recursively with ``findkeys``.
    """

    name = 'detalhador'
    start_urls = ['https://produto.mercadolivre.com.br/MLB-1304118411-sandalia-feminina-anabela-confortavel-pingente-mac-cod-133-_JM?attributes=COLOR_SECONDARY_COLOR:UHJldGE=,SIZE:MzU=&quantity=1']

    def parse(self, response, **kwargs):
        """Extract the preloaded-state JSON and print all ``itemPrice`` values.

        Args:
            response: The Scrapy response for one of ``start_urls``.
            **kwargs: Extra callback arguments passed by Scrapy; unused.
        """
        # `(.*?\});` lazily captures everything from the opening of the
        # object up to the first `};`.  The capture group must be able to
        # span the whole object ((?s) lets `.` cross newlines), otherwise
        # json.loads receives a fragment that is not valid JSON.
        d = response.xpath(
            '//script[contains(., "window.__PRELOADED_STATE__")]/text()'
        ).re_first(r'(?s)window.__PRELOADED_STATE__ = (.*?\});')
        if d is None:
            # re_first returns None when nothing matches; json.loads(None)
            # would raise TypeError, so report the miss and stop instead.
            self.logger.warning(
                "window.__PRELOADED_STATE__ not found in %s", response.url
            )
            return
        data = json.loads(d)
        temp = 'itemPrice'
        # The parsed state contains non-dict values (e.g. booleans), so a
        # flat `temp in val` test over data.items() raises TypeError; the
        # recursive search skips such leaves.
        lst = list(findkeys(data, temp))
        print(lst)
output
[7.77, 7.77]