Home > Blockchain > How to extract JSON data from a jQuery callback wrapper while scraping/crawling
How to extract JSON data from a jQuery callback wrapper while scraping/crawling

Time:03-01

I have an issue when fetching data from an API: the data comes back wrapped in a jQuery callback (JSONP format), as shown below.

The data comes from the API in the format below when I use response.text:

('jQuery22409533634503620285_1646033040125([{"id":"3512497","auction_day":"18 ' 'january at 10:00 CET","auction_name":"Decorative sale 18 January ' '2022","auction_type":6,"auction_is_live":false,"auction_status":"published","catalog_key":546,"catalog_nr":"592","catalog_total":951,"catalog_prev":null,"catalog_next":{"auction_seo":"20211109","catalog_nr":"100"},"is_primary_object":false,"is_premium":false,"in_memory_list":false,"telephone_bid":false,"logged_in":false,"user_id":false,"asian_pre_registered":false,"is_premium_user":false,"client_identified":false,"client_pep_answered":false,"title":"Pablo ' 'Picasso","body":"Efter. \"Fredsmappen\". Signerade Picasso i trycket samt ' 'numrerade i blyerts 260\/500. Edition Mouvement de la Paix, Paris. Sju ' 'litografier, 28 x 38 cm (vardera ' 'blad).","petits":"1974.","condition":"Samtliga ramade. Ej examinerade ur ' 'ramar. N\u00e5got gulnade. N\u00e5got varierande ramm\u00e5tt: ' '36 x 47 cm och 36 x 46 ' 'cm.","provenance":null,"exhibition":null,"literature":null,"change":null,"context":null,"artist_name":"Pablo ' 'Picasso","artist_years":"Spanien\/Frankrike 1881-1973. ' '","dds":1,"export_ban":0,"estimate":"8.000 - ' '10.000","estimates":{"estimate_from_sek":"8000","estimate_to_sek":"10000","estimate_from_eur":"780","estimate_to_eur":"1000"},"hammer_price":"7000","absentee_bid_deadline":"2022-01-18 ' '10:00:00","bids_allowed":false,"latest_bid":"7000","minimum_bid":{"show":false,"my_max_bid":null,"my_max_bid_human":null,"leading_bid":null,"leading_bidder":null,"bids":[],"minimum_bid":null,"minimum_bid_human":null,"button_text":null,"valid_bid":false},"auction":"20220118","seo_name":"20220118","img_alt":"Pablo ' 'Picasso Spanien\/Frankrike 1881-1973. Efter. \"Fredsmappen\". Signerade ' 'Picasso i trycket samt numrerade i blyerts 260\/500. Edition Mouvement de ' 'la Paix, Paris. 
Sju litografier, 28 x 38 cm (vardera ' 'blad).","images":[{"id":"3512497","catalog_nr":"592","path210":"\/view\/210\/3512497.png?1638276330","path464":"\/view\/464\/3512497.jpg?1638276330","path900":"\/view\/900\/3512497.jpg?1638276330","large":"\/view\/large\/3512497.jpg?1638276330"},{"id":"3512497-1","catalog_nr":"592-1","path210":"\/view\/210\/3512497-1.png?1638276323","path464":"\/view\/464\/3512497-1.jpg?1638276323","path900":"\/view\/900\/3512497-1.jpg?1638276323","large":"\/view\/large\/3512497-1.jpg?1638276323"},{"id":"3512497-2","catalog_nr":"592-2","path210":"\/view\/210\/3512497-2.png?1638276316","path464":"\/view\/464\/3512497-2.jpg?1638276316","path900":"\/view\/900\/3512497-2.jpg?1638276316","large":"\/view\/large\/3512497-2.jpg?1638276316"},{"id":"3512497-3","catalog_nr":"592-3","path210":"\/view\/210\/3512497-3.png?1638276320","path464":"\/view\/464\/3512497-3.jpg?1638276320","path900":"\/view\/900\/3512497-3.jpg?1638276320","large":"\/view\/large\/3512497-3.jpg?1638276320"},{"id":"3512497-4","catalog_nr":"592-4","path210":"\/view\/210\/3512497-4.png?1638276313","path464":"\/view\/464\/3512497-4.jpg?1638276313","path900":"\/view\/900\/3512497-4.jpg?1638276313","large":"\/view\/large\/3512497-4.jpg?1638276313"},{"id":"3512497-5","catalog_nr":"592-5","path210":"\/view\/210\/3512497-5.png?1638276310","path464":"\/view\/464\/3512497-5.jpg?1638276310","path900":"\/view\/900\/3512497-5.jpg?1638276310","large":"\/view\/large\/3512497-5.jpg?1638276310"},{"id":"3512497-6","catalog_nr":"592-6","path210":"\/view\/210\/3512497-6.png?1638276306","path464":"\/view\/464\/3512497-6.jpg?1638276306","path900":"\/view\/900\/3512497-6.jpg?1638276306","large":"\/view\/large\/3512497-6.jpg?1638276306"},{"id":"3512497-7","catalog_nr":"592-7","path210":"\/view\/210\/3512497-7.png?1638276289","path464":"\/view\/464\/3512497-7.jpg?1638276289","path900":"\/view\/900\/3512497-7.jpg?1638276289","large":"\/view\/large\/3512497-7.jpg?1638276289"}],"viewing":null,"department":"Mode
rn ' 'Prints & Books","department_id":100,"transport":"300"}])')

I want the output in JSON format:

[{"id":"3512497","auction_day":"18 ' 'january at 10:00 CET","auction_name":"Decorative sale 18 January ' '2022","auction_type":6,"auction_is_live":false,"auction_status":"published","catalog_key":546,"catalog_nr":"592","catalog_total":951,"catalog_prev":null,"catalog_next":{"auction_seo":"20211109","catalog_nr":"100"},"is_primary_object":false,"is_premium":false,"in_memory_list":false,"telephone_bid":false,"logged_in":false,"user_id":false,"asian_pre_registered":false,"is_premium_user":false,"client_identified":false,"client_pep_answered":false,"title":"Pablo ' 'Picasso","body":"Efter. \"Fredsmappen\". Signerade Picasso i trycket samt ' 'numrerade i blyerts 260\/500. Edition Mouvement de la Paix, Paris. Sju ' 'litografier, 28 x 38 cm (vardera ' 'blad).","petits":"1974.","condition":"Samtliga ramade. Ej examinerade ur ' 'ramar. N\u00e5got gulnade. N\u00e5got varierande ramm\u00e5tt: ' '36 x 47 cm och 36 x 46 ' 'cm.","provenance":null,"exhibition":null,"literature":null,"change":null,"context":null,"artist_name":"Pablo ' 'Picasso","artist_years":"Spanien\/Frankrike 1881-1973. ' '","dds":1,"export_ban":0,"estimate":"8.000 - ' '10.000","estimates":{"estimate_from_sek":"8000","estimate_to_sek":"10000","estimate_from_eur":"780","estimate_to_eur":"1000"},"hammer_price":"7000","absentee_bid_deadline":"2022-01-18 ' '10:00:00","bids_allowed":false,"latest_bid":"7000","minimum_bid":{"show":false,"my_max_bid":null,"my_max_bid_human":null,"leading_bid":null,"leading_bidder":null,"bids":[],"minimum_bid":null,"minimum_bid_human":null,"button_text":null,"valid_bid":false},"auction":"20220118","seo_name":"20220118","img_alt":"Pablo ' 'Picasso Spanien\/Frankrike 1881-1973. Efter. \"Fredsmappen\". Signerade ' 'Picasso i trycket samt numrerade i blyerts 260\/500. Edition Mouvement de ' 'la Paix, Paris. 
Sju litografier, 28 x 38 cm (vardera ' 'blad).","images":[{"id":"3512497","catalog_nr":"592","path210":"\/view\/210\/3512497.png?1638276330","path464":"\/view\/464\/3512497.jpg?1638276330","path900":"\/view\/900\/3512497.jpg?1638276330","large":"\/view\/large\/3512497.jpg?1638276330"},{"id":"3512497-1","catalog_nr":"592-1","path210":"\/view\/210\/3512497-1.png?1638276323","path464":"\/view\/464\/3512497-1.jpg?1638276323","path900":"\/view\/900\/3512497-1.jpg?1638276323","large":"\/view\/large\/3512497-1.jpg?1638276323"},{"id":"3512497-2","catalog_nr":"592-2","path210":"\/view\/210\/3512497-2.png?1638276316","path464":"\/view\/464\/3512497-2.jpg?1638276316","path900":"\/view\/900\/3512497-2.jpg?1638276316","large":"\/view\/large\/3512497-2.jpg?1638276316"},{"id":"3512497-3","catalog_nr":"592-3","path210":"\/view\/210\/3512497-3.png?1638276320","path464":"\/view\/464\/3512497-3.jpg?1638276320","path900":"\/view\/900\/3512497-3.jpg?1638276320","large":"\/view\/large\/3512497-3.jpg?1638276320"},{"id":"3512497-4","catalog_nr":"592-4","path210":"\/view\/210\/3512497-4.png?1638276313","path464":"\/view\/464\/3512497-4.jpg?1638276313","path900":"\/view\/900\/3512497-4.jpg?1638276313","large":"\/view\/large\/3512497-4.jpg?1638276313"},{"id":"3512497-5","catalog_nr":"592-5","path210":"\/view\/210\/3512497-5.png?1638276310","path464":"\/view\/464\/3512497-5.jpg?1638276310","path900":"\/view\/900\/3512497-5.jpg?1638276310","large":"\/view\/large\/3512497-5.jpg?1638276310"},{"id":"3512497-6","catalog_nr":"592-6","path210":"\/view\/210\/3512497-6.png?1638276306","path464":"\/view\/464\/3512497-6.jpg?1638276306","path900":"\/view\/900\/3512497-6.jpg?1638276306","large":"\/view\/large\/3512497-6.jpg?1638276306"},{"id":"3512497-7","catalog_nr":"592-7","path210":"\/view\/210\/3512497-7.png?1638276289","path464":"\/view\/464\/3512497-7.jpg?1638276289","path900":"\/view\/900\/3512497-7.jpg?1638276289","large":"\/view\/large\/3512497-7.jpg?1638276289"}],"viewing":null,"department":"Mode
rn ' 'Prints & Books","department_id":100,"transport":"300"}]

My code is below, followed by the error I get when calling response.json():

import requests
import pymongo
import json
from pprint import pprint


# Search-archive endpoint queried by the script.
# NOTE(review): the query string includes `callback=jQuery...`; the
# response.text sample shown above is wrapped in a `jQuery...( ... )` call
# rather than being bare JSON -- presumably because of this parameter; confirm
# against the API.
url = "https://live.uppsalaauktion.se/api/searcharchive/execute?callback=jQuery22401117076961935719_1646032781147&estimate_max=100000000&estimate_min=1000&hammer_max=100000000&hammer_min=1000&query=picasso&per_page=100&language_id=2&paging=1&token=&_=1646032781148"

# The same parameters as in the URL's query string, passed as `data=` to the
# GET request below.
payload = "callback=jQuery22401117076961935719_1646032781147&estimate_max=100000000&estimate_min=1000&hammer_max=100000000&hammer_min=1000&query=picasso&per_page=100&language_id=2&paging=1&token=&_=1646032781148"
headers = {

  'Content-Type': 'text/json',

  'Cookie': 'session=d1c582b3ef346ceedf3645751cb8c452eb0b5d0a~621c77ed469a41-62319936'
}

response = requests.request("GET", url, headers=headers, data=payload)
# This is the call that raises JSONDecodeError in the traceback below
# ("Expecting value: line 1 column 1 (char 0)").
jsonData=response.json()
pprint(jsonData)

error:

Input In [25], in <module>
      6 headers = {
      7 #   'Content-Type': 'text/plain',
      8   'Content-Type': 'text/json',
      9     
     10   'Cookie': 'session=d1c582b3ef346ceedf3645751cb8c452eb0b5d0a~621c77ed469a41-62319936'
     11 }
     13 response = requests.request("GET", url, headers=headers, data=payload)
---> 14 jsonData=response.json()
     15 pprint(jsonData)

File /usr/lib/python3/dist-packages/requests/models.py:888, in Response.json(self, **kwargs)
    886 if encoding is not None:
    887     try:
--> 888         return complexjson.loads(
    889             self.content.decode(encoding), **kwargs
    890         )
    891     except UnicodeDecodeError:
    892         # Wrong UTF codec detected; usually because it's not UTF-8
    893         # but some other 8-bit codec.  This is an RFC violation,
    894         # and the server didn't bother to tell us what codec *was*
    895         # used.
    896         pass

File /usr/lib/python3/dist-packages/simplejson/__init__.py:518, in loads(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, use_decimal, **kw)
    467 """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
    468 document) to a Python object.
    469 
   (...)
    512 
    513 """
    514 if (cls is None and encoding is None and object_hook is None and
    515         parse_int is None and parse_float is None and
    516         parse_constant is None and object_pairs_hook is None
    517         and not use_decimal and not kw):
--> 518     return _default_decoder.decode(s)
    519 if cls is None:
    520     cls = JSONDecoder

File /usr/lib/python3/dist-packages/simplejson/decoder.py:370, in JSONDecoder.decode(self, s, _w, _PY3)
    368 if _PY3 and isinstance(s, bytes):
    369     s = str(s, self.encoding)
--> 370 obj, end = self.raw_decode(s)
    371 end = _w(s, end).end()
    372 if end != len(s):

File /usr/lib/python3/dist-packages/simplejson/decoder.py:400, in JSONDecoder.raw_decode(self, s, idx, _w, _PY3)
    398     elif ord0 == 0xef and s[idx:idx + 3] == '\xef\xbb\xbf':
    399         idx += 3
--> 400 return self.scan_once(s, idx=_w(s, idx).end())

JSONDecodeError: Expecting value: line 1 column 1 (char 0)


How can I fetch/convert the data into JSON format? Please help.

CodePudding user response:

import requests
import json


# Search endpoint. Because the query string carries ``callback=...``, the
# server answers with JSONP -- ``jQuery...(<json>)`` -- rather than bare JSON,
# which is why ``response.json()`` cannot be used directly.
url = "https://live.uppsalaauktion.se/api/searcharchive/execute?callback=jQuery22401117076961935719_1646032781147&estimate_max=100000000&estimate_min=1000&hammer_max=100000000&hammer_min=1000&query=picasso&per_page=100&language_id=2&paging=1&token=&_=1646032781148"

payload = "callback=jQuery22401117076961935719_1646032781147&estimate_max=100000000&estimate_min=1000&hammer_max=100000000&hammer_min=1000&query=picasso&per_page=100&language_id=2&paging=1&token=&_=1646032781148"
headers = {'Content-Type': 'text/json','Cookie':'session=d1c582b3ef346ceedf3645751cb8c452eb0b5d0a~621c77ed469a41-62319936'}


def parse_jsonp(text):
    """Decode a JSONP response body into Python objects.

    A JSONP body has the form ``callbackName(<json>)``, optionally followed
    by ``;``. Everything between the first ``(`` and the last ``)`` is the
    JSON document. A body that already starts with ``[`` or ``{`` is treated
    as plain JSON and decoded as-is.

    Args:
        text: The raw response body as a string.

    Returns:
        The decoded JSON value (typically a ``list`` or ``dict``).

    Raises:
        ValueError: If no ``callback(...)`` wrapper is found, or if the
            wrapped content is not valid JSON (``json.JSONDecodeError`` is a
            subclass of ``ValueError``).
    """
    body = text.strip()
    if not body.startswith(("[", "{")):
        start = body.find("(")
        # rfind: parentheses may also occur inside JSON string values, so the
        # wrapper's closing parenthesis is the last one in the body.
        end = body.rfind(")")
        if start == -1 or end < start:
            raise ValueError("response is not JSONP: no callback(...) wrapper found")
        body = body[start + 1:end]
    return json.loads(body)


def extract_objects(data):
    """Return the list of result records from a decoded response.

    When the decoded document is an object, the records are taken from its
    ``"objects"`` key; a bare list is returned unchanged.

    Raises:
        KeyError: If ``data`` is a dict without an ``"objects"`` key.
    """
    if isinstance(data, dict):
        return data["objects"]
    return data


if __name__ == "__main__":
    response = requests.request("GET", url, headers=headers, data=payload, timeout=30)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    # Strip the jQuery callback wrapper and decode the whole document, rather
    # than cutting the text on hard-coded substrings that change with paging.
    content_json = extract_objects(parse_jsonp(response.text))

    # print id
    print(content_json[0]['id'])
  • Related