Using the Network tab of Chrome DevTools, I was able to copy the GraphQL request ("Copy as cURL (bash)"), paste it into Insomnia, and generate a working Python request from it. Now the provider has changed something on their side, and I cannot even run the curl command in Insomnia any more: I only get a 400 response. With my previous code I get an error message that I cannot resolve myself. I would be very grateful for a working solution.
My code, which worked until now, is:
import requests
import json
def scrape_digitec():
    """POST the ENTER_SEARCH GraphQL query to digitec.ch and print the reply.

    The request body is a hand-assembled JSON string: the GraphQL document,
    followed by a ``variables`` object into which ``offset`` and ``search``
    are spliced by string concatenation, and the ``operationName``. It is
    sent as raw data (``data=``), so the ``content-type`` header below is
    what declares it as JSON.

    Prints the response object, then the decoded JSON body twice
    (pretty-printed and compact). Returns nothing.
    """
    url = "https://www.digitec.ch/api/graphql"
    headers = {
        "authority": "www.digitec.ch",
        "accept": "application/json",
        "accept-language": "de-CH",
        "cache-control": "no-cache",
        "content-type": "application/json",
        "origin": "https://www.digitec.ch",
        "pragma": "no-cache",
        "referer": "https://www.digitec.ch/search?q=bang olufsen",
        "sec-ch-ua": '"Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"',
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-origin",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36",
        "x-dg-country": "ch",
        "x-dg-mandator": "406802",
        "x-dg-portal": "25",
        "x-dg-testgroup": "Default"
    }
    search = 'lg'
    # Kept as a string because it is concatenated into the JSON text below.
    offset = '0'
    # Adjacent literals are joined by the compiler into one string; the text
    # is split per GraphQL field / fragment purely for readability.
    # The doubled backslashes put a literal ``\t`` into the body, which the
    # JSON parser on the receiving side reads as a tab character.
    payload = (
        # --- operation signature ---
        '{"query":"query ENTER_SEARCH(\\t$query: String!\\t$sortOrder: ProductSort\\t$limit: Int = 9\\t$offset: Int = 0\\t$filters: [SearchFilter]\\t$include: [String!]\\t$exclude: [String!]\\t$searchQueryId: String\\t$siteId: String) {'
        '\\tsearch(\\t\\tquery: $query\\t\\tfilters: $filters\\t\\tsearchQueryId: $searchQueryId\\t\\tsiteId: $siteId\\t) {'
        # --- selection set of search(...) ---
        '\\t\\tproducts(limit: $limit, offset: $offset, sortOrder: $sortOrder) {\\t\\t\\ttotal\\t\\t\\thasMore\\t\\t\\tnextOffset\\t\\t\\tresults {\\t\\t\\t\\t...ProductSearchResult\\t\\t\\t\\t__typename\\t\\t\\t}\\t\\t\\t__typename\\t\\t}'
        '\\t\\tfilters(include: $include, exclude: $exclude) {\\t\\t\\tproduct {\\t\\t\\t\\tidentifier\\t\\t\\t\\tname\\t\\t\\t\\tfilterType\\t\\t\\t\\tscore\\t\\t\\t\\ttooltip {\\t\\t\\t\\t\\t...FilterTooltipResult\\t\\t\\t\\t\\t__typename\\t\\t\\t\\t}\\t\\t\\t\\t...CheckboxSearchFilterResult\\t\\t\\t\\t...RangeSearchFilterResult\\t\\t\\t\\t__typename\\t\\t\\t}\\t\\t\\t__typename\\t\\t}'
        '\\t\\tmagazinePages(limit: 3) {\\t\\t\\tids {\\t\\t\\t\\tid\\t\\t\\t\\tscore\\t\\t\\t\\t__typename\\t\\t\\t}\\t\\t\\ttotal\\t\\t\\t__typename\\t\\t}'
        '\\t\\tauthors(limit: 3) {\\t\\t\\tids {\\t\\t\\t\\tid\\t\\t\\t\\tscore\\t\\t\\t\\t__typename\\t\\t\\t}\\t\\t\\ttotal\\t\\t\\t__typename\\t\\t}'
        '\\t\\tdiscussions(limit: 3) {\\t\\t\\tids {\\t\\t\\t\\tid\\t\\t\\t\\tscore\\t\\t\\t\\t__typename\\t\\t\\t}\\t\\t\\ttotal\\t\\t\\t__typename\\t\\t}'
        '\\t\\tquestions(limit: 3) {\\t\\t\\tids {\\t\\t\\t\\tid\\t\\t\\t\\tscore\\t\\t\\t\\t__typename\\t\\t\\t}\\t\\t\\ttotal\\t\\t\\t__typename\\t\\t}'
        '\\t\\tratings(limit: 3) {\\t\\t\\tids {\\t\\t\\t\\tid\\t\\t\\t\\tscore\\t\\t\\t\\t__typename\\t\\t\\t}\\t\\t\\ttotal\\t\\t\\t__typename\\t\\t}'
        '\\t\\tproductTypes(limit: 24) {\\t\\t\\ttotal\\t\\t\\tresults {\\t\\t\\t\\tid\\t\\t\\t\\tname\\t\\t\\t\\tprimarySynonyms\\t\\t\\t\\tisVisible\\t\\t\\t\\tdescription\\t\\t\\t\\tmetaDescription\\t\\t\\t\\timageUrl\\t\\t\\t\\tsearchScore\\t\\t\\t\\t__typename\\t\\t\\t}\\t\\t\\t__typename\\t\\t}'
        '\\t\\tbrands(limit: 24) {\\t\\t\\ttotal\\t\\t\\tresults {\\t\\t\\t\\tid\\t\\t\\t\\ttitle\\t\\t\\t\\tsearchScore\\t\\t\\t\\t__typename\\t\\t\\t}\\t\\t\\t__typename\\t\\t}'
        '\\t\\thelp(limit: 3) {\\t\\t\\tids {\\t\\t\\t\\tid\\t\\t\\t\\tscore\\t\\t\\t\\t__typename\\t\\t\\t}\\t\\t\\ttotal\\t\\t\\thasMore\\t\\t\\tresults {\\t\\t\\t\\tsearchScore\\t\\t\\t\\ttitle\\t\\t\\t\\tid\\t\\t\\t\\turl\\t\\t\\t\\t__typename\\t\\t\\t}\\t\\t\\t__typename\\t\\t}'
        '\\t\\t_meta {\\t\\t\\tqueryInfo {\\t\\t\\t\\tcorrectedQuery\\t\\t\\t\\tdidYouMeanQuery\\t\\t\\t\\tlastProductSearchPass\\t\\t\\t\\texecutedSearchTerm\\t\\t\\t\\ttestGroup\\t\\t\\t\\tisManagedQuery\\t\\t\\t\\tisRerankedQuery\\t\\t\\t\\t__typename\\t\\t\\t}\\t\\t\\tredirectionUrl\\t\\t\\tportalReferral {\\t\\t\\t\\tproductCount\\t\\t\\t\\tportalName\\t\\t\\t\\turl\\t\\t\\t\\tproductImageUrls\\t\\t\\t\\t__typename\\t\\t\\t}\\t\\t\\t__typename\\t\\t}'
        '\\t\\t__typename\\t}}'
        # --- fragments ---
        'fragment ProductSearchResult on ProductSearchResultItem {\\tsearchScore\\tmandatorSpecificData {\\t\\t...ProductMandatorSpecific\\t\\t__typename\\t}\\tproduct {\\t\\t...ProductMandatorIndependent\\t\\t__typename\\t}\\toffer {\\t\\t...ProductOffer\\t\\t__typename\\t}\\t__typename}'
        'fragment FilterTooltipResult on FilterTooltip {\\ttext\\tmoreInformationLink\\t__typename}'
        'fragment CheckboxSearchFilterResult on CheckboxSearchFilter {\\toptions {\\t\\tidentifier\\t\\tname\\t\\tproductCount\\t\\tscore\\t\\treferenceValue {\\t\\t\\tvalue\\t\\t\\tunit {\\t\\t\\t\\tabbreviation\\t\\t\\t\\t__typename\\t\\t\\t}\\t\\t\\t__typename\\t\\t}\\t\\tpreferredValue {\\t\\t\\tvalue\\t\\t\\tunit {\\t\\t\\t\\tabbreviation\\t\\t\\t\\t__typename\\t\\t\\t}\\t\\t\\t__typename\\t\\t}\\t\\ttooltip {\\t\\t\\t...FilterTooltipResult\\t\\t\\t__typename\\t\\t}\\t\\t__typename\\t}\\t__typename}'
        'fragment RangeSearchFilterResult on RangeSearchFilter {\\treferenceMin\\tpreferredMin\\treferenceMax\\tpreferredMax\\treferenceStepSize\\tpreferredStepSize\\trangeMergeInfo {\\t\\tisBottomMerged\\t\\tisTopMerged\\t\\t__typename\\t}\\treferenceUnit {\\t\\tabbreviation\\t\\t__typename\\t}\\tpreferredUnit {\\t\\tabbreviation\\t\\t__typename\\t}\\trangeFilterDataPoint {\\t\\t...RangeFilterDataPointResult\\t\\t__typename\\t}\\t__typename}'
        'fragment ProductMandatorSpecific on MandatorSpecificData {\\tisBestseller\\tisDeleted\\tshowroomSites\\tsectorIds\\t__typename}'
        'fragment ProductMandatorIndependent on ProductV2 {\\tid\\tproductId\\tname\\tnameProperties\\tproductTypeId\\tproductTypeName\\tbrandId\\tbrandName\\taverageRating\\ttotalRatings\\ttotalQuestions\\tisProductSet\\timages {\\t\\turl\\t\\theight\\t\\twidth\\t\\t__typename\\t}\\tenergyEfficiency {\\t\\tenergyEfficiencyColorType\\t\\tenergyEfficiencyLabelText\\t\\tenergyEfficiencyLabelSigns\\t\\tenergyEfficiencyImage {\\t\\t\\turl\\t\\t\\theight\\t\\t\\twidth\\t\\t\\t__typename\\t\\t}\\t\\t__typename\\t}\\tseo {\\t\\tseoProductTypeName\\t\\tseoNameProperties\\t\\tproductGroups {\\t\\t\\tproductGroup1\\t\\t\\tproductGroup2\\t\\t\\tproductGroup3\\t\\t\\tproductGroup4\\t\\t\\t__typename\\t\\t}\\t\\tgtin\\t\\t__typename\\t}\\thasVariants\\tsmallDimensions\\tbasePrice {\\t\\tpriceFactor\\t\\tvalue\\t\\t__typename\\t}\\t__typename}'
        'fragment ProductOffer on OfferV2 {\\tid\\tproductId\\tofferId\\tshopOfferId\\tprice {\\t\\tamountIncl\\t\\tamountExcl\\t\\tcurrency\\t\\tfraction\\t\\t__typename\\t}\\tdeliveryOptions {\\t\\tmail {\\t\\t\\tclassification\\t\\t\\tfutureReleaseDate\\t\\t\\t__typename\\t\\t}\\t\\tpickup {\\t\\t\\tsiteId\\t\\t\\tclassification\\t\\t\\tfutureReleaseDate\\t\\t\\t__typename\\t\\t}\\t\\tdetailsProvider {\\t\\t\\tproductId\\t\\t\\tofferId\\t\\t\\tquantity\\t\\t\\ttype\\t\\t\\t__typename\\t\\t}\\t\\t__typename\\t}\\tlabel\\ttype\\tvolumeDiscountPrices {\\t\\tminAmount\\t\\tprice {\\t\\t\\tamountIncl\\t\\t\\tamountExcl\\t\\t\\tcurrency\\t\\t\\t__typename\\t\\t}\\t\\tisDefault\\t\\t__typename\\t}\\tsalesInformation {\\t\\tnumberOfItems\\t\\tnumberOfItemsSold\\t\\tisEndingSoon\\t\\tvalidFrom\\t\\t__typename\\t}\\tincentiveText\\tisIncentiveCashback\\tisNew\\tisSalesPromotion\\thideInProductDiscovery\\tcanAddToBasket\\thidePrice\\tinsteadOfPrice {\\t\\ttype\\t\\tprice {\\t\\t\\tamountIncl\\t\\t\\tamountExcl\\t\\t\\tcurrency\\t\\t\\tfraction\\t\\t\\t__typename\\t\\t}\\t\\t__typename\\t}\\tminOrderQuantity\\t__typename}'
        'fragment RangeFilterDataPointResult on RangeFilterDataPoint {\\tcount\\treferenceValue {\\t\\tvalue\\t\\tunit {\\t\\t\\tabbreviation\\t\\t\\t__typename\\t\\t}\\t\\t__typename\\t}\\tpreferredValue {\\t\\tvalue\\t\\tunit {\\t\\t\\tabbreviation\\t\\t\\t__typename\\t\\t}\\t\\t__typename\\t}\\t__typename}'
        # --- variables: offset and search are concatenated into the JSON ---
        '\",\"variables\":{\"limit\":100,\"offset\":' + offset +
        ',\"query\":\"' + search +
        '\",\"filters\":[],\"sortOrder\":null,\"include\":[\"bra\",\"pt\",\"pr\"],\"exclude\":[\"off\"],\"searchQueryId\":\"4ce81461-09e2-4f7a-bb9a-8f6f8503fdc4\",\"siteId\":null},\"operationName\":\"ENTER_SEARCH\"}'
    )
    response = requests.request("POST", url, data=payload, headers=headers)
    print(response)
    data = response.json()
    print(json.dumps(data, indent=2))
    print(json.dumps(data))
if __name__ == '__main__':
    # Run the scraper only when this file is executed as a script.
    scrape_digitec()
CodePudding user response:
You need to build your payload as JSON-serialisable Python objects (dictionaries/lists) and then pass it with the `json` parameter instead of `data`:
import requests
import json
def scrape_digitec():
    """POST the ENTER_SEARCH GraphQL query to digitec.ch and print the reply.

    The body is built from plain Python objects (a one-element list holding
    the operation dict) and handed to ``requests`` through ``json=``, which
    serialises it, so ``search`` and ``offset`` need no manual quoting.

    Prints the response object, then the decoded JSON body twice
    (pretty-printed and compact). If the body is not valid JSON, the raw
    text is printed instead and the function returns early. Returns nothing.
    """
    url = "https://www.digitec.ch/api/graphql"
    headers = {
        "authority": "www.digitec.ch",
        "accept": "application/json",
        "accept-language": "de-CH",
        "cache-control": "no-cache",
        "content-type": "application/json",
        "origin": "https://www.digitec.ch",
        "pragma": "no-cache",
        "referer": "https://www.digitec.ch/search?q=bang olufsen",
        "sec-ch-ua": '"Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"',
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-origin",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36",
        "x-dg-country": "ch",
        "x-dg-mandator": "406802",
        "x-dg-portal": "25",
        "x-dg-testgroup": "Default"
    }
    search = 'lg'
    offset = 0
    # Adjacent literals are joined by the compiler into one string; the text
    # is split per GraphQL field / fragment purely for readability.
    query = (
        # --- operation signature ---
        "query ENTER_SEARCH($query: String!, $sortOrder: ProductSort, $limit: Int = 9, $offset: Int = 0, $filters: [SearchFilter], $include: [String!], $exclude: [String!], $searchQueryId: String, $siteId: String) {\n"
        " search(\n query: $query\n filters: $filters\n searchQueryId: $searchQueryId\n siteId: $siteId\n ) {\n"
        # --- selection set of search(...) ---
        " products(limit: $limit, offset: $offset, sortOrder: $sortOrder) {\n total\n hasMore\n nextOffset\n results {\n ...ProductSearchResult\n __typename\n }\n __typename\n }\n"
        " filters(include: $include, exclude: $exclude) {\n product {\n identifier\n name\n filterType\n score\n tooltip {\n ...FilterTooltipResult\n __typename\n }\n ...CheckboxSearchFilterResult\n ...RangeSearchFilterResult\n __typename\n }\n __typename\n }\n"
        " magazinePages(limit: 3) {\n ids {\n id\n score\n __typename\n }\n total\n __typename\n }\n"
        " authors(limit: 3) {\n ids {\n id\n score\n __typename\n }\n total\n __typename\n }\n"
        " discussions(limit: 3) {\n ids {\n id\n score\n __typename\n }\n total\n __typename\n }\n"
        " questions(limit: 3) {\n ids {\n id\n score\n __typename\n }\n total\n __typename\n }\n"
        " ratings(limit: 3) {\n ids {\n id\n score\n __typename\n }\n total\n __typename\n }\n"
        " productTypes(limit: 24) {\n total\n results {\n id\n name\n primarySynonyms\n isVisible\n description\n metaDescription\n imageUrl\n searchScore\n __typename\n }\n __typename\n }\n"
        " brands(limit: 24) {\n total\n results {\n id\n title\n searchScore\n __typename\n }\n __typename\n }\n"
        " _meta {\n queryInfo {\n correctedQuery\n didYouMeanQuery\n lastProductSearchPass\n executedSearchTerm\n testGroup\n isManagedQuery\n isRerankedQuery\n __typename\n }\n redirectionUrl\n portalReferral {\n productCount\n portalName\n url\n productImageUrls\n __typename\n }\n __typename\n }\n"
        " __typename\n }\n}\n\n"
        # --- fragments ---
        "fragment ProductSearchResult on ProductSearchResultItem {\n searchScore\n mandatorSpecificData {\n ...ProductMandatorSpecific\n __typename\n }\n product {\n ...ProductMandatorIndependent\n __typename\n }\n offer {\n ...ProductOffer\n __typename\n }\n __typename\n}\n\n"
        "fragment FilterTooltipResult on FilterTooltip {\n text\n moreInformationLink\n __typename\n}\n\n"
        "fragment CheckboxSearchFilterResult on CheckboxSearchFilter {\n options {\n identifier\n name\n productCount\n score\n referenceValue {\n value\n unit {\n abbreviation\n __typename\n }\n __typename\n }\n preferredValue {\n value\n unit {\n abbreviation\n __typename\n }\n __typename\n }\n tooltip {\n ...FilterTooltipResult\n __typename\n }\n __typename\n }\n __typename\n}\n\n"
        "fragment RangeSearchFilterResult on RangeSearchFilter {\n referenceMin\n preferredMin\n referenceMax\n preferredMax\n referenceStepSize\n preferredStepSize\n rangeMergeInfo {\n isBottomMerged\n isTopMerged\n __typename\n }\n referenceUnit {\n abbreviation\n __typename\n }\n preferredUnit {\n abbreviation\n __typename\n }\n rangeFilterDataPoint {\n ...RangeFilterDataPointResult\n __typename\n }\n __typename\n}\n\n"
        "fragment ProductMandatorSpecific on MandatorSpecificData {\n isBestseller\n isDeleted\n showroomSites\n sectorIds\n __typename\n}\n\n"
        "fragment ProductMandatorIndependent on ProductV2 {\n id\n productId\n name\n nameProperties\n productTypeId\n productTypeName\n brandId\n brandName\n averageRating\n totalRatings\n totalQuestions\n isProductSet\n images {\n url\n height\n width\n __typename\n }\n energyEfficiency {\n energyEfficiencyColorType\n energyEfficiencyLabelText\n energyEfficiencyLabelSigns\n energyEfficiencyImage {\n url\n height\n width\n __typename\n }\n __typename\n }\n seo {\n seoProductTypeName\n seoNameProperties\n productGroups {\n productGroup1\n productGroup2\n productGroup3\n productGroup4\n __typename\n }\n gtin\n __typename\n }\n hasVariants\n smallDimensions\n basePrice {\n priceFactor\n value\n __typename\n }\n __typename\n}\n\n"
        "fragment ProductOffer on OfferV2 {\n id\n productId\n offerId\n shopOfferId\n price {\n amountIncl\n amountExcl\n currency\n fraction\n __typename\n }\n deliveryOptions {\n mail {\n classification\n futureReleaseDate\n __typename\n }\n pickup {\n siteId\n classification\n futureReleaseDate\n __typename\n }\n detailsProvider {\n productId\n offerId\n quantity\n type\n __typename\n }\n __typename\n }\n label\n type\n volumeDiscountPrices {\n minAmount\n price {\n amountIncl\n amountExcl\n currency\n __typename\n }\n isDefault\n __typename\n }\n salesInformation {\n numberOfItems\n numberOfItemsSold\n isEndingSoon\n validFrom\n __typename\n }\n incentiveText\n isIncentiveCashback\n isNew\n isSalesPromotion\n hideInProductDiscovery\n canAddToBasket\n hidePrice\n insteadOfPrice {\n type\n price {\n amountIncl\n amountExcl\n currency\n fraction\n __typename\n }\n __typename\n }\n minOrderQuantity\n __typename\n}\n\n"
        "fragment RangeFilterDataPointResult on RangeFilterDataPoint {\n count\n referenceValue {\n value\n unit {\n abbreviation\n __typename\n }\n __typename\n }\n preferredValue {\n value\n unit {\n abbreviation\n __typename\n }\n __typename\n }\n __typename\n}\n"
    )
    # The operation is wrapped in a list: the body sent is a JSON array
    # containing this single operation object.
    payload = [{
        "operationName": "ENTER_SEARCH",
        "variables": {
            "limit": 24,
            "offset": offset,
            "query": search,
            "filters": [],
            # "sortOrder" and "siteId" are omitted rather than sent as null.
            "include": ["bra", "pt", "pr", "off"],
            "searchQueryId": "e1b620fc-bf9c-41c6-85c0-cc49e5d12e25",
        },
        "query": query,
    }]
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.post(url, json=payload, headers=headers, timeout=30)
    print(response)
    try:
        data = response.json()
    except ValueError:
        # An error reply (e.g. a 400) may not carry a JSON body; show the raw
        # text instead of failing with a decoding error.
        print(response.text)
        return
    print(json.dumps(data, indent=2))
    print(json.dumps(data))
if __name__ == '__main__':
    # Run the scraper only when this file is executed as a script.
    scrape_digitec()