Home > Net >  Retrieve data from web site after 2 POST queries
Retrieve data from web site after 2 POST queries

Time:09-22

I am trying to scrape this site to get the list of offers.
The problem is that we need to fill 2 forms (2 POST queries) before receiving the final result.

This is what I have done so far:

import requests as rs
from form_data import form_data1, form_data2
# Root of the API; the endpoint URLs in this snippet are built from it.
base_url = "https://compare.energy.vic.gov.au/api"
with rs.Session() as s:
   # Ask the API for a serverCacheId (the query string passes the literal "null").
   url_ = f"{base_url}/get-psb-details?serverCacheId=null"
   r = (s.get(url_))
   serverCacheId = r.json()["serverCacheId"]
   # Submit both forms to the same endpoint. data= sends each dict form-encoded,
   # and the dicts are passed as-is, i.e. without the serverCacheId read above.
   r = s.post(f"{base_url}/save-form-data", data=form_data1)
   r = s.post(f"{base_url}/save-form-data", data=form_data2)

Then I am trying to retrieve the offers after the second POST query:

# Query the offers endpoint with the cache id obtained earlier and print the
# decoded JSON reply.
# NOTE(review): `s` and `serverCacheId` are defined by the session snippet;
# presumably this runs inside that `with` block -- confirm.
offers_url = "https://compare.energy.vic.gov.au/api/get-offers"
query = {
    "serverCacheId": str(serverCacheId),
    "loopBack": "false",
    "selectedEnergy": "/offer",
}
response = s.get(offers_url, params=query)
print(response.json())

But unfortunately I get a message indicating a redirection:

{'status': 'redirect', 'message': 'no data'}

The 2 POSTs use the following data:

# Payload for the first POST to save-form-data (pageDataType "energyConfigData").
# Every scalar value, including flags and numbers, is written as a string.
form_data1 = {
    "showSolarSelection": "true",
    "energyType": "Electricity",
    "userType": "Residential",
    "bill": "no bill",
    "postcode": "3000",
    "usageProfile": "0",
    "averageDailyConsumption": "0",
    "skipNMI": "true",
    "smartMeter": "1",
    "disclaimer": "true",
    "hasSolar": "0",
    "hasConcession": "0",
    # The only nested value in this payload.
    "distributor": {
        "id": "4",
        "name": "Citipower",
        "display": "Citipower",
        "phone": "1300 301 101 / 13 12 80",
        "distribution_zone_id": "11",
        "distribution_zone_name": "All"
    },
    "distributorDerived": "0",
    "distributorSubmit": "true",
    "pageDataType": "energyConfigData",
    "loopBack": "true"
}

and

form_data2 = {
    "pvCapacity": "0", "pvCapacityCap": "null", "hhSize": "1", "totalRooms": "1", "fridgeCount": "0",
    "gasConnection": "4", "poolHeating": "0", "poolHeatingSolar": "false", "poolHeatingGas": "false",
    "poolHeatingElectric": "false", "poolHeatingNone": "false", "spaceHeatingElectricDucted": "false",
    "spaceHeatingSplitSystem": "false", "spaceHeatingElectricUnderfloor": "false",
    "spaceHeatingElectricIndividual": "false", "spaceHeatingGasDucted": "false",
    "spaceHeatingGasUnderfloor": "false", "spaceHeatingGasIndividual": "false", "spaceHeatingOther": "false",
    "spaceHeatingNone": "true", "spaceCoolingRoomAC": "false", "spaceCoolingSplitSystem": "false",
    "spaceCoolingDuctedReverse": "false", "spaceCoolingDuctedEvaporative": "false",
    "spaceCoolingPortableRef": "false", "spaceCoolingPortableEvap": "false", "spaceCoolingOther": "false",
    "spaceCoolingNone": "true", "seaDistance": "", "clothesDryer": "0", "clothesDryerWeekday": "",
    "clothesDryerWeekend": "", "dishwasherWeekday": "", "dishwasherWeekend": "",
    "waterHeatingElectric": "false", "waterHeatingElectricSolar": "false", "waterHeatingGasStorage": "false",
    "waterHeatingGasInstant": "false", "waterHeatingGasSolar": "false", "waterHeatingOther": "true",
    "controlledLoad": "", "tvTotal": "", "turnOffAtPowerShort": "", "ovensElectric": "", "ovensGas": "",
    "washingMachineUsage": "", "washingMachineWeekday": "", "washingMachineWeekend": "",
    "televisionUsageWeekday": "", "televisionUsageWeekend": "", "heatingUsageMethod": "",
    "gasUsageWinter": "0", "hhSize51": "", "energyType": "Electricity", "hasSolar": "0",
    "pageDataType": "energyProfileData", "loopBack": "false"
}

Expected result

The expected result is a JSON object containing offers. Here is its structure:

{
  "selectedEnergyType": "Electricity",
  "energyTypeCount": 1,
  "offers": {
    "Electricity": {
      "offersList": [{...}]
    }
  }
}

CodePudding user response:

The site has some requirements and restrictions on the form data.

form_data1:

  1. Add required fields "solarCapacity" and "feedInTariff".
    "hasSolar": "0",
    "solarCapacity": "",  # Add this
    "hasConcession": "0",
    "feedInTariff": "",   # Add this
    
  2. Change "loopBack": "true" to "loopBack": False (a real boolean, sent as JSON false).
    # "loopBack": "true"
    "loopBack": False
    
  3. Set "serverCacheId" and change data= to json=.
    # r = s.post(f"{base_url}/save-form-data", data=form_data1)
    r = s.post(f"{base_url}/save-form-data", json=dict(form_data1, serverCacheId=str(serverCacheId)))
    

form_data2:

  1. Set "serverCacheId" and change data= to json=.
    # r = s.post(f"{base_url}/save-form-data", data=form_data2)
    r = s.post(f"{base_url}/save-form-data", json=dict(form_data2, serverCacheId=str(serverCacheId)))
    
  2. (Optional, for consistency) Change "loopBack": "false" to "loopBack": False.
    # "loopBack": "false"
    "loopBack": False
    

CodePudding user response:

You have an issue with the request data: a few required fields were missing.

"solarCapacity":"",
"feedInTariff":"",
"serverCacheId": serverCacheId,

The fields above are missing from the data. You also have to change "loopBack": "true" and "loopBack": "false" to "loopBack": False.

One more change is required:

s.post(f"{base_url}/save-form-data", data=form_data1)

this should be

s.post(f"{base_url}/save-form-data", json=form_data1)

Complete Code:

import json
import requests as rs


# First payload for save-form-data (pageDataType "energyConfigData").
# "loopBack" is a real boolean here; the remaining scalar values are strings.
form_data1 = {
    "showSolarSelection": "true",
    "energyType": "Electricity",
    "userType": "Residential",
    "bill": "no bill",
    "postcode": "3000",
    "usageProfile": "0",
    "averageDailyConsumption": "0",
    "skipNMI": "true",
    "smartMeter": "1",
    "disclaimer": "true",
    "hasSolar": "0",
    "hasConcession": "0",
    "distributor": {
        "id": "4",
        "name": "Citipower",
        "display": "Citipower",
        "phone": "1300 301 101 / 13 12 80",
        "distribution_zone_id": "11",
        "distribution_zone_name": "All",
    },
    "distributorDerived": "0",
    "distributorSubmit": "true",
    "pageDataType": "energyConfigData",
    "solarCapacity": "",
    "feedInTariff": "",
    "loopBack": False,
}

form_data2 = {
    "pvCapacity":"0",
    "pvCapacityCap":"null",
    "hhSize":"1",
    "totalRooms":"1",
    "fridgeCount":"0",
    "gasConnection":"4",
    "poolHeating":"0",
    "poolHeatingSolar":"false",
    "poolHeatingGas":"false",
    "poolHeatingElectric":"false",
    "poolHeatingNone":"false",
    "spaceHeatingElectricDucted":"false",
    "spaceHeatingSplitSystem":"false",
    "spaceHeatingElectricUnderfloor":"false",
    "spaceHeatingElectricIndividual":"false",
    "spaceHeatingGasDucted":"false",
    "spaceHeatingGasUnderfloor":"false",
    "spaceHeatingGasIndividual":"false",
    "spaceHeatingOther":"false",
    "spaceHeatingNone":"true",
    "spaceCoolingRoomAC":"false",
    "spaceCoolingSplitSystem":"false",
    "spaceCoolingDuctedReverse":"false",
    "spaceCoolingDuctedEvaporative":"false",
    "spaceCoolingPortableRef":"false",
    "spaceCoolingPortableEvap":"false",
    "spaceCoolingOther":"false",
    "spaceCoolingNone":"true",
    "seaDistance":"",
    "clothesDryer":"0",
    "clothesDryerWeekday":"",
    "clothesDryerWeekend":"",
    "dishwasherWeekday":"",
    "dishwasherWeekend":"",
    "waterHeatingElectric":"false",
    "waterHeatingElectricSolar":"false",
    "waterHeatingGasStorage":"false",
    "waterHeatingGasInstant":"false",
    "waterHeatingGasSolar":"false",
    "waterHeatingOther":"true",
    "controlledLoad":"",
    "tvTotal":"",
    "turnOffAtPowerShort":"",
    "ovensElectric":"",
    "ovensGas":"",
    "washingMachineUsage":"",
    "washingMachineWeekday":"",
    "washingMachineWeekend":"",
    "televisionUsageWeekday":"",
    "televisionUsageWeekend":"",
    "heatingUsageMethod":"",
    "gasUsageWinter":"0",
    "hhSize51":"",
    "energyType":"Electricity",
    "hasSolar":"0",
    "hasConcession":"0",
    "pageDataType":"energyProfileData",
    "solarCapacity":"",
    "feedInTariff":"",
    "loopBack":False
}

base_url = "https://compare.energy.vic.gov.au/api"

# Seconds to wait for each HTTP call; requests applies no timeout unless one
# is passed explicitly, so a stalled server would otherwise hang the script.
REQUEST_TIMEOUT = 30

with rs.Session() as s:
    # Step 1: obtain a serverCacheId; the same id is sent with every later call.
    cache_id_url = f"{base_url}/get-psb-details?serverCacheId=null"
    cache_response = s.get(cache_id_url, timeout=REQUEST_TIMEOUT)
    cache_response.raise_for_status()
    cache_data = cache_response.json()
    serverCacheId = str(cache_data["serverCacheId"])

    form_data1["serverCacheId"] = serverCacheId
    form_data2["serverCacheId"] = serverCacheId

    # Step 2: save both forms, in order. json= sends each dict as a JSON body.
    # Stop on the first HTTP error instead of carrying on with a broken flow.
    save_url = f"{base_url}/save-form-data"
    s.post(save_url, json=form_data1, timeout=REQUEST_TIMEOUT).raise_for_status()
    s.post(save_url, json=form_data2, timeout=REQUEST_TIMEOUT).raise_for_status()

    # Step 3: fetch the offers for the same cache id.
    offers_url = f"{base_url}/get-offers"

    body = {
        "serverCacheId": serverCacheId,
        "loopBack": "false",
        "selectedEnergy": "/offer"
    }

    offers_response = s.get(offers_url, params=body, timeout=REQUEST_TIMEOUT)
    offers_response.raise_for_status()
    offers_data = offers_response.json()

    # The API has been observed to reply {'status': 'redirect', 'message': 'no data'}
    # instead of offers when the posted form data was incomplete. Raise rather
    # than write that reply to disk as if it were a result.
    if isinstance(offers_data, dict) and offers_data.get("status") == "redirect":
        raise RuntimeError(f"get-offers returned no offers: {offers_data}")

    with open('response_json_data.json', 'w', encoding='utf-8') as f:
        json.dump(offers_data, f, ensure_ascii=False, indent=4)

Output: JSON File Link

  • Related