I am trying to scrape this site to get the list of offers.
The problem is that we need to fill 2 forms (2 POST queries) before receiving the final result.
This is what I have done so far:
import requests as rs
from form_data import form_data1, form_data2
# Open one HTTP session, read the serverCacheId from the API, then submit
# both forms to the save-form-data endpoint as form-encoded bodies.
base_url = "https://compare.energy.vic.gov.au/api"

with rs.Session() as s:
    # The first call returns a JSON document that carries "serverCacheId".
    url_ = f"{base_url}/get-psb-details?serverCacheId=null"
    r = s.get(url_)
    serverCacheId = r.json()["serverCacheId"]

    # Both forms go to the same endpoint, in order.
    save_url = f"{base_url}/save-form-data"
    for form in (form_data1, form_data2):
        r = s.post(save_url, data=form)
Then I am trying to retrieve the offers after the second POST query:
# Request the offers through the same session `s`, passing the cache id
# obtained earlier as a query-string parameter.
url_ = "https://compare.energy.vic.gov.au/api/get-offers"
body = dict(
    serverCacheId=str(serverCacheId),
    loopBack="false",
    selectedEnergy="/offer",
)
r = s.get(url_, params=body)
offers = r.json()
print(offers)
But unfortunately I get a message indicating a redirection:
{'status': 'redirect', 'message': 'no data'}
The two POST requests send the following data:
# Payload of the first POST (pageDataType "energyConfigData").
# Every scalar value is sent as a string; "distributor" is a nested mapping.
form_data1 = {
    "showSolarSelection": "true",
    "energyType": "Electricity",
    "userType": "Residential",
    "bill": "no bill",
    "postcode": "3000",
    "usageProfile": "0",
    "averageDailyConsumption": "0",
    "skipNMI": "true",
    "smartMeter": "1",
    "disclaimer": "true",
    "hasSolar": "0",
    "hasConcession": "0",
    # Distributor record submitted together with the form.
    "distributor": {
        "id": "4",
        "name": "Citipower",
        "display": "Citipower",
        "phone": "1300 301 101 / 13 12 80",
        "distribution_zone_id": "11",
        "distribution_zone_name": "All",
    },
    "distributorDerived": "0",
    "distributorSubmit": "true",
    "pageDataType": "energyConfigData",
    "loopBack": "true",
}
and
# Payload of the second POST (pageDataType "energyProfileData").
# All values are strings; keys are grouped below by their name prefix.
form_data2 = {
    # Household basics
    "pvCapacity": "0",
    "pvCapacityCap": "null",
    "hhSize": "1",
    "totalRooms": "1",
    "fridgeCount": "0",
    "gasConnection": "4",
    # poolHeating*
    "poolHeating": "0",
    "poolHeatingSolar": "false",
    "poolHeatingGas": "false",
    "poolHeatingElectric": "false",
    "poolHeatingNone": "false",
    # spaceHeating*
    "spaceHeatingElectricDucted": "false",
    "spaceHeatingSplitSystem": "false",
    "spaceHeatingElectricUnderfloor": "false",
    "spaceHeatingElectricIndividual": "false",
    "spaceHeatingGasDucted": "false",
    "spaceHeatingGasUnderfloor": "false",
    "spaceHeatingGasIndividual": "false",
    "spaceHeatingOther": "false",
    "spaceHeatingNone": "true",
    # spaceCooling*
    "spaceCoolingRoomAC": "false",
    "spaceCoolingSplitSystem": "false",
    "spaceCoolingDuctedReverse": "false",
    "spaceCoolingDuctedEvaporative": "false",
    "spaceCoolingPortableRef": "false",
    "spaceCoolingPortableEvap": "false",
    "spaceCoolingOther": "false",
    "spaceCoolingNone": "true",
    # Appliance usage
    "seaDistance": "",
    "clothesDryer": "0",
    "clothesDryerWeekday": "",
    "clothesDryerWeekend": "",
    "dishwasherWeekday": "",
    "dishwasherWeekend": "",
    # waterHeating*
    "waterHeatingElectric": "false",
    "waterHeatingElectricSolar": "false",
    "waterHeatingGasStorage": "false",
    "waterHeatingGasInstant": "false",
    "waterHeatingGasSolar": "false",
    "waterHeatingOther": "true",
    # Remaining usage fields
    "controlledLoad": "",
    "tvTotal": "",
    "turnOffAtPowerShort": "",
    "ovensElectric": "",
    "ovensGas": "",
    "washingMachineUsage": "",
    "washingMachineWeekday": "",
    "washingMachineWeekend": "",
    "televisionUsageWeekday": "",
    "televisionUsageWeekend": "",
    "heatingUsageMethod": "",
    "gasUsageWinter": "0",
    "hhSize51": "",
    # Form metadata
    "energyType": "Electricity",
    "hasSolar": "0",
    "pageDataType": "energyProfileData",
    "loopBack": "false",
}
Expected result
The expected result is a JSON object containing offers. Here is its structure:
{
"selectedEnergyType": "Electricity",
"energyTypeCount": 1,
"offers": {
"Electricity": {
"offersList": [{...}]
}
}
}
CodePudding user response:
The site has some requirements and restrictions on the form data.
For form_data1:
- Add the required fields "solarCapacity" and "feedInTariff":
    "hasSolar": "0",
    "solarCapacity": "",  # Add this
    "hasConcession": "0",
    "feedInTariff": "",  # Add this
- Change "loopBack": "true" to "loopBack": false (a JSON boolean, i.e. False in Python):
    # "loopBack": "true"
    "loopBack": False
- Set "serverCacheId" and change data= to json=:
    # r = s.post(f"{base_url}/save-form-data", data=form_data1)
    r = s.post(f"{base_url}/save-form-data", json=dict(form_data1, serverCacheId=str(serverCacheId)))
For form_data2:
- Set "serverCacheId" and change data= to json=:
    # r = s.post(f"{base_url}/save-form-data", data=form_data2)
    r = s.post(f"{base_url}/save-form-data", json=dict(form_data2, serverCacheId=str(serverCacheId)))
- (Optional, for consistency) Change "loopBack": "false" to "loopBack": false (False in Python):
    # "loopBack": "false"
    "loopBack": False
CodePudding user response:
The problem is in your request data: a few required fields are missing.
"solarCapacity":"",
"feedInTariff":"",
"serverCacheId": serverCacheId,
The fields above are missing from the data. You also have to change "loopBack": "true" and "loopBack": "false" to "loopBack": False.
One more change is required:
s.post(f"{base_url}/save-form-data", data=form_data1)
this should be
s.post(f"{base_url}/save-form-data", json=form_data1)
Complete Code:
import json
import requests as rs
# Corrected payload of the first POST (pageDataType "energyConfigData").
# Compared with the question's version it carries "solarCapacity" and
# "feedInTariff", and "loopBack" is a real boolean instead of a string.
form_data1 = {
    "showSolarSelection": "true",
    "energyType": "Electricity",
    "userType": "Residential",
    "bill": "no bill",
    "postcode": "3000",
    "usageProfile": "0",
    "averageDailyConsumption": "0",
    "skipNMI": "true",
    "smartMeter": "1",
    "disclaimer": "true",
    "hasSolar": "0",
    "hasConcession": "0",
    # Distributor record submitted together with the form.
    "distributor": {
        "id": "4",
        "name": "Citipower",
        "display": "Citipower",
        "phone": "1300 301 101 / 13 12 80",
        "distribution_zone_id": "11",
        "distribution_zone_name": "All",
    },
    "distributorDerived": "0",
    "distributorSubmit": "true",
    "pageDataType": "energyConfigData",
    "solarCapacity": "",
    "feedInTariff": "",
    "loopBack": False,
}
# Corrected payload of the second POST (pageDataType "energyProfileData").
# Keys are grouped below by their name prefix; "loopBack" is a real boolean.
form_data2 = {
    # Household basics
    "pvCapacity": "0",
    "pvCapacityCap": "null",
    "hhSize": "1",
    "totalRooms": "1",
    "fridgeCount": "0",
    "gasConnection": "4",
    # poolHeating*
    "poolHeating": "0",
    "poolHeatingSolar": "false",
    "poolHeatingGas": "false",
    "poolHeatingElectric": "false",
    "poolHeatingNone": "false",
    # spaceHeating*
    "spaceHeatingElectricDucted": "false",
    "spaceHeatingSplitSystem": "false",
    "spaceHeatingElectricUnderfloor": "false",
    "spaceHeatingElectricIndividual": "false",
    "spaceHeatingGasDucted": "false",
    "spaceHeatingGasUnderfloor": "false",
    "spaceHeatingGasIndividual": "false",
    "spaceHeatingOther": "false",
    "spaceHeatingNone": "true",
    # spaceCooling*
    "spaceCoolingRoomAC": "false",
    "spaceCoolingSplitSystem": "false",
    "spaceCoolingDuctedReverse": "false",
    "spaceCoolingDuctedEvaporative": "false",
    "spaceCoolingPortableRef": "false",
    "spaceCoolingPortableEvap": "false",
    "spaceCoolingOther": "false",
    "spaceCoolingNone": "true",
    # Appliance usage
    "seaDistance": "",
    "clothesDryer": "0",
    "clothesDryerWeekday": "",
    "clothesDryerWeekend": "",
    "dishwasherWeekday": "",
    "dishwasherWeekend": "",
    # waterHeating*
    "waterHeatingElectric": "false",
    "waterHeatingElectricSolar": "false",
    "waterHeatingGasStorage": "false",
    "waterHeatingGasInstant": "false",
    "waterHeatingGasSolar": "false",
    "waterHeatingOther": "true",
    # Remaining usage fields
    "controlledLoad": "",
    "tvTotal": "",
    "turnOffAtPowerShort": "",
    "ovensElectric": "",
    "ovensGas": "",
    "washingMachineUsage": "",
    "washingMachineWeekday": "",
    "washingMachineWeekend": "",
    "televisionUsageWeekday": "",
    "televisionUsageWeekend": "",
    "heatingUsageMethod": "",
    "gasUsageWinter": "0",
    "hhSize51": "",
    # Form metadata
    "energyType": "Electricity",
    "hasSolar": "0",
    "hasConcession": "0",
    "pageDataType": "energyProfileData",
    "solarCapacity": "",
    "feedInTariff": "",
    "loopBack": False,
}
# Fetch the offers: read a serverCacheId from the API, submit both forms as
# JSON under that id, download the offers and save them to a JSON file.
base_url = "https://compare.energy.vic.gov.au/api"

# requests applies no timeout by default, so a stalled server would block
# the script forever; every call below is bounded by this many seconds.
request_timeout = 30

with rs.Session() as s:
    cache_id_url = f"{base_url}/get-psb-details?serverCacheId=null"
    cache_response = s.get(cache_id_url, timeout=request_timeout)
    cache_response.raise_for_status()
    cache_data = cache_response.json()
    serverCacheId = str(cache_data["serverCacheId"])

    # Both form payloads must carry the same cache id as the final query.
    form_data1["serverCacheId"] = serverCacheId
    form_data2["serverCacheId"] = serverCacheId

    for form_payload in (form_data1, form_data2):
        save_response = s.post(
            f"{base_url}/save-form-data",
            json=form_payload,
            timeout=request_timeout,
        )
        # Stop on an HTTP error here rather than carrying on and getting a
        # confusing "no data" answer from get-offers later.
        save_response.raise_for_status()

    offers_url = "https://compare.energy.vic.gov.au/api/get-offers"
    body = {
        "serverCacheId": serverCacheId,
        "loopBack": "false",
        "selectedEnergy": "/offer",
    }
    offers_response = s.get(offers_url, params=body, timeout=request_timeout)
    offers_response.raise_for_status()
    offers_data = offers_response.json()

# The session is no longer needed once the offers have been parsed.
with open('response_json_data.json', 'w', encoding='utf-8') as f:
    json.dump(offers_data, f, ensure_ascii=False, indent=4)
Output: JSON File Link