I'm currently trying to create an algorithm that extracts data from a website whose URL takes a date-time parameter: http://website/example/example/YYYY-MM-DD HH/1
. For example, if I want to extract the data for 02/03/2022 at 3.00 pm, the link should be http://website/example/example/2022-03-02 15/1
. I want to extract data covering every hour of the day, for a whole week (I assume some kind of nested loop is required for this). Can anyone give me some guidance on this? Thank you.
JSON example for 02/03/2022:
[ {
  "@id" : "12233071",
  "definition" : {
    "parkingRecord" : {
      "@xmlns" : "http://datex2.eu/schema/2/2_0",
      "@xmlns:xsi" : "http://www.w3.org/2001/XMLSchema-instance",
      "parkingRecord" : {
        "@xsi:type" : "UrbanParkingSite",
        "@id" : "12233071",
        "@version" : "2.5",
        "parkingName" : {
          "values" : {
            "value" : {
              "@lang" : "en",
              "#text" : "210: Wilkinson Street Park and Ride"
            }
          }
        },
        "parkingDescription" : {
          "values" : {
            "value" : {
              "@lang" : "en",
              "#text" : "Wilkinson Street Park and Ride"
            }
          }
        },
        "parkingRecordVersionTime" : "2022-03-02T15:00:09.089Z",
        "parkingNumberOfSpaces" : "600",
        "parkingLocation" : {
          "@xsi:type" : "Point",
          "locationForDisplay" : {
            "latitude" : "52.97",
            "longitude" : "-1.18"
          }
        },
        "parkingThresholds" : {
          "almostFullDecreasing" : "80",
          "almostFullIncreasing" : "85",
          "fullDecreasing" : "90",
          "fullIncreasing" : "95"
        },
        "urbanParkingSiteType" : "offStreetParking"
      }
    }
  },
  "status" : {
    "parkingRecord" : {
      "@xmlns" : "http://datex2.eu/schema/2/2_0",
      "@xmlns:xsi" : "http://www.w3.org/2001/XMLSchema-instance",
      "parkingRecordStatus" : {
        "@xsi:type" : "ParkingSiteStatus",
        "parkingRecordReference" : {
          "@targetClass" : "ParkingRecord",
          "@id" : "12233071",
          "@version" : "2.5"
        },
        "parkingStatusOriginTime" : "2020-03-09T12:17:42.000Z",
        "parkingOccupancy" : {
          "parkingNumberOfOccupiedSpaces" : "227",
          "parkingOccupancy" : "37.83",
          "vehicleCountAndRate" : {
            "measurementTimeDefault" : "2022-03-02T14:58:57.000Z",
            "vehicleRate" : {
              "measurementOrCalculationTime" : "2022-03-02T14:58:57.000Z",
              "fillRate" : {
                "vehicleFlowRate" : "0"
              },
              "exitRate" : {
                "vehicleFlowRate" : "1"
              }
            }
          }
        },
        "parkingSiteStatus" : "other",
        "parkingSiteOpeningStatus" : "closed"
      }
    }
  }
} ]
CodePudding user response:
This should answer your question about constructing the required URLs:
import datetime

prefix = "http://website/example/example/"
start = datetime.datetime(2022, 1, 1)    # first day of the week to cover
delta = datetime.timedelta(days=7)       # one whole week
end = start + delta
hourdelta = datetime.timedelta(hours=1)  # step one hour at a time

urls = []
while start < end:
    # YYYY-MM-DD HH plus the trailing /1 path segment
    urls.append(prefix + start.strftime("%Y-%m-%d %H/1"))
    start += hourdelta

# Show the first four and last four generated URLs
for url in urls[:4]:
    print(url)
print()
for url in urls[-4:]:
    print(url)
Sample output (first 4 URLs and last 4 URLs):
http://website/example/example/2022-01-01 00/1
http://website/example/example/2022-01-01 01/1
http://website/example/example/2022-01-01 02/1
http://website/example/example/2022-01-01 03/1
http://website/example/example/2022-01-07 20/1
http://website/example/example/2022-01-07 21/1
http://website/example/example/2022-01-07 22/1
http://website/example/example/2022-01-07 23/1
To loop over these URLs, extract the data in JSON format, and store it in a CSV file, you can combine an HTTP client with Python's built-in csv module.
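Here is a minimal sketch of that last step. It assumes each URL returns the list-of-records JSON structure shown in the question; the requests library, the fetch_week_to_csv name, and the choice of CSV columns are illustrative rather than part of the original answer:

import csv
import requests  # assumed HTTP client; any equivalent works

def fetch_week_to_csv(urls, out_path):
    """Fetch every hourly URL and write one CSV row per parking record."""
    with open(out_path, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["time", "id", "name", "occupied_spaces", "occupancy_pct"])
        for url in urls:
            resp = requests.get(url)
            resp.raise_for_status()
            for record in resp.json():  # the top level is a JSON array
                status = record["status"]["parkingRecord"]["parkingRecordStatus"]
                occupancy = status["parkingOccupancy"]
                name = (record["definition"]["parkingRecord"]["parkingRecord"]
                        ["parkingName"]["values"]["value"]["#text"])
                writer.writerow([
                    occupancy["vehicleCountAndRate"]["measurementTimeDefault"],
                    record["@id"],
                    name,
                    occupancy["parkingNumberOfOccupiedSpaces"],
                    occupancy["parkingOccupancy"],
                ])

fetch_week_to_csv(urls, "parking_week.csv")

One detail to watch: the generated URLs contain a literal space, which requests will percent-encode as %20; check whether the server expects the encoded or the raw form.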