I have a view that outputs destination city results on a page. As part of the view, a web scraper searches for flight prices from the user's area to the destination city. After adding the web scraper to the view it now takes about 8 seconds to load. I was wondering how I can make the view load quicker, maybe with threading or another alternative.
views.py
def results(request, result1, result2, result3, result4, result5, result6, broad_variable1, broad_variable2, broad_variable3, specific_variable_dictionary, user_city):
result1 = City.objects.filter(city=result1).first()
result2 = City.objects.filter(city=result2).first()
result3 = City.objects.filter(city=result3).first()
result4 = City.objects.filter(city=result4).first()
result5 = City.objects.filter(city=result5).first()
result6 = City.objects.filter(city=result6).first()
# get the first user selected specific variable value for each result
result1_value1 = City.objects.filter(city=result1.city).values(broad_variable1)[0][broad_variable1]
result2_value1 = City.objects.filter(city=result2.city).values(broad_variable1)[0][broad_variable1]
result3_value1 = City.objects.filter(city=result3.city).values(broad_variable1)[0][broad_variable1]
result4_value1 = City.objects.filter(city=result4.city).values(broad_variable1)[0][broad_variable1]
result5_value1 = City.objects.filter(city=result5.city).values(broad_variable1)[0][broad_variable1]
result6_value1 = City.objects.filter(city=result6.city).values(broad_variable1)[0][broad_variable1]
# assign variables before referencing them
result1_value2 = None
result2_value2 = None
result3_value2 = None
result4_value2 = None
result5_value2 = None
result6_value2 = None
# check if the user chose a second variable
# get the second user selected specific variable value for each result
if broad_variable2 != "Nothing":
result1_value2 = City.objects.filter(city=result1.city).values(broad_variable2)[0][broad_variable2]
result2_value2 = City.objects.filter(city=result2.city).values(broad_variable2)[0][broad_variable2]
result3_value2 = City.objects.filter(city=result3.city).values(broad_variable2)[0][broad_variable2]
result4_value2 = City.objects.filter(city=result4.city).values(broad_variable2)[0][broad_variable2]
result5_value2 = City.objects.filter(city=result5.city).values(broad_variable2)[0][broad_variable2]
result6_value2 = City.objects.filter(city=result6.city).values(broad_variable2)[0][broad_variable2]
# assign variables before referencing them
result1_value3 = None
result2_value3 = None
result3_value3 = None
result4_value3 = None
result5_value3 = None
result6_value3 = None
# check if the user chose a third variable
# get the third user selected specific variable value for each result
if broad_variable3 != "Nothing":
result1_value3 = City.objects.filter(city=result1.city).values(broad_variable3)[0][broad_variable3]
result2_value3 = City.objects.filter(city=result2.city).values(broad_variable3)[0][broad_variable3]
result3_value3 = City.objects.filter(city=result3.city).values(broad_variable3)[0][broad_variable3]
result4_value3 = City.objects.filter(city=result4.city).values(broad_variable3)[0][broad_variable3]
result5_value3 = City.objects.filter(city=result5.city).values(broad_variable3)[0][broad_variable3]
result6_value3 = City.objects.filter(city=result6.city).values(broad_variable3)[0][broad_variable3]
# create list of cities
city_list = [result1.city, result2.city, result3.city, result4.city, result5.city, result6.city]
# create price list
prices_list = []
# set origin for flight
origin = user_city
# set headers
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36"
}
for city in city_list:
# set search query
url = "https://google.com/search?q=" origin " to " city " Google Flights"
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, 'lxml')
# get price element
prices = soup.find("span", attrs={"class": "GARawf"})
if prices != None:
prices_list.append(prices.text.strip())
else:
prices_list.append("Not Available")
# change string dictionary into actual dictionary
specific_variable_dictionary = ast.literal_eval(specific_variable_dictionary)
context = {
"result1": result1, "result2": result2, "result3": result3, "result4": result4, "result5": result5,
"result6": result6, "result1_value1": result1_value1, "result2_value1": result2_value1, "result3_value1": result3_value1,
"result4_value1": result4_value1, "result5_value1": result5_value1, "result6_value1": result6_value1, "result1_value2": result1_value2,
"result2_value2": result2_value2, "result3_value2": result3_value2, "result4_value2": result4_value2,
"result5_value2": result5_value2, "result6_value2": result6_value2, "result1_value3": result1_value3, "result2_value3": result2_value3,
"result3_value3": result3_value3, "result4_value3": result4_value3, "result5_value3": result5_value3, "result6_value3": result6_value3,
"broad_variable1": broad_variable1, "broad_variable2": broad_variable2, "broad_variable3": broad_variable3,
"specific_variable_dictionary": specific_variable_dictionary, "prices_list": prices_list
}
return render(request, 'Discovery_App/results.html', context)
CodePudding user response:
The low-hanging fruit is probably the lack of caching. You should cache the results for the same origin/destination, at least for some time. You can do this inside Python, in the database you are using, or inside an external cache like Redis or Memcached. If you don't want to bother with dependencies, just creating a file is also an option.
With or without the caching, another way to improve the page speed is loading an empty page quickly, and then filling in the scraper data with Javascript using something like the Fetch API.
CodePudding user response:
You are doing a lot of query. You should try to minimize the number of query you do.
For example, you can do City.objects.filter(city__in=[v1, v2, etc])
instead of
City.objects.filter(city=v1)
City.objects.filter(city=v2)
etc.
For more complexe query you should check .annotate()
https://docs.djangoproject.com/en/4.0/topics/db/aggregation/
Accessing to the DB has a cost. It's like making a call each time you need something instead of making a list of what you need and ask it in one call.