Home > Software design >  Python - Merge API Pagination Data
Python - Merge API Pagination Data

Time:04-27

I am trying to retrieve the number of pages for each book and calculate the total. The solution below works but does not make use of the pagination feature offered by the API.

import requests
from requests.exceptions import HTTPError

# NOTE(review): this module-level list is never read in this snippet;
# sum_of_total_number_of_pages() binds a local variable with the same name.
total_number_of_pages = []

def get_all_books():
    """Fetch one page of books from the API (page 1, pageSize 15).

    Returns:
        The decoded JSON body of the response. If the request fails or the
        body cannot be decoded, the error is printed and an empty list is
        returned so that callers can still iterate over the result.
    """
    try:
        response = requests.get("https://www.anapioficeandfire.com/api/books?page=1&pageSize=15")
        response.raise_for_status()
        return response.json()
    except HTTPError as http_err:
        print(f'HTTP error occurred: {http_err}')
    except Exception as err:
        print(f'Other error occurred: {err}')
    # Only reached after a handled error. Without an explicit fallback value
    # the return would reference an unbound local and raise UnboundLocalError.
    return []


def sum_of_total_number_of_pages():
    """Print the combined ``numberOfPages`` of every book from get_all_books()."""
    books = get_all_books()
    page_counts = [book["numberOfPages"] for book in books]
    total_number_of_pages = sum(page_counts)
    print(f"The total number of pages of all books is {total_number_of_pages}")

# Script entry point: fetch the books and print the total page count.
sum_of_total_number_of_pages()

I referred to this answer and utilised the pagination feature of the API:

import requests
from requests.exceptions import HTTPError

# Root URL of the API; `endpoint` is appended to it to form the request URL.
baseurl = "https://www.anapioficeandfire.com/api/"
# Resource path requested below `baseurl`.
endpoint = "books"

def get_number_of_pages_for_each_book():
    """Collect the ``numberOfPages`` value of every book across all result pages.

    Starts at ``baseurl + endpoint`` and keeps requesting the URL given by the
    response's ``next`` link until no such link is present.

    Returns:
        A list with one ``numberOfPages`` entry per book. If a request fails,
        the error is printed and the entries collected so far are returned.
    """
    number_of_pages_for_each_book = []
    url = f"{baseurl}{endpoint}"
    try:
        while url:
            response = requests.get(url)
            # Check the status before decoding the body, and do so on every
            # page, so a failed request is reported as an HTTP error rather
            # than surfacing as a JSON decode or type error.
            response.raise_for_status()
            number_of_pages_for_each_book.extend(
                book['numberOfPages'] for book in response.json()
            )
            # `response.links` holds the parsed Link header; when it has no
            # 'next' entry, `url` becomes None and the loop ends.
            url = response.links.get('next', {}).get('url')
    except HTTPError as http_err:
        print(f'HTTP error occurred: {http_err}')
    except Exception as err:
        print(f'Other error occurred: {err}')
    return number_of_pages_for_each_book

def calculate_total_number_of_pages(number_of_pages):
    """Print the sum of the given page counts.

    Args:
        number_of_pages: Iterable of numeric page counts to add up.
    """
    total_amount_of_pages = sum(number_of_pages)
    message = f"The total number of pages is {total_amount_of_pages}"
    print(message)
    
# Fetch the per-book page counts, then print their total.
data = get_number_of_pages_for_each_book()
calculate_total_number_of_pages(data)

If I were to return json_response from the get_number_of_pages_for_each_book() function, it would only contain the data from the while loop and not the data from the initial GET request. This is why I chose to create a list and append the data from both requests.

I would like to know if there is a better way of structuring the code.

CodePudding user response:

Code looks OK.

In Python it is popular to use while True with break:

import requests
from requests.exceptions import HTTPError

# Root URL of the API; `endpoint` is appended to it to form the first request URL.
baseurl = "https://www.anapioficeandfire.com/api/"
# Resource path requested below `baseurl`.
endpoint = "books"

def get_number_of_pages_for_each_book():
    """Collect the ``numberOfPages`` value of every book across all result pages.

    Requests ``baseurl + endpoint`` first, then follows the ``next`` link of
    each response until a response has no such link.

    Returns:
        A list with one ``numberOfPages`` entry per book. If a request fails,
        the error is printed and the entries collected so far are returned.
    """
    number_of_pages_for_each_book = []

    url = f"{baseurl}{endpoint}"

    try:
        while True:
            response = requests.get(url)
            # A 4xx/5xx response is only turned into an HTTPError by this
            # call; without it the HTTPError handler below would never fire
            # for a bad status and the error body would be parsed as books.
            response.raise_for_status()

            for book in response.json():
                number_of_pages_for_each_book.append(book['numberOfPages'])

            # No 'next' link means this was the last page.
            if 'next' not in response.links:
                break

            url = response.links['next']['url']

    except HTTPError as http_err:
        print(f'HTTP error occurred: {http_err}')
    except Exception as err:
        print(f'Other error occurred: {err}')

    return number_of_pages_for_each_book

def calculate_total_number_of_pages(number_of_pages):
    """Print the sum of the given page counts.

    Args:
        number_of_pages: Iterable of numeric page counts to add up.
    """
    print("The total number of pages is", sum(number_of_pages))
    
# Fetch the per-book page counts, then print their total.
data = get_number_of_pages_for_each_book()
calculate_total_number_of_pages(data)
  • Related