Slow code when fetching data from Coinmarketcap api


I have the code below, which fetches data from the CoinMarketCap API and sends me a Telegram message when certain parameters are met. When I fetch 100 coins the code works fine, but when I fetch 5000 coins it is very slow: each run takes so long that the actual refresh interval no longer matches the 900-second schedule I set in the code.

Can anyone see why the code is so slow when fetching 5000 coins from the API?

from tokens import cmc_token
import json
import requests
from datetime import datetime, timedelta
import schedule
import time
import pandas as pd

lijst = []
price_change2 = []
coinlist = []

def tg():
    token = 'xxxxxxxxxxx'

    def write_json(data, filename='response.json'):
        with open(filename, 'w') as f:
            json.dump(data, f, indent=4, ensure_ascii=False)

    url = 'https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest'
    params = {'start': '1', 'limit': '5000', 'convert': 'usd', 'sort': 'date_added','sort_dir': 'desc' }
    headers = {'X-CMC_PRO_API_KEY': cmc_token}

    d = requests.get(url, headers=headers, params=params).json()
    
    def send_message_two(text='bla-bla-bla'):
        url = f'https://api.telegram.org/bot{token}/sendMessage'
        payload = {'chat_id': xxxxxxx, 'text' : text}

        p = requests.post(url, json=payload)
        return p


    for x in d['data']:
        date_json1 = x['last_updated']
        new_date1 = datetime.fromisoformat(date_json1[:-1])
        new_date_plus1 = new_date1 + timedelta(hours=2)
        new_date_str1 = new_date_plus1.strftime('%Y-%m-%d %H:%M:%S')

        price_now = x['symbol'], new_date_str1, x['quote']['USD']['price'], x['platform']
        price_change2.append(price_now)

        Dataset = pd.DataFrame(price_change2)
        df_new = Dataset.rename(columns={0:'coin', 2:'price', 1:'last_updated', 3:'platform'})
        zx = df_new.sort_values(['coin','last_updated'])


        zx['change1'] = zx.groupby('coin')['price'].pct_change().mul(100)
        zx['change1'] = zx['change1'].fillna(0)

        js = zx.to_json(orient="table")
        parsed = json.loads(js)

        for bn in parsed['data']:
            rt = bn['coin'], bn['change1'], bn['last_updated'],bn['price'], bn['platform']

            if bn['change1'] is None:
                return 0


            if bn['change1'] > 35 and bn['change1'] < 100 and bn['coin'] not in coinlist:
                coinlist.append(bn['coin'])
                send_message_two(rt)
                send_message_two('15 min change 35% all time')


schedule.every(900).seconds.do(tg)

while True:
    schedule.run_pending()
    time.sleep(1)

def main():
    pass


if __name__ == '__main__':
    main()

CodePudding user response:

The code makes its HTTP requests sequentially: send_message_two issues a blocking POST for every matching element in parsed['data'].

Consider running the requests asynchronously with an HTTP client that supports asyncio, such as httpx, and use asyncio.gather to run them concurrently.

The following example makes 50 GET requests to google.com, first synchronously with requests and then concurrently with httpx. Note that the async version is significantly faster.

import asyncio
import time

import requests
import httpx


def test_sync():
    for _ in range(50):
        requests.get("https://google.com")


async def test_async():
    async with httpx.AsyncClient() as client:
        tasks = [client.get("https://google.com") for _ in range(50)]
        await asyncio.gather(*tasks)


if __name__ == "__main__":
    # Use a wall-clock timer: process time would not count time spent blocked on I/O.
    start = time.perf_counter()
    test_sync()
    print(f"sync test took {time.perf_counter() - start} s")

    start = time.perf_counter()
    asyncio.run(test_async())
    print(f"async test took {time.perf_counter() - start} s")


# sync test took 23.0833 s
# async test took 0.2662 s
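
To apply this to your tg() function, a minimal sketch (assuming the same token and chat_id placeholders as in your question; the helper name send_messages is my own) would be to collect the alert texts in the loop and then post them to Telegram in one concurrent batch:

import asyncio

import httpx

token = 'xxxxxxxxxxx'  # bot token placeholder, as in the question
chat_id = 'xxxxxxx'    # chat id placeholder, as in the question

async def send_messages(texts):
    # Post all queued Telegram messages concurrently instead of one by one.
    url = f'https://api.telegram.org/bot{token}/sendMessage'
    async with httpx.AsyncClient() as client:
        tasks = [client.post(url, json={'chat_id': chat_id, 'text': text})
                 for text in texts]
        await asyncio.gather(*tasks)

# Inside tg(), instead of calling send_message_two() for each match,
# append the texts to a list and send the whole batch afterwards:
#
#     messages = []
#     for bn in parsed['data']:
#         ...
#         if 35 < bn['change1'] < 100 and bn['coin'] not in coinlist:
#             coinlist.append(bn['coin'])
#             messages.append(str(rt))
#             messages.append('15 min change 35% all time')
#     asyncio.run(send_messages(messages))

This keeps the CoinMarketCap fetch itself unchanged; only the Telegram notifications are batched and sent concurrently.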