Home > Software design >  web scrape specific sets of data from table using API with python
web scrape specific sets of data from table using API with python

Time:11-30

I am looking to web scrape the large table showing the name, date, bought/ sold, amount of shares, etc from the following website:

https://www.nasdaq.com/market-activity/stocks/aapl/insider-activity

Preferably I need someone to show how to use the Nasdaq api if possible. I believe the way I'd normally webscrape (using beautifulSoup) would be inefficient for this task.

I only need the data of: date, transaction type & shares traded. My code (below) however pulls everything from the table:

import requests
import pandas as pd
import json

headers = {
    "accept": "application/json, text/plain, */*",
    "origin": "https://www.nasdaq.com",
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.79 Safari/537.36",
}

pd.set_option("display.max_columns", None)
pd.set_option("display.max_colwidth", None)

url = "https://api.nasdaq.com/api/company/AAPL/insider-trades?limit=20&type=ALL&sortColumn=lastDate&sortOrder=DESC"
r = requests.get(url, headers=headers)
df = pd.json_normalize(
    r.json()["data"]["transactionTable"]["table"]["rows"]
)


df.to_json("AAPL22_institutional_table_MRKTVAL.json", indent=4) 

which gives the output:

{
    "insider":{
        "0":"KONDO CHRIS",
        "1":"MAESTRI LUCA",
        "2":"O'BRIEN DEIRDRE",
        "3":"KONDO CHRIS",
        "4":"KONDO CHRIS",
        "5":"O'BRIEN DEIRDRE",
        "6":"O'BRIEN DEIRDRE",
        "7":"ADAMS KATHERINE L.",
        "8":"ADAMS KATHERINE L.",
        "9":"O'BRIEN DEIRDRE",
        "10":"WILLIAMS JEFFREY E",
        "11":"WILLIAMS JEFFREY E",
        "12":"MAESTRI LUCA",
        "13":"MAESTRI LUCA",
        "14":"ADAMS KATHERINE L.",
        "15":"ADAMS KATHERINE L.",
        "16":"O'BRIEN DEIRDRE",
        "17":"O'BRIEN DEIRDRE",
        "18":"MAESTRI LUCA",
        "19":"O'BRIEN DEIRDRE"
    },
    "relation":{
        "0":"Officer",
        "1":"Officer",
        "2":"Officer",
        "3":"Officer",
        "4":"Officer",
        "5":"Officer",
        "6":"Officer",
        "7":"Officer",
        "8":"Officer",
        "9":"Officer",
        "10":"Officer",
        "11":"Officer",
        "12":"Officer",
        "13":"Officer",
        "14":"Officer",
        "15":"Officer",
        "16":"Officer",
        "17":"Officer",
        "18":"Officer",
        "19":"Officer"
    },
    "lastDate":{
        "0":"11\/22\/2022",
        "1":"10\/28\/2022",
        "2":"10\/17\/2022",
        "3":"10\/15\/2022",
        "4":"10\/15\/2022",
        "5":"10\/15\/2022",
        "6":"10\/15\/2022",
        "7":"10\/03\/2022",
        "8":"10\/03\/2022",
        "9":"10\/03\/2022",
        "10":"10\/01\/2022",
        "11":"10\/01\/2022",
        "12":"10\/01\/2022",
        "13":"10\/01\/2022",
        "14":"10\/01\/2022",
        "15":"10\/01\/2022",
        "16":"10\/01\/2022",
        "17":"10\/01\/2022",
        "18":"08\/17\/2022",
        "19":"08\/08\/2022"
    },
    "transactionType":{
        "0":"Sell",
        "1":"Automatic Sell",
        "2":"Automatic Sell",
        "3":"Disposition (Non Open Market)",
        "4":"Option Execute",
        "5":"Disposition (Non Open Market)",
        "6":"Option Execute",
        "7":"Automatic Sell",
        "8":"Sell",
        "9":"Automatic Sell",
        "10":"Disposition (Non Open Market)",
        "11":"Option Execute",
        "12":"Disposition (Non Open Market)",
        "13":"Option Execute",
        "14":"Disposition (Non Open Market)",
        "15":"Option Execute",
        "16":"Disposition (Non Open Market)",
        "17":"Option Execute",
        "18":"Automatic Sell",
        "19":"Automatic Sell"
    },
    "ownType":{
        "0":"Direct",
        "1":"Direct",
        "2":"Direct",
        "3":"Direct",
        "4":"Direct",
        "5":"Direct",
        "6":"Direct",
        "7":"Direct",
        "8":"Direct",
        "9":"Direct",
        "10":"Direct",
        "11":"Direct",
        "12":"Direct",
        "13":"Direct",
        "14":"Direct",
        "15":"Direct",
        "16":"Direct",
        "17":"Direct",
        "18":"Direct",
        "19":"Direct"
    },
    "sharesTraded":{
        "0":"20,200",
        "1":"176,299",
        "2":"8,053",
        "3":"6,399",
        "4":"13,136",
        "5":"8,559",
        "6":"16,612",
        "7":"167,889",
        "8":"13,250",
        "9":"176,299",
        "10":"177,870",
        "11":"365,600",
        "12":"189,301",
        "13":"365,600",
        "14":"184,461",
        "15":"365,600",
        "16":"189,301",
        "17":"365,600",
        "18":"96,735",
        "19":"15,366"
    },
    "lastPrice":{
        "0":"$148.72",
        "1":"$154.70",
        "2":"$142.45",
        "3":"$138.38",
        "4":"",
        "5":"$138.38",
        "6":"",
        "7":"$138.44",
        "8":"$142.93",
        "9":"$141.09",
        "10":"$138.20",
        "11":"",
        "12":"$138.20",
        "13":"",
        "14":"$138.20",
        "15":"",
        "16":"$138.20",
        "17":"",
        "18":"$174.66",
        "19":"$164.86"
    },
    "sharesHeld":{
        "0":"31,505",
        "1":"110,673",
        "2":"136,290",
        "3":"51,705",
        "4":"58,104",
        "5":"144,343",
        "6":"152,902",
        "7":"440,584",
        "8":"427,334",
        "9":"136,290",
        "10":"677,392",
        "11":"855,262",
        "12":"286,972",
        "13":"476,273",
        "14":"608,473",
        "15":"792,934",
        "16":"312,589",
        "17":"501,890",
        "18":"110,673",
        "19":"136,290"
    },
    "url":{
        "0":"\/market-activity\/insiders\/kondo-chris-956353",
        "1":"\/market-activity\/insiders\/maestri-luca-848571",
        "2":"\/market-activity\/insiders\/obrien-deirdre-1076854",
        "3":"\/market-activity\/insiders\/kondo-chris-956353",
        "4":"\/market-activity\/insiders\/kondo-chris-956353",
        "5":"\/market-activity\/insiders\/obrien-deirdre-1076854",
        "6":"\/market-activity\/insiders\/obrien-deirdre-1076854",
        "7":"\/market-activity\/insiders\/adams-katherine-l-803988",
        "8":"\/market-activity\/insiders\/adams-katherine-l-803988",
        "9":"\/market-activity\/insiders\/obrien-deirdre-1076854",
        "10":"\/market-activity\/insiders\/williams-jeffrey-e-833286",
        "11":"\/market-activity\/insiders\/williams-jeffrey-e-833286",
        "12":"\/market-activity\/insiders\/maestri-luca-848571",
        "13":"\/market-activity\/insiders\/maestri-luca-848571",
        "14":"\/market-activity\/insiders\/adams-katherine-l-803988",
        "15":"\/market-activity\/insiders\/adams-katherine-l-803988",
        "16":"\/market-activity\/insiders\/obrien-deirdre-1076854",
        "17":"\/market-activity\/insiders\/obrien-deirdre-1076854",
        "18":"\/market-activity\/insiders\/maestri-luca-848571",
        "19":"\/market-activity\/insiders\/obrien-deirdre-1076854"
    }
}

What do I need to include in my code so it only pulls a select few sets of data? (date, transaction type and shares traded)

CodePudding user response:

All you have to do is:

import requests
import json 
import pandas as pd

url = 'https://api.nasdaq.com/api/company/AAPL/insider-trades?limit=25&offset=0&type=ALL&sortColumn=lastDate&sortOrder=DESC'

headers = {
    'accept': 'application/json, text/plain, */*',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'en-US,en;q=0.9',
    'origin': 'https://www.nasdaq.com',
    'referer': 'https://www.nasdaq.com',
    'sec-fetch-mode': 'cors',
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.79 Safari/537.36'
}

r = requests.get(url, headers=headers)

df = pd.json_normalize(r.json()['data']['transactionTable']['table']['rows'])[['lastDate', 'transactionType', 'sharesTraded']]
df.to_json("AAPL22_institutional_table_MRKTVAL.json", indent=4)
  • Related