I am looking to web scrape the large table showing the name, date, bought/ sold, amount of shares, etc from the following website:
https://www.nasdaq.com/market-activity/stocks/aapl/insider-activity
Preferably I need someone to show how to use the Nasdaq api if possible. I believe the way I'd normally webscrape (using beautifulSoup) would be inefficient for this task.
I only need the data of: date, transaction type & shares traded. My code (below) however pulls everything from the table:
import requests
import pandas as pd
import json
headers = {
"accept": "application/json, text/plain, */*",
"origin": "https://www.nasdaq.com",
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.79 Safari/537.36",
}
pd.set_option("display.max_columns", None)
pd.set_option("display.max_colwidth", None)
url = "https://api.nasdaq.com/api/company/AAPL/insider-trades?limit=20&type=ALL&sortColumn=lastDate&sortOrder=DESC"
r = requests.get(url, headers=headers)
df = pd.json_normalize(
r.json()["data"]["transactionTable"]["table"]["rows"]
)
df.to_json("AAPL22_institutional_table_MRKTVAL.json", indent=4)
which gives the output:
{
"insider":{
"0":"KONDO CHRIS",
"1":"MAESTRI LUCA",
"2":"O'BRIEN DEIRDRE",
"3":"KONDO CHRIS",
"4":"KONDO CHRIS",
"5":"O'BRIEN DEIRDRE",
"6":"O'BRIEN DEIRDRE",
"7":"ADAMS KATHERINE L.",
"8":"ADAMS KATHERINE L.",
"9":"O'BRIEN DEIRDRE",
"10":"WILLIAMS JEFFREY E",
"11":"WILLIAMS JEFFREY E",
"12":"MAESTRI LUCA",
"13":"MAESTRI LUCA",
"14":"ADAMS KATHERINE L.",
"15":"ADAMS KATHERINE L.",
"16":"O'BRIEN DEIRDRE",
"17":"O'BRIEN DEIRDRE",
"18":"MAESTRI LUCA",
"19":"O'BRIEN DEIRDRE"
},
"relation":{
"0":"Officer",
"1":"Officer",
"2":"Officer",
"3":"Officer",
"4":"Officer",
"5":"Officer",
"6":"Officer",
"7":"Officer",
"8":"Officer",
"9":"Officer",
"10":"Officer",
"11":"Officer",
"12":"Officer",
"13":"Officer",
"14":"Officer",
"15":"Officer",
"16":"Officer",
"17":"Officer",
"18":"Officer",
"19":"Officer"
},
"lastDate":{
"0":"11\/22\/2022",
"1":"10\/28\/2022",
"2":"10\/17\/2022",
"3":"10\/15\/2022",
"4":"10\/15\/2022",
"5":"10\/15\/2022",
"6":"10\/15\/2022",
"7":"10\/03\/2022",
"8":"10\/03\/2022",
"9":"10\/03\/2022",
"10":"10\/01\/2022",
"11":"10\/01\/2022",
"12":"10\/01\/2022",
"13":"10\/01\/2022",
"14":"10\/01\/2022",
"15":"10\/01\/2022",
"16":"10\/01\/2022",
"17":"10\/01\/2022",
"18":"08\/17\/2022",
"19":"08\/08\/2022"
},
"transactionType":{
"0":"Sell",
"1":"Automatic Sell",
"2":"Automatic Sell",
"3":"Disposition (Non Open Market)",
"4":"Option Execute",
"5":"Disposition (Non Open Market)",
"6":"Option Execute",
"7":"Automatic Sell",
"8":"Sell",
"9":"Automatic Sell",
"10":"Disposition (Non Open Market)",
"11":"Option Execute",
"12":"Disposition (Non Open Market)",
"13":"Option Execute",
"14":"Disposition (Non Open Market)",
"15":"Option Execute",
"16":"Disposition (Non Open Market)",
"17":"Option Execute",
"18":"Automatic Sell",
"19":"Automatic Sell"
},
"ownType":{
"0":"Direct",
"1":"Direct",
"2":"Direct",
"3":"Direct",
"4":"Direct",
"5":"Direct",
"6":"Direct",
"7":"Direct",
"8":"Direct",
"9":"Direct",
"10":"Direct",
"11":"Direct",
"12":"Direct",
"13":"Direct",
"14":"Direct",
"15":"Direct",
"16":"Direct",
"17":"Direct",
"18":"Direct",
"19":"Direct"
},
"sharesTraded":{
"0":"20,200",
"1":"176,299",
"2":"8,053",
"3":"6,399",
"4":"13,136",
"5":"8,559",
"6":"16,612",
"7":"167,889",
"8":"13,250",
"9":"176,299",
"10":"177,870",
"11":"365,600",
"12":"189,301",
"13":"365,600",
"14":"184,461",
"15":"365,600",
"16":"189,301",
"17":"365,600",
"18":"96,735",
"19":"15,366"
},
"lastPrice":{
"0":"$148.72",
"1":"$154.70",
"2":"$142.45",
"3":"$138.38",
"4":"",
"5":"$138.38",
"6":"",
"7":"$138.44",
"8":"$142.93",
"9":"$141.09",
"10":"$138.20",
"11":"",
"12":"$138.20",
"13":"",
"14":"$138.20",
"15":"",
"16":"$138.20",
"17":"",
"18":"$174.66",
"19":"$164.86"
},
"sharesHeld":{
"0":"31,505",
"1":"110,673",
"2":"136,290",
"3":"51,705",
"4":"58,104",
"5":"144,343",
"6":"152,902",
"7":"440,584",
"8":"427,334",
"9":"136,290",
"10":"677,392",
"11":"855,262",
"12":"286,972",
"13":"476,273",
"14":"608,473",
"15":"792,934",
"16":"312,589",
"17":"501,890",
"18":"110,673",
"19":"136,290"
},
"url":{
"0":"\/market-activity\/insiders\/kondo-chris-956353",
"1":"\/market-activity\/insiders\/maestri-luca-848571",
"2":"\/market-activity\/insiders\/obrien-deirdre-1076854",
"3":"\/market-activity\/insiders\/kondo-chris-956353",
"4":"\/market-activity\/insiders\/kondo-chris-956353",
"5":"\/market-activity\/insiders\/obrien-deirdre-1076854",
"6":"\/market-activity\/insiders\/obrien-deirdre-1076854",
"7":"\/market-activity\/insiders\/adams-katherine-l-803988",
"8":"\/market-activity\/insiders\/adams-katherine-l-803988",
"9":"\/market-activity\/insiders\/obrien-deirdre-1076854",
"10":"\/market-activity\/insiders\/williams-jeffrey-e-833286",
"11":"\/market-activity\/insiders\/williams-jeffrey-e-833286",
"12":"\/market-activity\/insiders\/maestri-luca-848571",
"13":"\/market-activity\/insiders\/maestri-luca-848571",
"14":"\/market-activity\/insiders\/adams-katherine-l-803988",
"15":"\/market-activity\/insiders\/adams-katherine-l-803988",
"16":"\/market-activity\/insiders\/obrien-deirdre-1076854",
"17":"\/market-activity\/insiders\/obrien-deirdre-1076854",
"18":"\/market-activity\/insiders\/maestri-luca-848571",
"19":"\/market-activity\/insiders\/obrien-deirdre-1076854"
}
}
What do I need to include in my code so it only pulls a select few sets of data? (date, transaction type and shares traded)
CodePudding user response:
All you have to do is:
import requests
import json
import pandas as pd
url = 'https://api.nasdaq.com/api/company/AAPL/insider-trades?limit=25&offset=0&type=ALL&sortColumn=lastDate&sortOrder=DESC'
headers = {
'accept': 'application/json, text/plain, */*',
'accept-encoding': 'gzip, deflate, br',
'accept-language': 'en-US,en;q=0.9',
'origin': 'https://www.nasdaq.com',
'referer': 'https://www.nasdaq.com',
'sec-fetch-mode': 'cors',
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.79 Safari/537.36'
}
r = requests.get(url, headers=headers)
df = pd.json_normalize(r.json()['data']['transactionTable']['table']['rows'])[['lastDate', 'transactionType', 'sharesTraded']]
df.to_json("AAPL22_institutional_table_MRKTVAL.json", indent=4)