I'm currently learning Python and trying to build my own projects by adapting pieces of other people's code, so please bear with me while I'm learning.
I'm taking a list of stocks from tickers.csv and scraping a website to get each stock's sector and industry, then writing them to stocks.csv.
The problem is that I can only get either the sector or the industry (by choosing one) into stocks.csv, using:
if __name__ == '__main__':
    # Each ticker yields one dict, which becomes one row of stocks.csv.
    to_csv([get_sector(ticker) for ticker in get_stocks()])
    # Alternative run that exports industries instead of sectors:
    # to_csv([get_industry(ticker) for ticker in get_stocks()])
I would like to get both the sector and the industry at the same time. Here is the whole code:
# dependencies
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
# Base URL of the csimarket.com "at a glance" page; the ticker symbol is
# appended to it as the value of the ``code`` query parameter.
LSE = 'https://csimarket.com/stocks/at_glance.php?code='
def get_stocks(path='watchlist/tickers.csv'):
    """Return the ticker symbols from the ``ticker`` column of a CSV file.

    Args:
        path: CSV file to read. Defaults to ``'watchlist/tickers.csv'``.

    Returns:
        A list of non-empty ticker strings, in file order.
    """
    # dtype=str with keep_default_na=False keeps symbols such as "NA" or
    # "NULL" as text; pandas would otherwise parse them as missing values.
    df = pd.read_csv(
        path,
        usecols=['ticker'],
        dtype=str,
        keep_default_na=False,
    )
    symbols = (symbol.strip() for symbol in df['ticker'])
    # Skip rows whose ticker cell is empty.
    return [symbol for symbol in symbols if symbol]
def to_csv(stocks, path='stocks.csv'):
    """Write the scraped records to a CSV file without the index column.

    Args:
        stocks: Data accepted by ``pandas.DataFrame``; this script passes a
            list of dicts, one per ticker, whose keys become the columns.
        path: Output file. Defaults to ``'stocks.csv'``.
    """
    pd.DataFrame(stocks).to_csv(path, index=False)
def get_soup(url, timeout=30):
    """Download *url* and return its HTML parsed with BeautifulSoup.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script indefinitely.

    Returns:
        A BeautifulSoup object built with the ``'html.parser'`` backend.

    Raises:
        requests.exceptions.RequestException: If the request fails or times
            out.
    """
    response = requests.get(url, timeout=timeout)
    return bs(response.text, 'html.parser')
def get_sector(ticker):
    """Scrape the sector of *ticker* from its csimarket.com page.

    Args:
        ticker: Ticker symbol appended to the ``LSE`` base URL.

    Returns:
        ``{'ticker': ticker, 'sector': name}``; ``name`` is ``''`` when the
        page has no "Sector" entry.
    """
    soup = get_soup(LSE + ticker)
    try:
        # The sector name is the first link following the "Sector" label.
        link = soup.find('span', string='Sector').find_next('a')
        sector = link.text.replace('\n', '').replace('•', '').strip()
    except AttributeError:
        # find()/find_next() return None when the label or link is missing,
        # so the chained attribute access fails with AttributeError.
        print('No sector information availible for ', ticker)
        return {'ticker': ticker, 'sector': ''}
    print(ticker, sector)
    return {'ticker': ticker, 'sector': sector}
def get_industry(ticker):
    """Scrape the industry of *ticker* from its csimarket.com page.

    Args:
        ticker: Ticker symbol appended to the ``LSE`` base URL.

    Returns:
        ``{'ticker': ticker, 'industry': name}``; ``name`` is ``''`` when the
        page has no "Industry" entry.
    """
    soup = get_soup(LSE + ticker)
    try:
        # The industry name is the first link following the "Industry" label.
        link = soup.find('span', string='Industry').find_next('a')
        industry = link.text.replace('\n', '').replace('•', '').strip()
    except AttributeError:
        # find()/find_next() return None when the label or link is missing,
        # so the chained attribute access fails with AttributeError.
        print('No industry information availible for ', ticker)
        return {'ticker': ticker, 'industry': ''}
    print(ticker, industry)
    return {'ticker': ticker, 'industry': industry}
if __name__ == '__main__':
    # Each ticker yields one dict, which becomes one row of stocks.csv.
    to_csv([get_sector(ticker) for ticker in get_stocks()])
    # Alternative run that exports industries instead of sectors:
    # to_csv([get_industry(ticker) for ticker in get_stocks()])
here is the tickers.csv
ticker,
A
AA
AADI
AAIC
AAL
AAN
AAOI
AAON
AAP
AAPL
AAT
AAU
AAWW
AB
ABB
ABBV
ABC
ABCB
ABCL
ABEO
ABEV
ABG
ABIO
ABM
ABMD
ABNB
ABOS
ABR
ABSI
ABST
ABT
ABTX
ABUS
ACA
ACAD
ACB
ACC
ACCD
ACCO
ACEL
ACER
ACET
ACEV
ACGL
ACH
ACHC
ACHR
ACHV
ACI
ACIU
here is the stocks.csv when I get the sectors
ticker,sector
A,Healthcare
AA,Basic Materials
AADI,
AAIC,Services
AAL,Transportation
AAN,Services
AAOI,Technology
AAON,Capital Goods
AAP,Retail
AAPL,Technology
AAT,Financial
AAU,Basic Materials
AAWW,Transportation
AB,Financial
ABB,Consumer Discretionary
ABBV,Healthcare
ABC,Retail
ABCB,Financial
ABCL,Healthcare
ABEO,Healthcare
ABEV,Consumer Non Cyclical
ABG,Retail
ABIO,Healthcare
ABM,Services
ABMD,Healthcare
ABNB,Services
ABOS,Healthcare
ABR,Financial
ABSI,Healthcare
ABST,
ABT,Healthcare
ABTX,Financial
ABUS,Healthcare
ACA,Basic Materials
ACAD,Healthcare
ACB,
ACC,Financial
ACCD,Financial
ACCO,Basic Materials
ACEL,Services
ACER,Healthcare
ACET,Retail
ACEV,Technology
ACGL,Financial
ACH,Basic Materials
ACHC,Healthcare
ACHR,Capital Goods
ACHV,Healthcare
ACI,Energy
ACIU,
here is the stocks.csv when I get the industries
ticker,industry
A,Laboratory Analytical Instruments
AA,Aluminum
AADI,
AAIC,Real Estate Operations
AAL,Airline
AAN,Rental & Leasing
AAOI,Computer Networks
AAON,Industrial Machinery and Components
AAP,Automotive Aftermarket
AAPL,Computer Hardware
AAT,Real Estate Investment Trusts
AAU,Metal Mining
AAWW,Special Transportation Services
AB,Investment Services
ABB,Electric & Wiring Equipment
ABBV,Biotechnology & Pharmaceuticals
ABC,Pharmacy Services & Retail Drugstore
ABCB,Regional Banks
ABCL,Major Pharmaceutical Preparations
ABEO,Major Pharmaceutical Preparations
ABEV,Nonalcoholic Beverages
ABG,Automotive Aftermarket
ABIO,In Vitro & In Vivo Diagnostic Substances
ABM,Professional Services
ABMD,Medical Equipment & Supplies
ABNB,Real Estate Operations
ABOS,Biotechnology & Pharmaceuticals
ABR,Real Estate Investment Trusts
ABSI,Medical Laboratories
ABST,
ABT,Major Pharmaceutical Preparations
ABTX,Commercial Banks
ABUS,Major Pharmaceutical Preparations
ACA,Miscellaneous Fabricated Products
ACAD,Major Pharmaceutical Preparations
ACB,
ACC,Real Estate Investment Trusts
ACCD,Blank Checks
ACCO,Paper & Paper Products
ACEL,Casinos & Gaming
ACER,Major Pharmaceutical Preparations
ACET,Pharmacy Services & Retail Drugstore
ACEV,Semiconductors
ACGL,Property & Casualty Insurance
ACH,Aluminum
ACHC,Healthcare Facilities
ACHR,Aerospace & Defense
ACHV,In Vitro & In Vivo Diagnostic Substances
ACI,Coal Mining
ACIU,
CodePudding user response:
Just combine your two existing functions into one and parse both values from a single soup object, so each page is requested only once:
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
# Base URL of the csimarket.com "at a glance" page; the ticker symbol is
# appended to it as the value of the ``code`` query parameter.
LSE = 'https://csimarket.com/stocks/at_glance.php?code='
def get_stocks(path='watchlist/tickers.csv'):
    """Return the ticker symbols from the ``ticker`` column of a CSV file.

    Args:
        path: CSV file to read. Defaults to ``'watchlist/tickers.csv'``.

    Returns:
        A list of non-empty ticker strings, in file order.
    """
    # dtype=str with keep_default_na=False keeps symbols such as "NA" or
    # "NULL" as text; pandas would otherwise parse them as missing values.
    df = pd.read_csv(
        path,
        usecols=['ticker'],
        dtype=str,
        keep_default_na=False,
    )
    symbols = (symbol.strip() for symbol in df['ticker'])
    # Skip rows whose ticker cell is empty.
    return [symbol for symbol in symbols if symbol]
def to_csv(stocks, path='stocks.csv'):
    """Write the scraped records to a CSV file without the index column.

    The file is encoded as ``utf-8-sig``, i.e. UTF-8 with a leading byte
    order mark.

    Args:
        stocks: Data accepted by ``pandas.DataFrame``; this script passes a
            list of dicts, one per ticker, whose keys become the columns.
        path: Output file. Defaults to ``'stocks.csv'``.
    """
    pd.DataFrame(stocks).to_csv(path, encoding='utf-8-sig', index=False)
def get_soup(url, timeout=30):
    """Download *url* and return its HTML parsed with BeautifulSoup.

    The request is sent with a ``Mozilla/5.0`` User-Agent header.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script indefinitely.

    Returns:
        A BeautifulSoup object built with the ``'html.parser'`` backend.

    Raises:
        requests.exceptions.RequestException: If the request fails or times
            out.
    """
    response = requests.get(
        url,
        headers={'User-Agent': 'Mozilla/5.0'},
        timeout=timeout,
    )
    return bs(response.text, 'html.parser')
def _extract_field(soup, label):
    """Return the text of the first link after the span *label*, or None."""
    try:
        link = soup.find('span', string=label).find_next('a')
        return link.text.replace('\n', '').replace('•', '').strip()
    except AttributeError:
        # find()/find_next() return None when the label or link is missing.
        return None


def get_data(ticker):
    """Scrape both the sector and the industry of *ticker* from one page.

    Args:
        ticker: Ticker symbol appended to the ``LSE`` base URL.

    Returns:
        ``{'ticker': ..., 'sector': ..., 'industry': ...}``. All three keys
        are always present; a value that could not be found is ``''``, so a
        missing sector no longer suppresses the industry (or vice versa).
    """
    soup = get_soup(LSE + ticker)
    record = {'ticker': ticker}
    for key, label in (('sector', 'Sector'), ('industry', 'Industry')):
        value = _extract_field(soup, label)
        if value is None:
            print(f'No {key} information availible for ', ticker)
            value = ''
        else:
            print(ticker, value)
        record[key] = value
    return record
if __name__ == '__main__':
    # Each ticker yields one dict, which becomes one row of stocks.csv.
    to_csv([get_data(ticker) for ticker in get_stocks()])