I'm trying to turn the data I scrape from the Hedgefollow website into a pandas DataFrame. This is my code; can you help me with this transformation?
import selenium.webdriver
import time
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
# Open the fund page in Firefox and print the 'dgtopHolders' table cell by cell.
url = 'https://hedgefollow.com/funds/Duquesne Family Office'
options = webdriver.FirefoxOptions()
options.binary_location = r'C:/Local/Mozilla Firefox/firefox.exe'
# NOTE(review): `executable_path` was removed in Selenium 4.10; on newer
# versions pass a `Service(executable_path=...)` object instead -- confirm
# against the installed Selenium version.
driver = selenium.webdriver.Firefox(executable_path='C:Desktop/111/geckodriver.exe' , options=options)
driver.get(url)
# Fixed pause before looking up the table (presumably to let the page render).
time.sleep(3)
table = driver.find_element('id' , 'dgtopHolders')
# `find_elements_by_tag_name` was removed in Selenium 4.3; use the generic
# `find_elements(by, value)` form, matching the `find_element` call above.
# The rows are fetched once and reused for both the header and the data pass.
rows = table.find_elements('tag name', 'tr')
print('--- headers ---')
for cell in rows[0].find_elements('tag name', 'th'):
    print(cell.text)
print('--- data ---')
for row in rows[1:]:
    for cell in row.find_elements('tag name', 'td'):
        print(cell.text)
    # Separator printed after each data row.
    print('---')
# Can you help me? Thank you so much.
CodePudding user response:
The following method will get you the table as a pandas dataframe:
from io import StringIO

import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
[...]
driver.get(url)
# Wait up to 20 seconds for the element with id 'dgtopHolders' instead of
# sleeping for a fixed time.
table = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, 'dgtopHolders')))
# Passing a literal HTML string to `read_html` is deprecated since pandas 2.1;
# wrapping it in StringIO works on both old and new pandas versions.
dfs = pd.read_html(StringIO(table.get_attribute('outerHTML')))
# `read_html` returns a list of DataFrames; the first one is printed.
print(dfs[0])
Result:
Stock Company Name % of Portfolio Shares Value % Change Change Ownership History Price History Date
0 CPNG Coupang Inc Cl A 15.70% 19.43M $ 343.60M 9.41% 1.67M NaN NaN 2022-03-31
1 MSFT Microsoft Corp 14.37% 1.02M $ 314.57M 27.46% 219.81k NaN NaN 2022-03-31
2 FCX Freeport-mcmoran Inc Cl B 10.99% 4.84M $ 240.58M 0% 0 NaN NaN 2022-03-31
3 AMZN Amazon Com Inc 9.09% 1.22M $ 198.85M 7.23% 82.30k NaN NaN 2022-03-31
4 CVX Chevron Corp 7.18% 964.88k $ 157.11M 17.03% 140.44k NaN NaN 2022-03-31
5 TMUS T-mobile Us Inc 5.23% 891.21k $ 114.39M 11.92% 94.90k NaN NaN 2022-03-31
[...]