I want the data in a DataFrame. The code is working perfectly; please solve this issue and provide the data in a DataFrame. I tried to solve it myself but failed to do so.
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd
import time
# Start Chrome through the local chromedriver binary. The raw string keeps the
# backslash of the Windows path literal ("\c" is not a valid escape sequence).
browser = webdriver.Chrome(r'F:\chromedriver.exe')
browser.get("https://capitalonebank2.bluematrix.com/sellside/Disclosures.action")
# NOTE(review): this dict is not referenced by any other line of the snippet.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.3"}

for title in browser.find_elements_by_css_selector('option'):
    # Select the option, then pause one second (presumably so the page can
    # refresh its content before it is read).
    title.click()
    time.sleep(1)
    # The table is read from inside the second iframe of the page.
    browser.switch_to.frame(browser.find_elements_by_css_selector("iframe")[1])
    table = browser.find_element_by_css_selector("table table")
    soup = BeautifulSoup(table.get_attribute("innerHTML"), "lxml")
    all_data = []
    ratings = {"BUY": [], "HOLD": [], "SELL": []}
    lists_ = []
    # The three rows just before the table's last row are stored, in order,
    # as the BUY, HOLD and SELL figures.
    for row in soup.select("tr")[-4:-1]:
        info_list = row.select("td")
        count = info_list[1].text
        percent = info_list[2].text
        IBServ_count = info_list[4].text
        IBServ_percent = info_list[5].text
        lists_.append([count, percent, IBServ_count, IBServ_percent])
    ratings["BUY"] = lists_[0]
    ratings["HOLD"] = lists_[1]
    ratings["SELL"] = lists_[2]
    # The 'option' elements and the iframe lookup belong to the top-level
    # document, so leave the iframe before the next iteration.
    browser.switch_to.default_content()
CodePudding user response:
You can do this:
# Wrap the collected rows in a single 'Details' column and save the frame
# to out.csv without writing the row index.
data = dict(Details=lists_)
df = pd.DataFrame(data)
df.to_csv('out.csv', index=False)
You should write this outside the for loop that you have.
CodePudding user response:
You can find the solution below:
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd
import time
import numpy as np
# Start Chrome through the local chromedriver binary. The raw string keeps the
# backslash of the Windows path literal ("\c" is not a valid escape sequence).
browser = webdriver.Chrome(r'F:\chromedriver.exe')
browser.get("https://capitalonebank2.bluematrix.com/sellside/Disclosures.action")
# NOTE(review): this dict is not referenced by any other line of the snippet.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.3"}

# Parallel lists: entry k of each list belongs to the k-th option visited.
title_lists = []
buy_lists = []
hold_lists = []
sell_lists = []

for title in browser.find_elements_by_css_selector('option'):
    # Select the option, then pause one second (presumably so the page can
    # refresh its content before it is read).
    title.click()
    time.sleep(1)
    title_lists.append(title.text)
    # The table is read from inside the second iframe of the page.
    browser.switch_to.frame(browser.find_elements_by_css_selector("iframe")[1])
    table = browser.find_element_by_css_selector("table table")
    soup = BeautifulSoup(table.get_attribute("innerHTML"), "lxml")
    lists_ = []
    # The three rows just before the table's last row are stored, in order,
    # as the BUY, HOLD and SELL figures.
    for row in soup.select("tr")[-4:-1]:
        info_list = row.select("td")
        count = info_list[1].text
        percent = info_list[2].text
        IBServ_count = info_list[4].text
        IBServ_percent = info_list[5].text
        lists_.append([count, percent, IBServ_count, IBServ_percent])
    buy_lists.append(lists_[0])
    hold_lists.append(lists_[1])
    sell_lists.append(lists_[2])
    # Leave the iframe so the next option is clicked in the top-level document.
    browser.switch_to.default_content()
# Two-level column header: one (Action, Rating) pair per scraped figure.
header = pd.MultiIndex.from_product(
    [['BUY', 'HOLD', 'SELL'],
     ['Count', 'Percent', 'IBServ_count', 'IBServ_percent']],
    names=['Action', 'Rating'],
)
# m has one row per header column (12 in total) and one column per title.
m = np.array([
    [entry[col] for entry in group]
    for group in (buy_lists, hold_lists, sell_lists)
    for col in range(4)
])
# Transpose so that each title becomes one row. np.rot90 must not be used
# here: it rotates instead of transposing, which yields the rows from the
# last title to the first and so mismatches them with title_lists.
dc = pd.DataFrame(m.T, columns=header, index=pd.Index(title_lists, name="Title"))
# Bare expression kept from the original (displays the frame in a notebook).
dc
I used numpy to manipulate the array of information I created. It may not be the best usage of pandas, but it creates the DataFrame you are looking for.