I'm trying to scrape names from a web page and import them into an Excel sheet so they can be used later. The issue is that I need each name in 3 different cells: first, last and initial. The script looks for a keyword (in this case "est of") and prints the whole line, which contains the full name along with the "est of". I need it to:
- Drop the est of from the end.
- Split the full name into 3 so it can be exported out to a sheet.
Here's the code:
#!python
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import NoSuchElementException
from random import randint
import pickle
import datetime
import os
import time
import sys
import openpyxl
from openpyxl import Workbook
import re
# Drive Chrome through the Miami-Dade site to a property search, open the
# results window, and print every text node that contains "EST OF".
#
# NOTE: the absolute XPaths below are tied to the page layout at the time the
# script was written. The short step labels next to them are inferred from
# the names used in the script, not from the page itself; verify them against
# the live site.
url = 'https://www.miamidade.gov/global/home.page'
current_time = datetime.datetime.now()
# strftime returns a new string (MM/DD/YYYY); keep it instead of discarding it.
run_date = current_time.strftime("%m/%d/%Y")

# NOTE(review): these options are built but never handed to webdriver.Chrome
# below, so the browser does not actually start headless. Pass
# `options=options` to the constructor if headless mode is wanted.
options = webdriver.ChromeOptions()
options.headless = True
chromedriver = "chromedriver.exe"
number = "2080"  # not used in the visible part of the script

driver = webdriver.Chrome(chromedriver)
try:
    driver.get(url)

    # Save the session cookies; the context manager closes the file.
    with open("cookies.pkl", "wb") as cookie_file:
        pickle.dump(driver.get_cookies(), cookie_file)
    time.sleep(3)

    # Navigation steps. click() returns None, so its result is not stored.
    driver.find_element(
        By.XPATH,
        '/html/body/div[2]/div/div[1]/div/header/div[2]/nav/div/div[1]/div/div[1]/a',
    ).click()  # nav1
    time.sleep(1)
    driver.find_element(
        By.XPATH,
        '/html/body/div[2]/div/div[1]/div/header/div[2]/div[2]/div/div/div/ul/li[1]/button',
    ).click()  # nav2
    driver.find_element(
        By.XPATH,
        '/html/body/div[2]/div/div[1]/div/header/div[2]/div[2]/div/div/div/ul/li[1]/ul/li[2]/ul/li[5]/a',
    ).click()  # propsrch1
    time.sleep(2)
    driver.find_element(
        By.XPATH,
        '/html/body/div[2]/div/main/div[2]/div/div[2]/div/div[1]/div[1]/ul/li[1]/span/a',
    ).click()  # propsrch2
    time.sleep(5)

    # Search step: pick the "subdivision" tab, type the query, submit it.
    driver.find_element(
        By.XPATH,
        '/html/body/div/div[2]/div[3]/div[1]/ul/li[3]/a',
    ).click()  # subdivision
    searchbar = driver.find_element(
        By.XPATH,
        '/html/body/div/div[2]/div[3]/div[1]/div[2]/div[2]/div/div[3]/div/input',
    )
    time.sleep(2)
    searchbar.send_keys("RICHMOND HGTS")
    driver.find_element(
        By.XPATH,
        '/html/body/div/div[2]/div[3]/div[1]/div[2]/div[2]/div/div[3]/div/span/button/span',
    ).click()  # search
    time.sleep(10)
    driver.find_element(
        By.XPATH,
        '/html/body/div/div[2]/div[3]/div[1]/div[2]/div[4]/a',
    ).click()  # table

    # The last click opens a second window. Wait (with a timeout, so a
    # missing window raises TimeoutException instead of hanging forever)
    # until a handle other than the current one exists, then switch to it.
    main_window_handle = driver.current_window_handle
    signin_window_handle = WebDriverWait(driver, 30).until(
        lambda d: next(
            (handle for handle in d.window_handles
             if handle != main_window_handle),
            None,
        )
    )
    driver.switch_to.window(signin_window_handle)
    time.sleep(20)
    page_source = driver.page_source
finally:
    # Always shut the browser down, even if a step above failed.
    driver.quit()

soup = BeautifulSoup(page_source, 'html.parser')
keyword = 'est of'  # currently unused; the search below matches "EST OF"
# Every text node containing "EST OF" (case-sensitive).
counts = soup.find_all(text=re.compile("EST OF"))
for count in counts:
    print(count)
Right now it's printing to the command prompt, just so I can see that it's working. The output looks like this:
GRACE K ROLLE EST OF
ETHEL H FIFE EST OF
BARBARA J BROUSSARD EST OF
CLEMENTINA D RAHMING EST OF
CHARLES B CAMBRIDGE JR EST OF
EMILY STATEN EST OF
HATTIE S KING EST OF
What is the best way to go about splitting up the name?
CodePudding user response:
You can split each line on spaces using the string split() method:
def split_owner_name(line, suffix="EST OF"):
    """Split a scraped owner line into ``(first, initial, last)``.

    The trailing *suffix* words (``"EST OF"`` by default) are dropped when
    present. The first remaining word is the first name. If the next word is
    a single letter (optionally followed by a period) and at least one more
    word follows, it is the middle initial. Everything left over is joined
    into the last name, so "CAMBRIDGE JR" stays together.

    This is a heuristic: a multi-letter middle name ends up in the last-name
    part. Missing parts are returned as empty strings.
    """
    words = line.split()  # split on any whitespace, ignoring leading/trailing
    suffix_words = suffix.split()
    if suffix_words and words[-len(suffix_words):] == suffix_words:
        words = words[:-len(suffix_words)]
    if not words:
        return "", "", ""

    first, rest = words[0], words[1:]
    initial = ""
    # Only treat the second word as an initial if a last name still follows.
    if len(rest) > 1 and len(rest[0].rstrip(".")) == 1:
        initial = rest[0]
        rest = rest[1:]
    return first, initial, " ".join(rest)


for count in counts:
    # e.g. "GRACE K ROLLE EST OF" -> ("GRACE", "K", "ROLLE")
    #      "EMILY STATEN EST OF"  -> ("EMILY", "", "STATEN")
    First_name, mid_name, Last_name = split_owner_name(count)
CodePudding user response:
If you know it's always going to be 3 words separated by a space, you can use count.split(' ')[:3].
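If you don't know how long the name will be, first drop the trailing "EST OF" and then split what is left: count.strip().removesuffix('EST OF').split() (Python 3.9+). Don't use count.rstrip('EST OF') for this: rstrip removes any trailing run of the characters E, S, T, O, F and space rather than the exact suffix, so "ETHEL H FIFE EST OF" would be cut down to "ETHEL H FI".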