I want to extract from the id
but every id has different value check it:
div',id='statement80863
div',id='statement26092
and so on ............................
CODE
import requests
from bs4 import BeautifulSoup
import re
limit = 100
url = f'https://www.counselingcalifornia.com/cc/cgi-bin/utilities.dll/customlist?FIRSTNAME=~&LASTNAME=~&ZIP=&DONORCLASSSTT=&_MULTIPLE_INSURANCE=&HASPHOTOFLG=&_MULTIPLE_EMPHASIS=ÐNIC=&_MULTIPLE_LANGUAGE=ENG&QNAME=THERAPISTLIST&WMT=NONE&WNR=NONE&WHP=therapistHeader.htm&WBP=therapistList.htm&RANGE=1/{limit}&SORT=LASTNAME'
headers = {'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Mobile Safari/537.36'}
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, 'html.parser')
rows = soup.find_all('div', {'class':'row'})
for row in rows:
des=row.find('div',id='statement80863').text
print(des)
CodePudding user response:
You can use Regular Expressions to select only such <div>
tags.
row.find('div', {'id': re.compile('^statement.*')})
- will select all the<div>
tags that has anid
which starts with the wordstatement
.
import re
import requests
from bs4 import BeautifulSoup
url = 'https://www.counselingcalifornia.com/cc/cgi-bin/utilities.dll/customlist?FIRSTNAME=~&LASTNAME=~&ZIP=&DONORCLASSSTT=&_MULTIPLE_INSURANCE=&HASPHOTOFLG=&_MULTIPLE_EMPHASIS=ÐNIC=&_MULTIPLE_LANGUAGE=ENG&QNAME=THERAPISTLIST&WMT=NONE&WNR=NONE&WHP=therapistHeader.htm&WBP=therapistList.htm&RANGE=1/100&SORT=LASTNAME'
headers = {"User-agent":"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36"}
r = requests.get(url, headers=headers)
soup = BeautifulSoup(r.text, 'lxml')
rows = soup.find_all('div', class_='row')
for row in rows:
d = row.find('div', {'id': re.compile('^statement*')})
if d:
# Your scraping code here...