Home > front end >  How to extract text from the different id from beautifulsoup
How to extract text from the different id from beautifulsoup

Time:11-26

I want to extract from the id but every id has different value check it:

    div',id='statement80863
    div',id='statement26092

and so on ............................ 

CODE

import requests
from bs4 import BeautifulSoup
import re

limit = 100

url = f'https://www.counselingcalifornia.com/cc/cgi-bin/utilities.dll/customlist?FIRSTNAME=~&LASTNAME=~&ZIP=&DONORCLASSSTT=&_MULTIPLE_INSURANCE=&HASPHOTOFLG=&_MULTIPLE_EMPHASIS=&ETHNIC=&_MULTIPLE_LANGUAGE=ENG&QNAME=THERAPISTLIST&WMT=NONE&WNR=NONE&WHP=therapistHeader.htm&WBP=therapistList.htm&RANGE=1/{limit}&SORT=LASTNAME'
headers = {'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Mobile Safari/537.36'}

response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, 'html.parser')
rows = soup.find_all('div', {'class':'row'})
for row in rows:
    des=row.find('div',id='statement80863').text
    print(des)

CodePudding user response:

You can use Regular Expressions to select only such <div> tags.

row.find('div', {'id': re.compile('^statement.*')}) - will select all the <div> tags that has an id which starts with the word statement.

import re
import requests
from bs4 import BeautifulSoup

url = 'https://www.counselingcalifornia.com/cc/cgi-bin/utilities.dll/customlist?FIRSTNAME=~&LASTNAME=~&ZIP=&DONORCLASSSTT=&_MULTIPLE_INSURANCE=&HASPHOTOFLG=&_MULTIPLE_EMPHASIS=&ETHNIC=&_MULTIPLE_LANGUAGE=ENG&QNAME=THERAPISTLIST&WMT=NONE&WNR=NONE&WHP=therapistHeader.htm&WBP=therapistList.htm&RANGE=1/100&SORT=LASTNAME'
headers = {"User-agent":"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36"}

r = requests.get(url, headers=headers)
soup = BeautifulSoup(r.text, 'lxml')

rows = soup.find_all('div', class_='row')
for row in rows:
    d = row.find('div', {'id': re.compile('^statement*')})
    if d:
        # Your scraping code here...
  • Related