import scrapy
from scrapy.http import Request
from bs4 import BeautifulSoup
class TestSpider(scrapy.Spider):
    """Crawl the lawyer listing on baroul-bucuresti.ro and print profile details.

    ``parse`` follows the profile links found on the listing page and
    ``parse_book`` prints the entry-decision and phone values found on each
    profile page.
    """

    name = 'test'
    start_urls = ['https://www.baroul-bucuresti.ro/tablou-definitivi']
    page_number = 1

    def parse(self, response):
        """Yield one request per profile link found on the listing page.

        Args:
            response: The listing page response; its ``text`` is parsed
                with BeautifulSoup.

        Yields:
            A ``Request`` for each absolute profile URL, handled by
            ``parse_book``.
        """
        base_url = 'https://www.baroul-bucuresti.ro'
        soup = BeautifulSoup(response.text, 'html.parser')
        for panel in soup.find_all('div', class_='panel-title'):
            # The first anchor of every panel is deliberately skipped ([1:]).
            for link in panel.find_all('a', href=True)[1:]:
                yield Request(base_url + link['href'], callback=self.parse_book)

    @staticmethod
    def _last_text(node):
        """Return the last non-blank text fragment under *node*, or '' if none."""
        fragments = [
            text.strip()
            for text in node.xpath('.//text()').getall()
            if text.strip()
        ]
        # Guard against paragraphs without text instead of raising IndexError.
        return fragments[-1] if fragments else ''

    def parse_book(self, response):
        """Print the entry-decision and phone values from a profile page.

        Each ``<p>`` inside the ``av_bot_left left`` container is checked
        for the labels ``Decizia de intrare:`` and ``Telefon:``; the last
        non-blank text fragment of a matching paragraph is printed.

        Args:
            response: The profile page response.
        """
        for paragraph in response.xpath("//div[@class='av_bot_left left']//p"):
            markup = paragraph.get()
            # A paragraph is reported once; the entry-decision label wins
            # if both labels occur in the same paragraph.
            if 'Decizia de intrare:' in markup:
                print(self._last_text(paragraph))
            elif 'Telefon:' in markup:
                print(self._last_text(paragraph))
This is my output. I want to replace the dots in the phone number with spaces and remove the leading 0.
This is the page link: https://www.baroul-bucuresti.ro/avocat/15655/aanegroae-ana-maria
0752.172.817
I want output like the following. Is it possible to get this output?
752 172 817
CodePudding user response:
You can use the replace()
function to change the dots to spaces. You also need to slice off the first character of the string to remove the leading zero:
# Example phone number in the dotted format shown on the profile page.
out = "0752.172.817"
# Drop the leading zero by position (slicing), then turn the dot separators
# into spaces. Slicing is used so that zeros elsewhere in the number are kept.
# NOTE(review): the prefix literal is reproduced as it appears in the source;
# it may have lost a character during extraction -- confirm the intended prefix.
out = " " + out[1:].replace(".", " ")
print(out)
Note that you must not use replace()
to remove the leading zero, because the phone number may contain other zeros
and those would be replaced as well.
Output:
752 172 817