Home > other >  Remove unwanted character from string
Remove unwanted character from string

Time:12-07

from selenium import webdriver
import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
import pandas as pd
from csv import writer


# Launch Chrome, load the casino review page, and pull the payout details
# and the support e-mail out of the rendered HTML.
options = webdriver.ChromeOptions()
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
# Chrome parses the window size as "width,height".
options.add_argument("--window-size=1920,1080")
options.add_argument("--disable-extensions")
# The options only take effect when they are handed to the driver.
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)
wait = WebDriverWait(driver, 10)

url = "https://www.askgamblers.com/online-casinos/reviews/casino-friday"
driver.get(url)


# Parse the HTML as rendered by the browser.
soup = BeautifulSoup(driver.page_source, "html.parser")

product = []

pays = soup.select("div#tabPayments")

for pay in pays:
    try:
        t4 = pay.select_one(" .review-details-wrapper  .review-details-wrapper .review-details__item:nth-child(2) .review-details__text")
        # Keep every child node that carries text; the newlines are still in
        # there, which is the raw output this question is about.
        t4 = [i for i in t4 if i.text]
    except (TypeError, AttributeError):
        # select_one() returned None (selector did not match), so there is
        # nothing to iterate for this tab.
        pass

supports = soup.find("div", {"id": "tabCustomers"})
supports = supports.find("div", {"class": "review-details__text"})
# Drop the newlines, then keep the part right after the first colon.
email = "Support Email:" + supports.text.replace("\n", "").split(":")[1]
print(email)

It shows me output like this:

['\nSupport\nEmail:\[email protected]\n', '\n']

but I want output like this:

 Support Email:[email protected] 

I want to remove all unwanted characters from my string. Please recommend a solution. This is the page link: https://www.askgamblers.com/online-casinos/reviews/casino-friday

CodePudding user response:

Full Code

from selenium import webdriver
import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
import pandas as pd
from csv import writer


# Launch Chrome, load the casino review page, and print the payout times and
# the support e-mail with the stray newlines removed.
options = webdriver.ChromeOptions()
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
# Chrome parses the window size as "width,height".
options.add_argument("--window-size=1920,1080")
options.add_argument("--disable-extensions")
# The options only take effect when they are handed to the driver.
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)
wait = WebDriverWait(driver, 10)

url = "https://www.askgamblers.com/online-casinos/reviews/casino-friday"

try:
    driver.get(url)

    # Parse the HTML as rendered by the browser.
    soup = BeautifulSoup(driver.page_source, "html.parser")

    product = []

    pays = soup.select("div#tabPayments")

    for pay in pays:
        details = pay.select_one(
            " .review-details-wrapper  .review-details-wrapper .review-details__item:nth-child(2) .review-details__text")
        if details is None:
            # Selector did not match inside this tab; nothing to print.
            continue
        # Children are a mix of tags and bare strings, so read .text from
        # each one before cleaning it instead of calling .replace() on the
        # node itself.
        cleaned = (child.text.replace("\n", "").strip() for child in details)
        # Skip the whitespace-only nodes that sit between the real entries.
        t4 = [text for text in cleaned if text]
        print(t4)

    supports = soup.find("div", {"id": "tabCustomers"})
    supports = supports.find("div", {"class": "review-details__text"})
    # Drop the newlines, then keep the part right after the first colon.
    email = "Support Email:" + supports.text.replace("\n", "").split(":")[1]
    print(email)
finally:
    # Always close the browser, even if scraping fails part-way through.
    driver.quit()

Output

['EWallets:0-1 hours', 'Bank Transfers:1-7 days', 'Cheques:Not offered', 'Card Payments:1-7 days', 'Pending Time:0-24 hours']
Support Email:[email protected]

Hope this helps. Happy Coding :)

CodePudding user response:

Looks like you could utilize two methods to achieve your goals: replace() and split() before appending.

CodePudding user response:

You can remove any unwanted character from a string with the .replace() method by replacing that character with an empty string.
Let's say your result string is stored in a variable named product. To remove the unwanted [ character, you can do this:

# Replace every '[' in product with an empty string and rebind the result.
product = product.replace('[', '')

Doing that for every character you want to remove leads to code like this:

# Remove each unwanted character from product in turn, rebinding the result
# after every call.
product = product.replace('[', '')
product = product.replace(']', '')  # closing bracket, not a second '['
product = product.replace('\n', '')
product = product.replace("'", "")
product = product.replace(",", "")
# Finally trim whatever whitespace is left at either end.
product = product.strip()

The final strip() removes leading and trailing whitespace.

  • Related