This is sample code. I need to read the search query from a text file and extract the result URLs from the request.
import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent so DuckDuckGo's HTML endpoint serves a normal results page.
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15;rv:84.0) Gecko/20100101 Firefox/84.0",
}

# Fetch the HTML search results page and collect every result-link anchor.
page = requests.get('https://duckduckgo.com/html/?q=test', headers=headers).text
soup = BeautifulSoup(page, 'html.parser').find_all("a", class_="result__url", href=True)

# Print the target URL of each search result.
# Fix: the loop body had lost its indentation in the paste.
for link in soup:
    print(link['href'])
CodePudding user response:
You can use an f-string to interpolate the search text into the URL:
# Interpolate the query into the URL with an f-string, then fetch the page.
search_text = "foo"
url = f'https://duckduckgo.com/html/?q={search_text}'
page = requests.get(url, headers=headers).text
CodePudding user response:
import os  # fix: os.name / os.system are used below but `os` was never imported
import requests, argparse, ScrapeSearchEngine, time, threading
from bs4 import BeautifulSoup

# Browser-like User-Agent so DuckDuckGo's HTML endpoint serves a normal results page.
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:84.0) Gecko/20100101 Firefox/84.0",
}

# Read the dork list, one dork per line, from the file given on the command line.
parser = argparse.ArgumentParser()
parser.add_argument("-d", "--dorks", help="Your dorks list", required=True)
args = parser.parse_args()
with open(args.dorks, 'r') as f:
    dorks = [line.strip('\n') for line in f]

scraped = 0
for dork in dorks:
    # Update the console title on Windows.
    # Fix: the '+' operators between the string parts were missing (SyntaxError).
    if os.name == "nt":
        os.system('title SQLI Crawler ^| Dork: ' + str(dork) + ' ^| Scraped Links: ' + str(scraped))
    search = dork
    # Fix: the original interpolated the undefined name `search_text`; use `search`.
    page = requests.get(f'https://duckduckgo.com/html/?q={search}', headers=headers).text
    soup = BeautifulSoup(page, 'html.parser').find_all("a", class_="result__url", href=True)
    # Print the target URL of each search result for this dork.
    for link in soup:
        print(link['href'])
CodePudding user response:
I added the new changes. I can't figure out how to set the number of result pages for the search and save the links to links.txt.
import os, requests, argparse, colorama, ScrapeSearchEngine, time, threading
from bs4 import BeautifulSoup
from colorama import Fore  # fix: `Fore` is used below but was never imported

# Browser-like User-Agent so DuckDuckGo's HTML endpoint serves a normal results page.
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:84.0) Gecko/20100101 Firefox/84.0",
}

parser = argparse.ArgumentParser()
parser.add_argument("-d", "--dorks", help="Your dorks list", required=True)
# Fix: `args.scan` is read below but was never declared as an argument.
parser.add_argument("-s", "--scan", help="Scan scraped links ('true' to enable)", default='false')
args = parser.parse_args()

# Read the dork list, one dork per line, from the file given on the command line.
with open(args.dorks, 'r') as f:
    dorks = [line.strip('\n') for line in f]

scraped = 0
for dork in dorks:
    # Update the console title on Windows.
    # Fix: the '+' operators between the string parts were missing (SyntaxError).
    if os.name == "nt":
        os.system('title SQLI Crawler ^| Dork: ' + str(dork) + ' ^| Scraped Links: ' + str(scraped))
    search = dork
    page = requests.get(f'https://duckduckgo.com/html/?q={search}', headers=headers).text
    soup = BeautifulSoup(page, 'html.parser').find_all("a", class_="result__url", href=True)
    # Fix: the original iterated the undefined name `ScrapedLinks`; iterate the
    # anchors actually found above and take each anchor's href as the link.
    for anchor in soup:
        link = anchor['href']
        scraped += 1  # fix: `scraped = 1` never counted beyond the first link
        # Append each link to links.txt.
        # Fix: 'a ' is an invalid open() mode (-> 'a'); '+' was missing before "\n";
        # use a context manager so the file handle is not leaked.
        with open('links.txt', 'a') as out:
            out.write(link + "\n")
        # Fix: '+' was missing before `link` in the print concatenation.
        print(f"[{Fore.CYAN}{time.strftime('%H:%M:%S')}{Fore.RESET}] [{Fore.YELLOW}INFO{Fore.RESET}] " + link)
        if args.scan == 'true':
            # NOTE(review): `scanner` is not defined anywhere in this snippet —
            # it must be provided elsewhere before enabling --scan.
            threading.Thread(target=scanner, args=(link,)).start()