This is sample code. I need to read the search query from a text file and extract the result URLs from the request.
import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent so DuckDuckGo's HTML endpoint serves a normal results page.
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15;rv:84.0) Gecko/20100101 Firefox/84.0",
}

# Fetch the HTML search results page and collect every result-link anchor.
page = requests.get('https://duckduckgo.com/html/?q=test', headers=headers).text
soup = BeautifulSoup(page, 'html.parser').find_all("a", class_="result__url", href=True)

# Print the target URL of each search result.
# Fix: the loop body had lost its indentation in the paste.
for link in soup:
    print(link['href'])
CodePudding user response:
You can use an f-string to interpolate the search text into the URL:
# Interpolate the query into the URL with an f-string, then fetch the page.
search_text = "foo"
url = f'https://duckduckgo.com/html/?q={search_text}'
page = requests.get(url, headers=headers).text
CodePudding user response:
import os  # fix: os.name / os.system are used below but `os` was never imported
import requests, argparse, ScrapeSearchEngine, time, threading
from bs4 import BeautifulSoup

# Browser-like User-Agent so DuckDuckGo's HTML endpoint serves a normal results page.
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:84.0) Gecko/20100101 Firefox/84.0",
}

# Read the dork list, one dork per line, from the file given on the command line.
parser = argparse.ArgumentParser()
parser.add_argument("-d", "--dorks", help="Your dorks list", required=True)
args = parser.parse_args()
with open(args.dorks, 'r') as f:
    dorks = [line.strip('\n') for line in f]

scraped = 0
for dork in dorks:
    # Update the console title on Windows.
    # Fix: the '+' operators between the string parts were missing (SyntaxError).
    if os.name == "nt":
        os.system('title SQLI Crawler ^| Dork: ' + str(dork) + ' ^| Scraped Links: ' + str(scraped))
    search = dork
    # Fix: the original interpolated the undefined name `search_text`; use `search`.
    page = requests.get(f'https://duckduckgo.com/html/?q={search}', headers=headers).text
    soup = BeautifulSoup(page, 'html.parser').find_all("a", class_="result__url", href=True)
    # Print the target URL of each search result for this dork.
    for link in soup:
        print(link['href'])
CodePudding user response:
I added the new changes. I can't figure out how to set the number of result pages for the search and save the links to links.txt.
import os, requests, argparse, colorama, ScrapeSearchEngine, time, threading
from bs4 import BeautifulSoup
from colorama import Fore  # fix: `Fore` is used below but was never imported

# Browser-like User-Agent so DuckDuckGo's HTML endpoint serves a normal results page.
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:84.0) Gecko/20100101 Firefox/84.0",
}

parser = argparse.ArgumentParser()
parser.add_argument("-d", "--dorks", help="Your dorks list", required=True)
# Fix: `args.scan` is read below but was never declared as an argument.
parser.add_argument("-s", "--scan", help="Scan scraped links ('true' to enable)", default='false')
args = parser.parse_args()

# Read the dork list, one dork per line, from the file given on the command line.
with open(args.dorks, 'r') as f:
    dorks = [line.strip('\n') for line in f]

scraped = 0
for dork in dorks:
    # Update the console title on Windows.
    # Fix: the '+' operators between the string parts were missing (SyntaxError).
    if os.name == "nt":
        os.system('title SQLI Crawler ^| Dork: ' + str(dork) + ' ^| Scraped Links: ' + str(scraped))
    search = dork
    page = requests.get(f'https://duckduckgo.com/html/?q={search}', headers=headers).text
    soup = BeautifulSoup(page, 'html.parser').find_all("a", class_="result__url", href=True)
    # Fix: the original iterated the undefined name `ScrapedLinks`; iterate the
    # anchors actually found above and take each anchor's href as the link.
    for anchor in soup:
        link = anchor['href']
        scraped += 1  # fix: `scraped = 1` never counted beyond the first link
        # Append each link to links.txt.
        # Fix: 'a ' is an invalid open() mode (-> 'a'); '+' was missing before "\n";
        # use a context manager so the file handle is not leaked.
        with open('links.txt', 'a') as out:
            out.write(link + "\n")
        # Fix: '+' was missing before `link` in the print concatenation.
        print(f"[{Fore.CYAN}{time.strftime('%H:%M:%S')}{Fore.RESET}] [{Fore.YELLOW}INFO{Fore.RESET}] " + link)
        if args.scan == 'true':
            # NOTE(review): `scanner` is not defined anywhere in this snippet —
            # it must be provided elsewhere before enabling --scan.
            threading.Thread(target=scanner, args=(link,)).start()