so here is my code, it creates the files which means it has access to write permissions but it just doesn't write anything inside, the function works and it prints on screen it just doesn't write anything, any idea why is that?
expected behavior: it to create two files and filter websites to working or non-working and state the error code for non-working ones
current behavior: it creates to empty files
import requests
from concurrent.futures import ThreadPoolExecutor
websites = []
f = open("websites.txt", "r")
for i in f:
if not i.startswith("http"):
i = "http://" i
websites.append(i.strip())
print("appended" i)
f.close()
with open("working.txt", "w") as fa, open("not_working.txt", "w") as fe:
def checker(website):
response = requests.get(website)
available = response.status_code == 200
print(response.status_code)
if available:
fa.write(website "\n")
else:
fe.write(website " error " response.status_code "\n")
with ThreadPoolExecutor() as executor:
executor.map(checker, websites)
CodePudding user response:
Code
executor.map(checker, websites)
creates generator
but it doesn't execute threads.
It needs at least list()
list( executor.map(checker, websites) )
to execute generator.
Full code:
import requests
from concurrent.futures import ThreadPoolExecutor
# --- functions ---
def read_urls(filename):
websites = []
with open(filename) as f:
for line in f:
line = line.strip()
if line: # skip empty lines
if not line.startswith("http"):
line = "http://" line
websites.append(line)
print("appended:", line)
return websites
def checker(website):
response = requests.get(website)
print('[checker]:', response.status_code, website)
if response.status_code == 200:
fa.write(f'{website}\n')
else:
fe.write(f'{website} error {response.status_code}\n')
# --- main ---
#websites = read_urls("websites.txt")
websites = ['https://stackoverflow.com', 'https://fake.com']
with open("working.txt", "w") as fa, open("not_working.txt", "w") as fe:
with ThreadPoolExecutor() as executor:
list( executor.map(checker, websites) )
But it may be safer to return result from function and write in main thread. And it will write results in the same order as data in original file. Writing in thread may get it in different order because threads may run in different order.
import requests
from concurrent.futures import ThreadPoolExecutor
# --- functions ---
def read_urls(filename):
websites = []
with open(filename) as f:
for line in f:
line = line.strip()
if line: # skip empty lines
if not line.startswith("http"):
line = "http://" line
websites.append(line)
print("appended:", line)
return websites
def checker(website):
response = requests.get(website)
print('[checker]:', response.status_code, website)
return website, response.status_code
# --- main ---
#websites = read_urls("websites.txt")
websites = ['https://stackoverflow.com', 'https://fake.com']
with open("working.txt", "w") as fa, open("not_working.txt", "w") as fe:
with ThreadPoolExecutor() as executor:
for website, status_code in executor.map(checker, websites):
print('[main]:', status_code, website)
if status_code == 200:
fa.write(f'{website}\n')
else:
fe.write(f'{website} error {status_code}\n')