I have a problem with my threading code.
Here is my code:
def read_email_from_gmail():
    """Print the number words "one" through "ten" in order.

    Each word is printed followed by a space and an explicit newline, so
    together with print's own line ending every word is followed by a
    blank line.
    """
    data = ["one","two","three",'four','five','six','seven','eight','nine','ten']
    for x in data:
        print(f'{x} \n')
# Ask how many worker threads to start.
threads = input("\n Amount of Thread ?: ")
for i in range(int(threads)):
    # Every thread runs the whole function, so the full list is printed once
    # per thread and lines from different threads may interleave.
    threading.Thread(target=read_email_from_gmail).start()
I want the output to be sequential when using multithreading, but I get a result like this.
My expected result is:
one
two
... until
nine
ten
Is something wrong with my code?
My original code:
def read_email_from_gmail():
    """Search the inbox for "verify email" messages and save the links found.

    Connects to ``SMTP_SERVER`` over IMAP/SSL, logs in with ``ORG_EMAIL`` and
    ``FROM_PWD``, and searches the inbox for messages whose subject matches
    "verify email". Every message id from the first match through the last
    match (inclusive) is fetched. For each ``text/plain`` part the URLs are
    extracted, a progress line is printed, and the extracted links are
    appended to ``list.txt``.

    Prints "No Email Found" when the search returns no ids.
    """
    # The with-statement logs out of the server on exit, including when an
    # exception is raised part-way through.
    with imaplib.IMAP4_SSL(SMTP_SERVER) as mail:
        mail.login(ORG_EMAIL, FROM_PWD)
        mail.select('inbox')
        data = mail.search(None, '(Subject "verify email")')
        mail_ids = data[1]
        id_list = mail_ids[0].split()
        if not id_list:
            print("No Email Found")
            return

        print(f"{Fore.LIGHTWHITE_EX}----------------------------------------{Fore.RESET}")
        print(f"{Fore.LIGHTMAGENTA_EX} Found {len(id_list)} Email{Fore.RESET}")
        print(f"{Fore.LIGHTWHITE_EX}----------------------------------------{Fore.RESET}")
        first_email_id = int(id_list[0])
        latest_email_id = int(id_list[-1])

        # Build the extractor once; it does not depend on the message.
        extractor = URLExtract()
        extractor.update()

        n = 0
        # "+ 1" makes the range include the latest id.
        for i in range(first_email_id, latest_email_id + 1):
            data = mail.fetch(str(i), '(RFC822)')
            for response_part in data:
                arr = response_part[0]
                # Only (header, body) tuples carry message content.
                if not isinstance(arr, tuple):
                    continue
                msg = email.message_from_string(str(arr[1], 'utf-8'))
                for part in msg.walk():
                    if part.get_content_type() != 'text/plain':
                        continue
                    plain_text = part.get_payload(decode=True)
                    # NOTE(review): str() on the decoded payload yields its
                    # repr; the replace chain below strips the resulting
                    # escape residue. Kept as-is to preserve list.txt output.
                    str_enc = str(plain_text)
                    urls = extractor.find_urls(str_enc)
                    str_url = str(urls).replace("\\","").replace("rnrn'","").replace('"','').replace("[","").replace("]","")
                    to = msg['to']
                    rancolor = random.choice(color)
                    print(f"[{Fore.MAGENTA}{n}{Fore.RESET}] {rancolor}[ {to} ] Found Verification Link {Fore.RESET}")
                    n += 1
                    with open("list.txt", "a") as save:
                        save.write(f"{str_url}\n")
CodePudding user response:
You have essentially this code
# Time a sequential fetch of every message id.
t0 = time()
for id_ in id_list:
    data = mail.fetch(id_, '(RFC822)' )
    ...
elapsed = time() - t0
and you complain that `elapsed` is "big". OK.
Having e.g. two threads process that list will just result in double work, not what you want, as both threads visit each ID.
What you're looking for is ThreadPoolExecutor.
def read_message(id_: str):
    """Fetch the full RFC822 message for one id and return the fetch result."""
    # NOTE(review): every worker uses the same `mail` connection; confirm it
    # is safe to share between threads, or open one connection per worker.
    data = mail.fetch(id_, '(RFC822)' )
    return data


with ThreadPoolExecutor(max_workers=2) as pool:
    # Map each future back to the message id it was submitted for, so each
    # id is fetched exactly once across the pool.
    future_to_id = {pool.submit(read_message, id_): id_ for id_ in id_list}
    for future in as_completed(future_to_id):
        id_ = future_to_id[future]
        data = future.result()
        ...
This will go faster than single threading, if contention for the single GIL is low because network operations are "slow".
Using multiprocessing strikes me as simpler than threading for your initial try.
# pool.map returns the results in the same order as id_list.
with Pool(processes=2) as pool:
    for data in pool.map(read_message, id_list):
        print(data)
        ...