I am new to python & want to use multithreading for validating email lists (3000 in CSV file). I try this article: https://stackoverflow.com/a/66413087/12031112 but I can not implement multithreading(email validation part works fine) & gets error messages. Any help will be appreciated.
pool_sema = threading.Semaphore(value=maxthreads)
# make empty list
fields = []
emails_list = []
deliverable_list = []
hostexists_list = []
csvFilename = r'test2.csv'
with open(csvFilename, 'r', encoding="utf-8") as csvFile:
csvreader = csv.reader(csvFile)
# extracting field names through first row
fields = next(csvreader) # skip the header
for row in csvreader:
x,y = [str(value)for value in row]
inputAddress = str(y)
emails_list.append(inputAddress)
def task(i):
pool_sema.acquire()
try:
normal_verifier = Verifier(source_addr='[email protected]')
results = normal_verifier.verify(i) # i will be email to be verify
deliverable_list.append(results["deliverable"])
hostexists_list.append(results["host_exists"])
time.sleep(3)
except Exception as e:
print("Error")
finally:
pool_sema.release()
def create_threads(number_of_threads):
try:
threads = [threading.Thread(target=task, args=(i)) for i in emails_list]
[t.start() for t in threads]
except Exception as e:
print("Error")
finally:
[t.join() for t in threads]
create_threads((len(emails_list)))
# Create Data frame
# Making number of columns
validating_Email = pd.DataFrame(columns=['Email', 'Deliverable', 'Host exists'])
#Dictionary
data_dictionary = {'Email':emails_list, 'Deliverable':deliverable_list, 'Host exists' :hostexists_list}
# Dictionary to Pandas DataFrame
df = pd.DataFrame.from_dict(data_dictionary)
# Save Data in EXCEL
df.to_excel('gmail_multi_thread_1.xlsx', index = False)
got errors:
Exception in thread Thread-505:
Traceback (most recent call last):
File "C:\Users\Aaditri\anaconda3\lib\threading.py", line 973, in _bootstrap_inner
Exception in thread Thread-506:
Traceback (most recent call last):
File "C:\Users\Aaditri\anaconda3\lib\threading.py", line 973, in _bootstrap_inner
Exception in thread Thread-507:
Exception in thread self.run()
File "C:\Users\Aaditri\anaconda3\lib\threading.py", line 910, in run
File "C:\Users\Aaditri\anaconda3\lib\threading.py", line 910, in run
self._target(*self._args, **self._kwargs)
TypeError: task() takes 1 positional argument but 28 were given
Exception in thread Thread-530:
Traceback (most recent call last):
File "C:\Users\Aaditri\anaconda3\lib\threading.py", line 973, in _bootstrap_inner
Exception in thread Thread-531:
CodePudding user response:
The threading.Thread args
expects a tuple (or otherwise an iterable), but you're passing it i
which I presume is a str
of length 28, which threading will pass into your task
method as 28 different arguments. Since you're only passing a single argument, you still have to make it a tuple
by adding a ,
.
# notice you should put a comma here in args=(i,)
threads = [threading.Thread(target=task, args=(i,)) for i in emails_list]
Here's the code in threading.Thread
where it actually passes your arguments to the target
(your task
method) - it unpacks all of the args
you gave it, so it would try to unpack your email as the individual characters.
def run(self):
"""Method representing the thread's activity.
You may override this method in a subclass. The standard run() method
invokes the callable object passed to the object's constructor as the
target argument, if any, with sequential and keyword arguments taken
from the args and kwargs arguments, respectively.
"""
try:
if self._target is not None:
self._target(*self._args, **self._kwargs)
finally:
# Avoid a refcycle if the thread is running a function with
# an argument that has a member that points to the thread.
del self._target, self._args, self._kwargs