I am currently writing a script in which I want to store the headers and cookies whenever the website presents a challenge that I have to solve with my own custom solution in order to log in. Once the login challenge is solved, I want to store the session together with the proxy it was solved on. This is what I have written:
from threading import Thread
import test_by_me
# Cache of solved sessions to be re-used, keyed by the session's "https" proxy.
# Each entry is a dict with the "headers" and "cookies" of the solved session.
saved_session: dict = {}
def injection(session, response):
    """Hook that solves a detected challenge and remembers the session state.

    When ``test_by_me.challenge`` reports a challenge for this response, the
    challenge is solved and the session's headers and cookies are stored in
    ``saved_session`` under the session's "https" proxy.

    Returns the result of ``test_by_me.solve`` when a challenge was detected,
    otherwise the unmodified ``response``.
    """
    # Nothing to solve: hand the response back untouched
    if not test_by_me.challenge(session, response):
        return response

    solved_response = test_by_me.solve(session, response)

    # Remember the state of the solved session so it can be re-used later
    snapshot = {
        "headers": session.headers,
        "cookies": session.cookies,
    }
    saved_session[session.proxies["https"]] = snapshot

    return solved_response
def create_session():
    """Build a scraper session that has ``injection`` installed as its hook."""
    scraper = test_by_me.create_scraper(Hook=injection)
    return scraper
# ------------------------------------------------------------------------------- #
# Each thread runs this function independently
def from_page(url):
    """Request ``url`` in an endless loop, each time through a random proxy.

    Every iteration opens a fresh session, picks a random proxy and, if
    ``saved_session`` already holds an entry for that proxy, re-applies the
    stored headers and cookies before issuing the GET.
    """
    while True:
        with create_session() as session:
            proxy = proxies.random_proxies()  # Returns a single random proxy

            # The session must use the very same proxy value that is looked up
            # below: the hook stores entries under session.proxies["https"],
            # so a different value here would never match a stored key.
            session.proxies = {
                'https': proxy,
            }

            # Single lookup instead of "in" followed by two subscripts
            stored = saved_session.get(proxy)
            if stored is not None:
                session.headers = stored['headers']
                session.cookies = stored['cookies']

            # Keep the GET inside the session's "with" block
            resp = session.get(url, timeout=6)
            ...
main:
def main() -> None:
    """Start one thread per URL, each running ``from_page`` for that URL."""
    db_urls = [...]

    # Start threads for each url in the difference
    for url in db_urls:
        # Pass the callable and its arguments separately. Writing
        # target=from_page(url) would run from_page right here in the main
        # thread instead of in the new thread.
        Thread(
            target=from_page,
            args=(url,),
        ).start()
# Run main() only when this file is executed as a script
if __name__ == '__main__':
    main()
However, my problem is that I cannot figure out how to share the variable (saved_session) between all the threads that are alive, so that I can re-use the same session if the same proxy is generated again by the line proxy = proxies.random_proxies()
-> If it is e.g. 'http://192.168.1.1:1841' and that proxy is already stored in saved_session, then I want to re-use the session. How can I do that?
CodePudding user response:
Why not inject the session
object into each thread's target function and use a threading.Lock
to guard changes to the shared variables' state?
Maybe you don't need to reuse the same session... still, I would advise against spawning a session per thread; it becomes tricky and inefficient.
import threading
# Mapping shared by all threads; entries are read by proxy and expose
# "headers" and "cookies" keys
saved_session: dict = {}
# Lock taken by the worker threads while they change shared state
lock = threading.Lock()
def from_page(url, session):
    """Request ``url`` in an endless loop through the shared ``session``.

    Each iteration picks a random proxy, points the session at it and, if
    ``saved_session`` holds an entry for that proxy, re-applies the stored
    headers and cookies before issuing the GET.
    """
    while True:
        proxy = proxies.random_proxies()  # Returns a single random proxy

        # using lock to change shared variables
        with lock:
            # Use the proxy picked above so that the value set on the session
            # is the same one that is looked up in saved_session.
            session.proxies = {
                'https': proxy,
            }
            stored = saved_session.get(proxy)
            if stored is not None:
                session.headers = stored['headers']
                session.cookies = stored['cookies']

        # NOTE(review): the session is shared between threads, so another
        # thread may re-configure it between the locked section above and this
        # request - confirm whether the GET should also run under the lock.
        resp = session.get(url, timeout=6)
        ...
def main() -> None:
    """Start one ``from_page`` thread per URL, all sharing a single session."""
    db_urls = [...]

    with create_session() as session:
        # Start threads for each url in the difference
        workers = [
            Thread(target=from_page, args=(url, session))
            for url in db_urls
        ]
        for worker in workers:
            worker.start()

        # Wait inside the "with" block. Leaving it right after start() would
        # run the session's context exit while the workers still use it.
        for worker in workers:
            worker.join()