get almost zero performance gain
i think i missing something here..
def take_page_scr(dict_item, driver) -> None:
print(dict_item['id'])
driver.get(dict_item['url'])
driver.set_window_size(500, 900)
(
Image.open(
io.BytesIO(
driver.get_screenshot_as_png()
)
)
.convert("RGB")
.save(f"./dst/{dict_item['id']}.jpg", quality=85)
)
driver.quit()
def main_async(data):
async def main(data):
options = get_options()
await asyncio.gather(
*(
asyncio.to_thread(
take_page_scr,
i, webdriver.Chrome(options=options)
)
for i in data
)
)
print()
print('#DONE')
asyncio.run(
main(
data
)
)
# 13.397236824035645
# 13.26906943321228
here is basic setup
def main_sync(data):
options = get_options()
driver = webdriver.Chrome(options=options)
for i in data:
print(i['id'])
try:
driver.get(i['url'])
driver.set_window_size(500, 900)
image_bytes = io.BytesIO(driver.get_screenshot_as_png())
img = Image.open(image_bytes).convert("RGB")
img.save(f"./dst/{i['id']}.jpg", quality=85)
except Exception:
pass
driver.quit()
print()
print('#DONE')
# 16.04508686065674
# 16.138192653656006
i guess problem in webdriver.Chrome(options=options)
def main_sync_bad():
options = get_options()
[
take_page_scr(
i, webdriver.Chrome(options=options)
) for i in data
]
print()
print('#DONE')
# 76.43093585968018
# 78.09915900230408
but i do not know how to propagate it to many threads
CodePudding user response:
and the answer is - just don't use poor designed crappy things like lxml or selenium, use well documented, nice designed and supported libraries, even if they from micro$oft ...
async def coro(dict_item: TestData, browser: BrowserContext) -> None:
print(dict_item["hash"])
page = await browser.new_page()
await page.goto(dict_item["url"])
await page.screenshot(path=f'./res/{dict_item["hash"]}.jpg', type="jpeg")
async def main() -> None:
async with async_playwright() as p:
browser: BrowserContext = await p.chromium.launch_persistent_context(
user_data_dir=f"{Path.home()}/.config/chromium",
executable_path="/usr/bin/chromium",
viewport={
"width": 500,
"height": 900
},
)
await asyncio.gather(*(coro(i, browser) for i in data()))
await browser.close()
# 7.495748519897461
# 2.4119105339050293
# 2.3972327709198
do it more than 2-3 times faster! and working with it just pure pleasure..!