I have written a Playwright async function which extracts laboratory results from our lab information system. However, I struggle to get rid of the following error despite "await"-ing every request I can see. The code still runs successfully, but the error is annoying. Using Python 3.11 Error produced:
RuntimeWarning: coroutine 'Page.inner_html' was never awaited if j >= 10 :
RuntimeWarning: Enable tracemalloc to get the object allocation traceback
Inside the file functions.py:
...
async def scrape_results(page, links_master):
master_results=pd.DataFrame()
j=1
for link in links_master:
await page.goto("https://trakcarelabwebview.nhls.ac.za/trakcarelab/csp/system.Home.cls" link)
await page.wait_for_load_state(state="networkidle")
await page.wait_for_selector(loading_icon, state="hidden")
await page.wait_for_timeout(500)
await page.wait_for_selector(loading_icon, state="hidden")
await page.locator(test_item_caption).text_content(timeout=3000) == "TestItem"
#Getting results off the page
html = await page.content()
soup = BeautifulSoup(html, "lxml")
tables = soup.find_all('table')
dfs = pd.read_html(str(tables))
df=dfs[1]
try:
word_result = BeautifulSoup(page.inner_html("#web_EPVisitTestSet_WordResult_0-ngForm > div > div:nth-child(2)"), "lxml").text_content()
except:
episode = await page.locator("#web_EPVisitNumber_List_Banner-row-0-item-Episode").text_content()
collectiondate = await page.locator("#web_EPVisitTestSet_Result_0-item-CollectionDate").text_content()
collectiontime = await page.locator("#web_EPVisitTestSet_Result_0-item-CollectionTime").text_content()
resultdate = await page.locator("#web_EPVisitTestSet_Result_0-item-ResultDate").text_content()
resulttime = await page.locator("#web_EPVisitTestSet_Result_0-item-ResultTime").text_content()
specimen_info = {
'episode': episode,
'collectiondate': collectiondate,
'collectiontime': collectiontime,
'resultdate':resultdate,
'resulttime':resulttime
}
cleanup_results(df)
#df = df.assign("Episode"==specimen_info.episode)
#df = df.assign("Episode"==specimen_info['episode'])
df=df.assign(Episode=specimen_info['episode'])
df=df.assign(Collection_Date=specimen_info["collectiondate"])
df=df.assign(Collection_Time=specimen_info["collectiontime"])
df=df.assign(Result_Date=specimen_info["resultdate"])
df=df.assign(Result_Time=specimen_info["resulttime"])
master_results=pd.concat([master_results,df], ignore_index=True)
j =1
print("Result number __" str(j) "__ extracted")
if j >= 10 :
print(master_results)
try:
print(word_result)
break
except:
print("No word_result found...")
break
...
I have tried adding await to all queries made to the Playwright page object, but I may have missed something.
This is the code where I'm calling the function:
import asyncio
from playwright.async_api import Playwright, async_playwright, expect
import functions
#The elements to interact with
username_box = "#SSUser_Logon_0-item-USERNAME"
password_box = "#SSUser_Logon_0-item-PASSWORD"
...
_username = "User"
_password = "Password"
_param = "MRN139822539"
async def run(playwright: Playwright, _username, _password, _param) -> None:
browser = await playwright.chromium.launch(headless=False, slow_mo=50)
try:
context = await browser.new_context(storage_state="state.json")
print("Storage state loaded...")
except:
print("Error upon loading storage state - continuing with login...")
page = await context.new_page()
await page.goto("https://the-web-site-im-visiting.com")
all_links = await functions.get_links(page)
await functions.scrape_results(page, all_links)
context = await browser.new_context(storage_state="state.json")
await page.wait_for_timeout(2000)
await context.close()
await browser.close()
async def main() -> None:
async with async_playwright() as playwright:
await run(playwright, _username, _password, _param)
asyncio.run(main())
CodePudding user response:
I would say is this line:
word_result = BeautifulSoup(page.inner_html("#web_EPVisitTestSet_WordResult_0-ngForm > div > div:nth-child(2)"), "lxml").text_content()
You can await for page.inner_html:
word_result = BeautifulSoup(await page.inner_html("#web_EPVisitTestSet_WordResult_0-ngForm > div > div:nth-child(2)"), "lxml").text_content()