Home > front end >  Playwright error: 'Page.inner_html' was never awaited RuntimeWarning: Enable tracemalloc t
Playwright error: coroutine 'Page.inner_html' was never awaited (RuntimeWarning: Enable tracemalloc to get the object allocation traceback)

Time:12-21

I have written a Playwright async function which extracts laboratory results from our lab information system. However, I am struggling to get rid of the following error despite `await`-ing every request I can see. The code still runs successfully, but the error is annoying. I am using Python 3.11. Error produced:

RuntimeWarning: coroutine 'Page.inner_html' was never awaited
  if j >= 10 :
RuntimeWarning: Enable tracemalloc to get the object allocation traceback

Inside the file functions.py:

...
async def scrape_results(page, links_master):
    """Visit each link in ``links_master`` and collect the result table found there.

    For every link the page is loaded, its HTML is parsed with BeautifulSoup
    and ``pd.read_html``, and the second parsed table (``dfs[1]``) is tagged
    with the specimen details read from the page before being appended to
    ``master_results``.  The loop stops once the counter ``j`` reaches 10.

    Args:
        page: Playwright async page used for navigation and element lookups.
        links_master: iterable of strings appended to the base URL.

    The excerpt is elided after the loop, so what (if anything) is returned
    is not visible here.
    """
    master_results=pd.DataFrame()
    j=1
    for link in links_master:
        await page.goto("https://trakcarelabwebview.nhls.ac.za/trakcarelab/csp/system.Home.cls" + link)
        # Wait until the page has settled: network idle, then the loading
        # icon hidden, checked twice with a 500 ms pause in between.
        await page.wait_for_load_state(state="networkidle")
        await page.wait_for_selector(loading_icon, state="hidden")
        await page.wait_for_timeout(500)
        await page.wait_for_selector(loading_icon, state="hidden")
        # The comparison result is discarded; the statement only reads the
        # caption text (3 s timeout) before the scrape continues.
        await page.locator(test_item_caption).text_content(timeout=3000) == "TestItem"
            #Getting results off the page
        html = await page.content()
        soup = BeautifulSoup(html, "lxml")
        tables = soup.find_all('table')
        dfs = pd.read_html(str(tables))
        df=dfs[1]
        # NOTE(review): page.inner_html(...) is called without `await`, so
        # BeautifulSoup is handed a coroutine object - this is the call named
        # in the RuntimeWarning.  Anything raised here is swallowed by the
        # bare `except`, whose body does the actual per-result work.
        try:
            word_result = BeautifulSoup(page.inner_html("#web_EPVisitTestSet_WordResult_0-ngForm > div > div:nth-child(2)"), "lxml").text_content()
        except:
            episode = await page.locator("#web_EPVisitNumber_List_Banner-row-0-item-Episode").text_content()
            collectiondate = await page.locator("#web_EPVisitTestSet_Result_0-item-CollectionDate").text_content()
            collectiontime = await page.locator("#web_EPVisitTestSet_Result_0-item-CollectionTime").text_content()
            resultdate = await page.locator("#web_EPVisitTestSet_Result_0-item-ResultDate").text_content()
            resulttime = await page.locator("#web_EPVisitTestSet_Result_0-item-ResultTime").text_content()
            specimen_info = {
                    'episode': episode,
                    'collectiondate': collectiondate,
                    'collectiontime': collectiontime,
                    'resultdate':resultdate,
                    'resulttime':resulttime
                    }

            cleanup_results(df)
            #df = df.assign("Episode"==specimen_info.episode)
            #df = df.assign("Episode"==specimen_info['episode'])
            # Add the specimen details as constant columns on every row.
            df=df.assign(Episode=specimen_info['episode'])
            df=df.assign(Collection_Date=specimen_info["collectiondate"])
            df=df.assign(Collection_Time=specimen_info["collectiontime"])
            df=df.assign(Result_Date=specimen_info["resultdate"])
            df=df.assign(Result_Time=specimen_info["resulttime"])

            master_results=pd.concat([master_results,df], ignore_index=True)
            j += 1
            print("Result number __" + str(j) + "__ extracted")
            if j >= 10 :
                print(master_results)
                # word_result is only bound if the try above succeeded, so
                # printing it can itself fail; either way the loop ends here.
                try:
                    print(word_result)
                    break
                except:
                    print("No word_result found...")
                    break
...

I have tried adding await to all queries made to the Playwright page object, but I may have missed something.

This is the code where I'm calling the function:

import asyncio
from playwright.async_api import Playwright, async_playwright, expect

import functions

# The elements to interact with (presumably CSS id selectors, going by the
# leading "#" - confirm against the login page)
username_box = "#SSUser_Logon_0-item-USERNAME"
password_box = "#SSUser_Logon_0-item-PASSWORD"
...

# Values that main() passes on to run(); run() as shown does not read them.
_username = "User"
_password = "Password"
_param = "MRN139822539"


async def run(playwright: Playwright, _username, _password, _param) -> None:
    """Launch Chromium, open the site and scrape every result link.

    Args:
        playwright: the driver object yielded by ``async_playwright()``.
        _username, _password, _param: accepted for the caller's benefit;
            not read by the code shown here.

    The browser is always closed, even if scraping raises.
    """
    browser = await playwright.chromium.launch(headless=False, slow_mo=50)
    try:
        try:
            context = await browser.new_context(storage_state="state.json")
            print("Storage state loaded...")
        except Exception:
            print("Error upon loading storage state - continuing with login...")
            # Without this fallback `context` would be unbound and the next
            # statement would raise NameError instead of continuing.
            context = await browser.new_context()
        page = await context.new_page()
        await page.goto("https://the-web-site-im-visiting.com")
        all_links = await functions.get_links(page)
        await functions.scrape_results(page, all_links)

        # NOTE(review): the original opened a second, unused context from
        # "state.json" at this point.  If the intent was to persist the
        # session, `await context.storage_state(path="state.json")` is the
        # call for that - confirm before adding it.
        await page.wait_for_timeout(2000)
        await context.close()
    finally:
        await browser.close()
    

async def main() -> None:
    """Start Playwright and pass the driver to ``run`` with the module-level
    ``_username``, ``_password`` and ``_param`` values."""
    async with async_playwright() as pw:
        await run(pw, _username, _password, _param)


# Only start the scraper when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    asyncio.run(main())

CodePudding user response:

I would say it is this line:

word_result = BeautifulSoup(page.inner_html("#web_EPVisitTestSet_WordResult_0-ngForm > div > div:nth-child(2)"), "lxml").text_content()

`page.inner_html` is a coroutine in the async API, so you need to await it:

word_result = BeautifulSoup(await page.inner_html("#web_EPVisitTestSet_WordResult_0-ngForm > div > div:nth-child(2)"), "lxml").text_content()
  • Related