I'm creating my own scraper API using fastAPI. the problem that I have is I seem to be doing something wrong as my data(scraped data) is not showing up on the browser. I've made my scraper into a class to use it in my FastAPI app and the data is shown on my console but not on the uvicorn server via the browser. which I configured properly. I've included the browser screenshot as well.
Scraper API
from fastapi import FastAPI
from Scraper import scrape
app = FastAPI()
data = scrape()
@app.get("/data")
async def songs():
return data.scrapedata()
Scraper
import time
from selenium import webdriver
import selenium
from selenium.webdriver.chrome import service
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
import pandas as pd
class scrape:
def scrapedata(self):
ser = Service("C:\Program Files (x86)\chromedriver.exe")
options = webdriver.ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-logging'])
driver = webdriver.Chrome(options=options,service=ser)
driver.get('https://soundcloud.com/jujubucks')
print(driver.title)
wait = WebDriverWait(driver,30)
wait.until(EC.element_to_be_clickable((By.ID,"onetrust-accept-btn-handler"))).click()
song_list = []
for i in range(1, 35):
song_contents = driver.find_element(By.XPATH, "//li[@class='soundList__item'][{}]".format(i))
driver.execute_script("arguments[0].scrollIntoView(true);",song_contents)
try:
search = song_contents.find_element(By.XPATH, ".//a[contains(@class,'soundTitle__username')]/span").text
search_song = song_contents.find_element(By.XPATH, ".//a[contains(@class,'soundTitle__title')]/span").text
search_date = song_contents.find_element(By.XPATH, ".//time[contains(@class,'relativeTime')]/span").text
search_plays = song_contents.find_element(By.XPATH, ".//span[contains(@class,'sc-ministats-small')]/span").text
except NoSuchElementException:
continue
if search_plays == False:
continue
option ={
'Artist': search,
'Song_title': search_song,
'Date': search_date,
'Streams': search_plays
}
song_list.append(option)
df = pd.DataFrame(song_list)
print(df)
driver.quit()
data = scrape()
data.scrapedata()
CodePudding user response:
You are literally just printing your result which should be passed to your FastAPI site
It should be return data
instead of print(data)
Like:
song_list.append(option)
df = pd.DataFrame(song_list)
return df
driver.quit()
which will be then, later, passed to the songs function of FastAPI
@app.get("/data")
async def songs():
return data.scrapedata()
And also, FastAPI will only return everything in JSON type, no matter if its a string, it'll end up appearing in a application/json
like response page
So it's better to convert the dataframe into dict before passing them into the FastAPI response like
return df.to_dict()
Or better just return the song_list
which reduces other unnecessary function operations (but you might have to process the response content into Dataframe if u really want it to be a Dataframe)
Like:
return song_list
Tell me if its not working...