# Fetches https://coreyms.com, parses the HTML with BeautifulSoup (lxml
# parser) and loops over every <article> element, printing its headline and
# a YouTube link built from the embedded player's "src" attribute.
from bs4 import BeautifulSoup
import requests as r
d='https://coreyms.com'
da=r.get(d).text  # body of the HTTP response as text
print(da)
corey=BeautifulSoup(da,'lxml')
print(corey.prettify())
for article in corey.find_all('article'):
    hd=article.h2.a.text  # text of the <a> inside the article's <h2>
    print(hd)
    summ=article.find('div',class_='entry-content').p.text
    # NOTE(review): `des` is never assigned anywhere above (the summary is
    # stored in `summ`), so this line raises NameError.
    print(des)
    # NOTE(review): find() returns None when an <article> has no matching
    # <iframe>; subscripting None with ["src"] then raises TypeError.
    vid=article.find('iframe',class_="youtube-player")["src"] # Upon inspection, this line of code is not running correctly in the loop. Am I missing something?
    splt_vd_link=vid.split('/')[4]  # fifth '/'-separated piece of the src URL
    splt_vd_link=splt_vd_link.split('?')[0]  # drop everything from the first '?' on
    # NOTE(review): the literal below has no '//' after 'https:'.
    y_link=f'https:youtube.com/watch?v={splt_vd_link}'
    print(y_link)
    print()  # blank line between articles
I am learning how to scrape a webpage by following a YouTube tutorial. My code is exactly the same as the code shown in the tutorial, but it raises an error when I run it. I am totally lost.
CodePudding user response:
Your first problem is that print(des) prints a variable that does not exist (the summary is stored in summ, not des).
The second problem is that not every <article>
tag on the page you are trying to scrape contains an <iframe>
with a YouTube link. For those articles find() returns None, so you need to skip that iteration of the loop:
if vid is None:
    # No matching <iframe> was found in this <article>; move on to the next one.
    continue
Full working code:
# Fetch https://coreyms.com and, for every <article> that embeds a YouTube
# player, print its headline, its summary and a YouTube watch link.
from bs4 import BeautifulSoup
import requests as r

d = 'https://coreyms.com'
# Use a timeout so a stalled connection cannot hang the script, and raise on
# HTTP error statuses instead of parsing an error page as if it were the site.
resp = r.get(d, timeout=10)
resp.raise_for_status()
da = resp.text
print(da)
corey = BeautifulSoup(da, 'lxml')
print(corey.prettify())
for article in corey.find_all('article'):
    # Look the player up first: find() returns None when this <article> has
    # no matching <iframe>, and subscripting None with ["src"] would raise
    # TypeError, so such articles are skipped entirely.
    vid = article.find('iframe', class_="youtube-player")
    if vid is None:
        continue
    vid = vid["src"]
    hd = article.h2.a.text
    print(hd)
    summ = article.find('div', class_='entry-content').p.text
    print(summ)
    # The video id is the fifth '/'-separated piece of the src URL, with any
    # query string (everything from '?') removed.
    splt_vd_link = vid.split('/')[4]
    splt_vd_link = splt_vd_link.split('?')[0]
    # 'https://' needs both slashes; 'https:youtube.com/...' is not a valid
    # absolute URL.
    y_link = f'https://youtube.com/watch?v={splt_vd_link}'
    print(y_link)
    print()