I want to store the article URL in a variable named `url` so that I can save it to a CSV file (Excel sheet), but I get `UnboundLocalError: local variable 'url' referenced before assignment`.
class NewsSpider(scrapy.Spider):
    """Spider that scrapes a single article URL typed in by the user.

    The scraped fields are yielded as an item and also appended as one row
    to a CSV file.
    """

    name = "articles"

    def start_requests(self):
        """Prompt for an article URL and schedule a request for it."""
        url = input("Enter the article url: ")
        yield scrapy.Request(url, callback=self.parse_dir_contents)

    def parse_dir_contents(self, response):
        """Yield the scraped article item and append it to the CSV file.

        Args:
            response: The response passed to this callback for the request
                created in ``start_requests``.

        Yields:
            A dict holding the scraped article fields.
        """
        import os

        # The ``url`` in ``start_requests`` is local to that method, so
        # ``url = url`` here raised UnboundLocalError. The response carries
        # the URL it was fetched from (after redirects, this may differ from
        # the URL that was typed in).
        url = response.url

        # NOTE(review): Category, Headlines, Author, Source, Published_Date,
        # Feature_Image, skift_take and Content are not assigned anywhere in
        # this snippet -- presumably the extraction code was elided; they
        # must be defined before this point.
        yield {
            'Category': Category,
            'Headlines': Headlines,
            'Author': Author,
            'Source': Source,
            'Publication Date': Published_Date,
            'Feature_Image': Feature_Image,
            'Skift Take': skift_take,
            'Article Content': Content
        }

        # =============== Data Store
        csv_path = 'C:/Users/Public/pagedata.csv'
        columns = ['Category', 'Headlines', 'Author', 'Source',
                   'Published_Date', 'Feature_Image', 'Content', 'URL']
        row = [[Category, Headlines, Author, Source,
                Published_Date, Feature_Image, Content, url]]
        df = pd.DataFrame(row, columns=columns)
        print(df)

        # Write the header only when the file is new or empty, so repeated
        # runs append plain rows. Passing the path (rather than a text-mode
        # handle opened without newline='') lets pandas control line endings
        # and avoids blank rows on Windows.
        needs_header = (
            not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0
        )
        df.to_csv(csv_path, mode='a', header=needs_header)
CodePudding user response:
You can read the URL from the response object: use `response.url`
instead of `url = url`, i.e.:
url = response.url
#OR
def parse_dir_contents(self, response):
    """Yield the scraped article fields together with the response's URL."""
    # NOTE(review): the field values (Category, Headlines, ...) are not
    # assigned in this snippet -- presumably extracted by code not shown.
    item = {
        'Category': Category,
        'Headlines': Headlines,
        'Author': Author,
        'Source': Source,
        'Publication Date': Published_Date,
        'Feature_Image': Feature_Image,
        'Skift Take': skift_take,
        'Article Content': Content,
        # The URL is taken from the response itself; no separate variable
        # needs to be carried over from start_requests.
        'url': response.url,
    }
    yield item