I am trying to use Reddit's developer API to build a simple scraper that grabs posts and their replies in a target subreddit and produces JSON with the information.
I am getting a 404 error that I don't understand.
This is my code:
import praw
import json
def scrape(subreddit, limit):
r = praw.Reddit(user_agent='Reddit data organizer 1.0 by /u/reallymemorable', client_id='none of your business', client_secret='none of your business')
submissions = r.subreddit(subreddit).get_hot(limit=limit)
for submission in submissions:
data = {}
data['title'] = submission.title
data['score'] = submission.score
data['url'] = submission.url
data['author'] = str(submission.author)
data['subreddit'] = str(submission.subreddit)
data['num_comments'] = submission.num_comments
data['over_18'] = submission.over_18
data['selftext'] = submission.selftext
data['is_self'] = submission.is_self
data['name'] = submission.name
data['created_utc'] = submission.created_utc
data['permalink'] = submission.permalink
data['domain'] = submission.domain
data['id'] = submission.id
data['kind'] = submission.kind
json.dumps(data)
scrape('https://www.reddit.com/r/funny/', 25)
When I run it, I get this:
reallymemorable@Christians-MBP Desktop % python3 fetch-data-subreddit.py
Traceback (most recent call last):
File "/Users/reallymemorable/Desktop/fetch-data-subreddit.py", line 26, in <module>
scrape('https://www.reddit.com/r/augmentedreality/comments/yv7sn8/ar_maximum_distance/', 25)
File "/Users/reallymemorable/Desktop/fetch-data-subreddit.py", line 6, in scrape
submissions = r.subreddit(subreddit).get_hot(limit=limit)
File "/opt/homebrew/lib/python3.9/site-packages/praw/models/reddit/base.py", line 34, in __getattr__
self._fetch()
File "/opt/homebrew/lib/python3.9/site-packages/praw/models/reddit/subreddit.py", line 583, in _fetch
data = self._fetch_data()
File "/opt/homebrew/lib/python3.9/site-packages/praw/models/reddit/subreddit.py", line 580, in _fetch_data
return self._reddit.request(method="GET", params=params, path=path)
File "/opt/homebrew/lib/python3.9/site-packages/praw/util/deprecate_args.py", line 43, in wrapped
return func(**dict(zip(_old_args, args)), **kwargs)
File "/opt/homebrew/lib/python3.9/site-packages/praw/reddit.py", line 941, in request
return self._core.request(
File "/opt/homebrew/lib/python3.9/site-packages/prawcore/sessions.py", line 330, in request
return self._request_with_retries(
File "/opt/homebrew/lib/python3.9/site-packages/prawcore/sessions.py", line 266, in _request_with_retries
raise self.STATUS_EXCEPTIONS[response.status_code](response)
prawcore.exceptions.NotFound: received 404 HTTP response
CodePudding user response:
r.subreddit(subreddit)
- subreddit
should just be the name of the subreddit e.g. "funny" and not the full URL.
See the docs here: https://praw.readthedocs.io/en/stable/getting_started/quick_start.html#obtain-a-subreddit