Add a combined command for the archiver and downloader: clone (#433)

* Simplify downloader function

* Add basic scraper class

* Add "scrape" command

* Rename "scrape" command to "clone"

* Add integration tests for clone command

* Update README

* Fix failing test
This commit is contained in:
Serene
2021-06-06 20:29:09 +10:00
committed by GitHub
parent a2f010c40d
commit 434aeb8feb
6 changed files with 139 additions and 34 deletions

View File

@@ -42,19 +42,20 @@ class RedditDownloader(RedditConnector):
def download(self):
for generator in self.reddit_lists:
for submission in generator:
if submission.id in self.excluded_submission_ids:
logger.debug(f'Object {submission.id} in exclusion list, skipping')
continue
elif submission.subreddit.display_name.lower() in self.args.skip_subreddit:
logger.debug(f'Submission {submission.id} in {submission.subreddit.display_name} in skip list')
else:
logger.debug(f'Attempting to download submission {submission.id}')
self._download_submission(submission)
self._download_submission(submission)
def _download_submission(self, submission: praw.models.Submission):
if not isinstance(submission, praw.models.Submission):
if submission.id in self.excluded_submission_ids:
logger.debug(f'Object {submission.id} in exclusion list, skipping')
return
elif submission.subreddit.display_name.lower() in self.args.skip_subreddit:
logger.debug(f'Submission {submission.id} in {submission.subreddit.display_name} in skip list')
return
elif not isinstance(submission, praw.models.Submission):
logger.warning(f'{submission.id} is not a submission')
return
logger.debug(f'Attempting to download submission {submission.id}')
try:
downloader_class = DownloadFactory.pull_lever(submission.url)
downloader = downloader_class(submission)