From ac91c9089c652e77a23ea44a556632e4d8e17636 Mon Sep 17 00:00:00 2001
From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com>
Date: Sat, 10 Dec 2022 21:19:29 -0500
Subject: [PATCH] Add 5xx soft fail for clone/archive

---
Note: the outer handler catches PRAW errors raised while iterating a listing
(as opposed to errors raised while processing one submission). `submission`
is reset to None for every listing so that handler never reads an unbound
name, or an id left over from a previous listing, when the very first fetch
of a listing fails.

 bdfr/archiver.py | 44 +++++++++++++++++++++++++++-----------------
 bdfr/cloner.py   | 22 ++++++++++++++++------
 2 files changed, 43 insertions(+), 23 deletions(-)

diff --git a/bdfr/archiver.py b/bdfr/archiver.py
index 28a270b..e2ed33d 100644
--- a/bdfr/archiver.py
+++ b/bdfr/archiver.py
@@ -4,6 +4,7 @@
 import json
 import logging
 import re
+from time import sleep
 from typing import Iterator, Union
 
 import dict2xml
@@ -28,23 +29,32 @@ class Archiver(RedditConnector):
 
     def download(self):
         for generator in self.reddit_lists:
-            for submission in generator:
-                try:
-                    if (submission.author and submission.author.name in self.args.ignore_user) or (
-                        submission.author is None and "DELETED" in self.args.ignore_user
-                    ):
-                        logger.debug(
-                            f"Submission {submission.id} in {submission.subreddit.display_name} skipped"
-                            f" due to {submission.author.name if submission.author else 'DELETED'} being an ignored user"
-                        )
-                        continue
-                    if submission.id in self.excluded_submission_ids:
-                        logger.debug(f"Object {submission.id} in exclusion list, skipping")
-                        continue
-                    logger.debug(f"Attempting to archive submission {submission.id}")
-                    self.write_entry(submission)
-                except prawcore.PrawcoreException as e:
-                    logger.error(f"Submission {submission.id} failed to be archived due to a PRAW exception: {e}")
+            # Reset per listing so the outer handler never reads an unbound or stale submission
+            submission = None
+            try:
+                for submission in generator:
+                    try:
+                        if (submission.author and submission.author.name in self.args.ignore_user) or (
+                            submission.author is None and "DELETED" in self.args.ignore_user
+                        ):
+                            logger.debug(
+                                f"Submission {submission.id} in {submission.subreddit.display_name} skipped due to"
+                                f" {submission.author.name if submission.author else 'DELETED'} being an ignored user"
+                            )
+                            continue
+                        if submission.id in self.excluded_submission_ids:
+                            logger.debug(f"Object {submission.id} in exclusion list, skipping")
+                            continue
+                        logger.debug(f"Attempting to archive submission {submission.id}")
+                        self.write_entry(submission)
+                    except prawcore.PrawcoreException as e:
+                        logger.error(f"Submission {submission.id} failed to be archived due to a PRAW exception: {e}")
+            except prawcore.PrawcoreException as e:
+                # Raised while fetching the next item from the listing (e.g. a 5xx response)
+                where = "The first submission" if submission is None else f"The submission after {submission.id}"
+                logger.error(f"{where} failed to download due to a PRAW exception: {e}")
+                logger.debug("Waiting 60 seconds to continue")
+                sleep(60)
 
     def get_submissions_from_link(self) -> list[list[praw.models.Submission]]:
         supplied_submissions = []
diff --git a/bdfr/cloner.py b/bdfr/cloner.py
index c26d17b..e82cfaa 100644
--- a/bdfr/cloner.py
+++ b/bdfr/cloner.py
@@ -2,6 +2,7 @@
 # coding=utf-8
 
 import logging
+from time import sleep
 
 import prawcore
 
@@ -18,9 +19,18 @@ class RedditCloner(RedditDownloader, Archiver):
 
     def download(self):
         for generator in self.reddit_lists:
-            for submission in generator:
-                try:
-                    self._download_submission(submission)
-                    self.write_entry(submission)
-                except prawcore.PrawcoreException as e:
-                    logger.error(f"Submission {submission.id} failed to be cloned due to a PRAW exception: {e}")
+            # Reset per listing so the outer handler never reads an unbound or stale submission
+            submission = None
+            try:
+                for submission in generator:
+                    try:
+                        self._download_submission(submission)
+                        self.write_entry(submission)
+                    except prawcore.PrawcoreException as e:
+                        logger.error(f"Submission {submission.id} failed to be cloned due to a PRAW exception: {e}")
+            except prawcore.PrawcoreException as e:
+                # Raised while fetching the next item from the listing (e.g. a 5xx response)
+                where = "The first submission" if submission is None else f"The submission after {submission.id}"
+                logger.error(f"{where} failed to download due to a PRAW exception: {e}")
+                logger.debug("Waiting 60 seconds to continue")
+                sleep(60)