Use config value to specify max wait time

This commit is contained in:
Serene-Arc
2021-04-12 16:47:04 +10:00
committed by Ali Parlakci
parent 0c960a4d0c
commit aa8032e95f
13 changed files with 19 additions and 17 deletions

View File

@@ -2,4 +2,5 @@
client_id = U-6gk4ZCh3IeNQ client_id = U-6gk4ZCh3IeNQ
client_secret = 7CZHY6AmKweZME5s50SfDGylaPg client_secret = 7CZHY6AmKweZME5s50SfDGylaPg
scopes = identity, history, read, save scopes = identity, history, read, save
backup_log_count = 3 backup_log_count = 3
max_wait_time = 120

View File

@@ -362,7 +362,7 @@ class RedditDownloader:
logger.debug(f'File {destination} already exists, continuing') logger.debug(f'File {destination} already exists, continuing')
else: else:
try: try:
res.download() res.download(self.cfg_parser.getint('DEFAULT', 'max_wait_time', fallback=120))
except errors.BulkDownloaderException as e: except errors.BulkDownloaderException as e:
logger.error( logger.error(
f'Failed to download resource {res.url} with downloader {downloader_class.__name__}: {e}') f'Failed to download resource {res.url} with downloader {downloader_class.__name__}: {e}')

View File

@@ -27,7 +27,8 @@ class Resource:
self.extension = self._determine_extension() self.extension = self._determine_extension()
@staticmethod @staticmethod
def retry_download(url: str, wait_time: int) -> Optional[bytes]: def retry_download(url: str, max_wait_time: int) -> Optional[bytes]:
wait_time = 60
try: try:
response = requests.get(url) response = requests.get(url)
if response.status_code == 200: if response.status_code == 200:
@@ -40,16 +41,16 @@ class Resource:
except requests.exceptions.ConnectionError as e: except requests.exceptions.ConnectionError as e:
logger.warning(f'Error occured downloading from {url}, waiting {wait_time} seconds: {e}') logger.warning(f'Error occured downloading from {url}, waiting {wait_time} seconds: {e}')
time.sleep(wait_time) time.sleep(wait_time)
if wait_time < 300: if wait_time < max_wait_time:
return Resource.retry_download(url, wait_time + 60) return Resource.retry_download(url, max_wait_time)
else: else:
logger.error(f'Max wait time exceeded for resource at url {url}') logger.error(f'Max wait time exceeded for resource at url {url}')
raise raise
def download(self): def download(self, max_wait_time: int):
if not self.content: if not self.content:
try: try:
content = self.retry_download(self.url, 0) content = self.retry_download(self.url, max_wait_time)
except requests.exceptions.ConnectionError as e: except requests.exceptions.ConnectionError as e:
raise BulkDownloaderException(f'Could not download resource: {e}') raise BulkDownloaderException(f'Could not download resource: {e}')
except BulkDownloaderException: except BulkDownloaderException:

View File

@@ -21,5 +21,5 @@ def test_download_resource(test_url: str, expected_hash: str):
resources = test_site.find_resources() resources = test_site.find_resources()
assert len(resources) == 1 assert len(resources) == 1
assert isinstance(resources[0], Resource) assert isinstance(resources[0], Resource)
resources[0].download() resources[0].download(120)
assert resources[0].hash.hexdigest() == expected_hash assert resources[0].hash.hexdigest() == expected_hash

View File

@@ -52,6 +52,6 @@ def test_download_resource(test_url: str, expected_hashes: tuple[str]):
mock_submission.url = test_url mock_submission.url = test_url
test_site = Erome(mock_submission) test_site = Erome(mock_submission)
resources = test_site.find_resources() resources = test_site.find_resources()
[res.download() for res in resources] [res.download(120) for res in resources]
resource_hashes = [res.hash.hexdigest() for res in resources] resource_hashes = [res.hash.hexdigest() for res in resources]
assert len(resource_hashes) == len(expected_hashes) assert len(resource_hashes) == len(expected_hashes)

View File

@@ -55,6 +55,6 @@ def test_gallery_download(test_submission_id: str, expected_hashes: set[str], re
test_submission = reddit_instance.submission(id=test_submission_id) test_submission = reddit_instance.submission(id=test_submission_id)
gallery = Gallery(test_submission) gallery = Gallery(test_submission)
results = gallery.find_resources() results = gallery.find_resources()
[res.download() for res in results] [res.download(120) for res in results]
hashes = [res.hash.hexdigest() for res in results] hashes = [res.hash.hexdigest() for res in results]
assert set(hashes) == expected_hashes assert set(hashes) == expected_hashes

View File

@@ -32,5 +32,5 @@ def test_download_resource(test_url: str, expected_hash: str):
resources = test_site.find_resources() resources = test_site.find_resources()
assert len(resources) == 1 assert len(resources) == 1
assert isinstance(resources[0], Resource) assert isinstance(resources[0], Resource)
resources[0].download() resources[0].download(120)
assert resources[0].hash.hexdigest() == expected_hash assert resources[0].hash.hexdigest() == expected_hash

View File

@@ -33,5 +33,5 @@ def test_download_resource(test_url: str, expected_hash: str):
resources = test_site.find_resources() resources = test_site.find_resources()
assert len(resources) == 1 assert len(resources) == 1
assert isinstance(resources[0], Resource) assert isinstance(resources[0], Resource)
resources[0].download() resources[0].download(120)
assert resources[0].hash.hexdigest() == expected_hash assert resources[0].hash.hexdigest() == expected_hash

View File

@@ -129,7 +129,7 @@ def test_find_resources(test_url: str, expected_hashes: list[str]):
downloader = Imgur(mock_download) downloader = Imgur(mock_download)
results = downloader.find_resources() results = downloader.find_resources()
assert all([isinstance(res, Resource) for res in results]) assert all([isinstance(res, Resource) for res in results])
[res.download() for res in results] [res.download(120) for res in results]
hashes = set([res.hash.hexdigest() for res in results]) hashes = set([res.hash.hexdigest() for res in results])
assert len(results) == len(expected_hashes) assert len(results) == len(expected_hashes)
assert hashes == set(expected_hashes) assert hashes == set(expected_hashes)

View File

@@ -33,5 +33,5 @@ def test_download_resource(test_url: str, expected_hash: str):
resources = test_site.find_resources() resources = test_site.find_resources()
assert len(resources) == 1 assert len(resources) == 1
assert isinstance(resources[0], Resource) assert isinstance(resources[0], Resource)
resources[0].download() resources[0].download(120)
assert resources[0].hash.hexdigest() == expected_hash assert resources[0].hash.hexdigest() == expected_hash

View File

@@ -19,5 +19,5 @@ def test_find_resources(test_submission_id: str, expected_hash: str, reddit_inst
resources = downloader.find_resources() resources = downloader.find_resources()
assert len(resources) == 1 assert len(resources) == 1
assert isinstance(resources[0], Resource) assert isinstance(resources[0], Resource)
resources[0].download() resources[0].download(120)
assert resources[0].hash.hexdigest() == expected_hash assert resources[0].hash.hexdigest() == expected_hash

View File

@@ -22,5 +22,5 @@ def test_find_resources(test_url: str, expected_hash: str):
resources = downloader.find_resources() resources = downloader.find_resources()
assert len(resources) == 1 assert len(resources) == 1
assert isinstance(resources[0], Resource) assert isinstance(resources[0], Resource)
resources[0].download() resources[0].download(120)
assert resources[0].hash.hexdigest() == expected_hash assert resources[0].hash.hexdigest() == expected_hash

View File

@@ -28,5 +28,5 @@ def test_resource_get_extension(test_url: str, expected: str):
)) ))
def test_download_online_resource(test_url: str, expected_hash: str): def test_download_online_resource(test_url: str, expected_hash: str):
test_resource = Resource(MagicMock(), test_url) test_resource = Resource(MagicMock(), test_url)
test_resource.download() test_resource.download(120)
assert test_resource.hash.hexdigest() == expected_hash assert test_resource.hash.hexdigest() == expected_hash