Change Resource model

This commit is contained in:
Serene-Arc
2021-02-26 18:56:05 +10:00
committed by Ali Parlakci
parent 28f5ea69c3
commit 228cd5f687

View File

@@ -3,25 +3,52 @@
import hashlib import hashlib
import re import re
import time
from typing import Optional
import _hashlib
import requests
from praw.models import Submission from praw.models import Submission
from bulkredditdownloader.errors import BulkDownloaderException
class Resource:
    """A remote file belonging to a submission, fetched lazily from its URL.

    Constructing a Resource performs no network access: ``content`` and
    ``hash`` stay ``None`` until :meth:`download` succeeds.
    """

    # Back-off schedule used by retry_download (values are seconds, as passed
    # to time.sleep): the delay grows by _WAIT_INCREMENT after every failed
    # attempt and retrying stops once the delay has reached _MAX_WAIT_TIME.
    _MAX_WAIT_TIME = 300
    _WAIT_INCREMENT = 60

    def __init__(self, source_submission: Submission, url: str, extension: Optional[str] = None):
        """Store the resource's origin and work out its file extension.

        Args:
            source_submission: The submission this resource was found in.
            url: Location the content is downloaded from.
            extension: File extension including the leading dot. When omitted
                (or empty) it is derived from ``url``; it remains ``None`` if
                the URL does not end in a recognisable extension.
        """
        self.source_submission = source_submission
        self.content: Optional[bytes] = None
        self.url = url
        self.hash: Optional[_hashlib.HASH] = None
        self.extension = extension
        if not self.extension:
            self.extension = self._determine_extension()

    @staticmethod
    def retry_download(url: str, wait_time: int) -> Optional[bytes]:
        """Fetch ``url``, retrying with a growing delay on failure.

        A connection error or any status code other than 200 counts as a
        failed attempt. After each failure the function sleeps for the current
        ``wait_time`` and, unless that delay has already reached the maximum,
        tries again with the delay increased by one increment.

        Args:
            url: Address to request.
            wait_time: Seconds to sleep after the first failed attempt.

        Returns:
            The response body, or ``None`` once the retries are exhausted.
        """
        # NOTE(review): requests.get is called without a timeout, so a stalled
        # connection blocks here instead of counting as a failed attempt.
        while True:
            try:
                response = requests.get(url)
            except requests.exceptions.ConnectionError:
                response = None
            if response is not None and response.status_code == 200:
                return response.content

            time.sleep(wait_time)
            if wait_time >= Resource._MAX_WAIT_TIME:
                return None
            wait_time += Resource._WAIT_INCREMENT

    def download(self) -> None:
        """Download the content and compute its MD5 hash, once.

        Does nothing if content is already present.

        Raises:
            BulkDownloaderException: If no content could be retrieved.
        """
        if self.content:
            return
        content = self.retry_download(self.url, 0)
        if not content:
            raise BulkDownloaderException('Could not download resource')
        self.content = content
        self.hash = hashlib.md5(self.content)

    def _determine_extension(self) -> Optional[str]:
        """Return the extension (with leading dot) at the end of the URL path.

        Only the path component is inspected, so query strings, fragments and
        the host name cannot be mistaken for an extension. Returns ``None``
        when the path does not end in a dot followed by 3-5 word characters.
        """
        # Imported here because the module's import block does not provide it.
        from urllib.parse import urlsplit

        try:
            path = urlsplit(self.url).path
        except ValueError:
            # Malformed URL (e.g. unbalanced IPv6 brackets): fall back to the
            # raw string rather than failing construction of the Resource.
            path = self.url
        match = re.search(r'(\.\w{3,5})$', path)
        return match.group(1) if match else None