Integrate new base_downloader class

This commit is contained in:
Serene-Arc
2021-02-25 20:40:08 +10:00
committed by Ali Parlakci
parent a75e94e43e
commit e0d321c785
13 changed files with 70 additions and 93 deletions

View File

@@ -3,42 +3,22 @@
import logging
from abc import ABC, abstractmethod
from typing import Optional
import requests
from praw.models import Submission
from bulkredditdownloader.errors import SiteDownloaderError
from bulkredditdownloader.authenticator import Authenticator
from bulkredditdownloader.resource import Resource
logger = logging.getLogger(__name__)
# Abstract base class for downloaders: stores the submission and declares the
# abstract hook that returns the submission's Resources.
# NOTE(review): indentation and diff +/- markers appear to have been lost in this
# view, so pre-change and post-change lines sit side by side (see the two
# `__init__` headers and the two abstract method headers below). The hunk header
# "@@ -3,42 +3,22 @@" suggests the file shrank, so presumably the shorter
# signatures and `_download_resource` are the removed lines — confirm against the
# real file before editing.
class BaseDownloader(ABC):
# presumably the pre-change constructor header, superseded by the next line — TODO confirm
def __init__(self, post: Submission):
# Constructor: `typical_extension` is optional and defaults to None.
def __init__(self, post: Submission, typical_extension: Optional[str] = None):
# Submission this downloader operates on.
self.post = post
# Starts as an empty list; nothing visible in this block appends to it.
self.hashes = []
# Stored as given; not read anywhere else in this block.
self.typical_extension = typical_extension
# presumably `download` is the pre-change name of the abstract hook and
# `find_resources` (with an optional authenticator) its replacement — TODO confirm
@abstractmethod
def download(self) -> list[Resource]:
def find_resources(self, authenticator: Optional[Authenticator] = None) -> list[Resource]:
"""Return list of all un-downloaded Resources from submission"""
raise NotImplementedError
# Fetch `resource_url` with the fixed request headers below and wrap the response
# body in a Resource built from (self.post, resource_url, content).
# Raises SiteDownloaderError when the request hits a ConnectionResetError.
def _download_resource(self, resource_url: str):
# Static headers sent with every request; the User-Agent is a desktop browser string.
headers = {
"User-Agent":
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "
"Safari/537.36 OPR/54.0.2952.64",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
"Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
"Accept-Encoding": "none",
"Accept-Language": "en-US,en;q=0.8",
"Connection": "keep-alive",
}
# Loop to attempt download 3 times
# NOTE(review): the except clause below raises on the first ConnectionResetError,
# so a failed attempt is never retried as written, and the loop index `i` is unused.
# NOTE(review): `requests.get` is called without a `timeout`, so a stalled
# connection can block indefinitely.
for i in range(3):
try:
download_content = requests.get(resource_url, headers=headers).content
except ConnectionResetError:
raise SiteDownloaderError
return Resource(self.post, resource_url, download_content)
# presumably reached only if the loop ends without returning — nesting is not
# recoverable from this view; TODO confirm
raise SiteDownloaderError