Move to different program structure

This commit is contained in:
Serene-Arc
2021-02-11 09:10:40 +10:00
committed by Ali Parlakci
parent a72abd6603
commit a7f1db14e5
24 changed files with 504 additions and 2133 deletions

View File

@@ -1,105 +1,46 @@
#!/usr/bin/env python3
# coding=utf-8
import hashlib
import logging
import re
from abc import ABC, abstractmethod
from pathlib import Path
import requests
from praw.models import Submission
from bulkredditdownloader.errors import DomainInSkip, FailedToDownload, FileAlreadyExistsError, TypeInSkip
from bulkredditdownloader.utils import GLOBAL
from bulkredditdownloader.errors import FailedToDownload
from bulkredditdownloader.resource import Resource
logger = logging.getLogger(__name__)
class BaseDownloader(ABC):
    """Abstract base class for downloaders that fetch the media of one post.

    Subclasses implement :meth:`download`; the helpers below cover the shared
    chores of fetching a URL, hashing content and guessing a file extension.

    Annotations that name praw/project types are written as strings so they
    are not evaluated when the class is defined (this also keeps the
    ``list[...]`` return annotation harmless on interpreters older than 3.9).
    """

    # How many times a single resource is requested before giving up.
    _DOWNLOAD_ATTEMPTS = 3

    # Extensions recognised directly from a URL; compiled once for all calls.
    _EXTENSION_PATTERN = re.compile(r'(\.(jpg|jpeg|png|mp4|webm|gif))')

    def __init__(self, directory: Path, post: "Submission"):
        """Store the target directory and the post this downloader works on.

        Args:
            directory: Directory associated with this download.
            post: The submission whose resources are to be downloaded.
        """
        self.directory = directory
        self.post = post
        # Starts empty; presumably filled with content hashes by subclasses
        # or callers -- nothing in this class writes to it.
        self.hashes = []

    @abstractmethod
    def download(self) -> "list[Resource]":
        """Download everything belonging to ``self.post``.

        Returns:
            The downloaded resources.
        """
        raise NotImplementedError

    @staticmethod
    def _create_hash(content: bytes) -> str:
        """Return the hexadecimal MD5 digest of ``content``."""
        return hashlib.md5(content).hexdigest()

    def _download_resource(self, resource_url: str) -> "Resource":
        """Fetch ``resource_url`` and wrap the response body in a ``Resource``.

        The request is attempted up to ``_DOWNLOAD_ATTEMPTS`` times; only
        connection failures trigger another attempt, any other exception
        raised by ``requests`` propagates unchanged.

        Args:
            resource_url: URL of the resource to fetch.

        Returns:
            ``Resource(self.post, resource_url, <response body bytes>)``.

        Raises:
            FailedToDownload: If every attempt ended in a connection failure.
        """
        headers = {
            "User-Agent":
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "
                "Safari/537.36 OPR/54.0.2952.64",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
            "Accept-Encoding": "none",
            "Accept-Language": "en-US,en;q=0.8",
            "Connection": "keep-alive",
        }

        last_error = None
        for _ in range(self._DOWNLOAD_ATTEMPTS):
            try:
                # NOTE: the HTTP status code is not inspected here, so the
                # body of an error response is returned like any other body.
                download_content = requests.get(resource_url, headers=headers).content
            except (requests.exceptions.ConnectionError, ConnectionResetError) as err:
                # requests reports connection problems through its own
                # ConnectionError; the builtin is kept for compatibility.
                last_error = err
                continue
            return Resource(self.post, resource_url, download_content)

        raise FailedToDownload from last_error

    @staticmethod
    def _get_extension(url: str) -> str:
        """Guess a file extension (with leading dot) from ``url``.

        The first known extension found anywhere in the URL is returned.
        When none is present, ``'.mp4'`` is assumed for URLs containing
        ``v.redd.it`` and ``'.jpg'`` for everything else.
        """
        if match := BaseDownloader._EXTENSION_PATTERN.search(url):
            return match[0]
        return '.mp4' if "v.redd.it" in url else '.jpg'