Move to different program structure
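
The restructure pivots on a small Resource value object: downloaders stop writing files themselves and instead return what they fetched. The diff imports it from bulkredditdownloader.resource but never shows that module, so the following is only a sketch inferred from the Resource(post, url, content) calls and the download() -> list[Resource] signature below; attribute and method names beyond the three constructor arguments are assumptions.

# Hypothetical sketch of bulkredditdownloader/resource.py, inferred from this
# diff's constructor calls; not the commit's actual file.
import hashlib

from praw.models import Submission


class Resource:
    def __init__(self, source_submission: Submission, url: str, content: bytes):
        self.source_submission = source_submission  # assumed attribute name
        self.url = url
        self.content = content

    def hash(self) -> str:
        # Mirrors BaseDownloader._create_hash() in the first hunk (assumed helper)
        return hashlib.md5(self.content).hexdigest()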
bulkredditdownloader/site_downloaders/base_downloader.py
@@ -1,105 +1,46 @@
 #!/usr/bin/env python3
 # coding=utf-8

 import hashlib
 import logging
 import re
 from abc import ABC, abstractmethod
 from pathlib import Path

 import requests
+from praw.models import Submission

-from bulkredditdownloader.errors import DomainInSkip, FailedToDownload, FileAlreadyExistsError, TypeInSkip
-from bulkredditdownloader.utils import GLOBAL
+from bulkredditdownloader.errors import FailedToDownload
+from bulkredditdownloader.resource import Resource

 logger = logging.getLogger(__name__)


 class BaseDownloader(ABC):
-    def __init__(self, directory: Path, post: dict):
+    def __init__(self, directory: Path, post: Submission):
         self.directory = directory
         self.post = post
         self.hashes = []

     @abstractmethod
-    def download(self):
+    def download(self) -> list[Resource]:
         raise NotImplementedError

     @staticmethod
     def _create_hash(content: bytes) -> str:
         hash_md5 = hashlib.md5(content)
         return hash_md5.hexdigest()

-    @staticmethod
-    def _download_resource(filename: Path, folder_dir: Path, image_url: str, indent: int = 0, silent: bool = False):
-        formats = {
-            "videos": [".mp4", ".webm"],
-            "images": [".jpg", ".jpeg", ".png", ".bmp"],
-            "gifs": [".gif"],
-            "self": []
-        }
-
-        for file_type in GLOBAL.arguments.skip:
-            for extension in formats[file_type]:
-                if extension in filename:
-                    raise TypeInSkip
-
-        if any(domain in image_url for domain in GLOBAL.arguments.skip_domain):
-            raise DomainInSkip
-
-        headers = [
-            ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
-                           "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "
-                           "Safari/537.36 OPR/54.0.2952.64"),
-            ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"),
-            ("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"),
-            ("Accept-Encoding", "none"),
-            ("Accept-Language", "en-US,en;q=0.8"),
-            ("Connection", "keep-alive")
-        ]
-
-        folder_dir.mkdir(exist_ok=True)
-
-        if "imgur" not in image_url:
-            addheaders = headers
-        else:
-            addheaders = None
-
-        if not silent:
-            logger.info(" " * indent + str(folder_dir), " " * indent + str(filename), sep="\n")
-
-        # Loop to attempt download 3 times
-        for i in range(3):
-            file_path = Path(folder_dir) / filename
-
-            if file_path.is_file():
-                raise FileAlreadyExistsError
-            else:
-                try:
-                    download_content = requests.get(image_url, headers=addheaders).content
-                except ConnectionResetError:
-                    raise FailedToDownload
-
-                file_hash = BaseDownloader._create_hash(download_content)
-                if GLOBAL.arguments.no_dupes:
-                    if file_hash in GLOBAL.downloadedPosts():
-                        raise FileAlreadyExistsError
-                GLOBAL.downloadedPosts.add(file_hash)
-
-                with open(file_path, 'wb') as file:
-                    file.write(download_content)
-                if not silent:
-                    logger.info(" " * indent + "Downloaded" + " " * 10)
-                return
-
-        raise FailedToDownload
+    def _download_resource(self, resource_url: str):
+        headers = {
+            "User-Agent":
+                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "
+                "Safari/537.36 OPR/54.0.2952.64",
+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
+            "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
+            "Accept-Encoding": "none",
+            "Accept-Language": "en-US,en;q=0.8",
+            "Connection": "keep-alive",
+        }
+        try:
+            download_content = requests.get(resource_url, headers=headers).content
+        except ConnectionResetError:
+            raise FailedToDownload
+        return Resource(self.post, resource_url, download_content)

     @staticmethod
     def _get_extension(url: str) -> str:
         pattern = re.compile(r'(\.(jpg|jpeg|png|mp4|webm|gif))')
         if results := re.search(pattern, url):
             if len(results.groups()) > 1:
                 return results[0]
         if "v.redd.it" not in url:
             return '.jpg'
         else:
             return '.mp4'
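
With _download_resource reduced to a single HTTP fetch that returns a Resource, path construction, duplicate detection, and disk writes all move out of the downloader hierarchy. The consumer side is not part of this commit; the sketch below only illustrates how a caller might persist the resources returned by the Direct downloader shown in the next file (every name in it is illustrative, not from this codebase).

# Illustrative consumer of the new download() contract; not from this commit.
from pathlib import Path

from praw.models import Submission

from bulkredditdownloader.site_downloaders.direct import Direct


def save_submission(submission: Submission, directory: Path):
    directory.mkdir(exist_ok=True)
    for i, resource in enumerate(Direct(directory, submission).download()):
        # The caller now chooses filenames; the downloader only fetched bytes.
        out_file = directory / f"{submission.id}_{i}{Path(resource.url).suffix}"
        out_file.write_bytes(resource.content)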
bulkredditdownloader/site_downloaders/direct.py
@@ -2,18 +2,14 @@

 import pathlib

+from praw.models import Submission
+
 from bulkredditdownloader.site_downloaders.base_downloader import BaseDownloader
-from bulkredditdownloader.utils import GLOBAL


 class Direct(BaseDownloader):
-    def __init__(self, directory: pathlib.Path, post: dict):
+    def __init__(self, directory: pathlib.Path, post: Submission):
         super().__init__(directory, post)
         self.download()

     def download(self):
-        self.post['EXTENSION'] = self._get_extension(self.post['CONTENTURL'])
-        self.directory.mkdir(exist_ok=True)
-
-        filename = GLOBAL.config['filename'].format(**self.post) + self.post["EXTENSION"]
-        self._download_resource(pathlib.Path(filename), self.directory, self.post['CONTENTURL'])
+        return [self._download_resource(self.post.url)]
bulkredditdownloader/site_downloaders/erome.py
@@ -7,77 +7,39 @@ import urllib.error
 import urllib.request
 from html.parser import HTMLParser

+from praw.models import Submission
+
+from bulkredditdownloader.errors import NotADownloadableLinkError
 from bulkredditdownloader.site_downloaders.base_downloader import BaseDownloader
-from bulkredditdownloader.errors import AlbumNotDownloadedCompletely, FileAlreadyExistsError, NotADownloadableLinkError
-from bulkredditdownloader.utils import GLOBAL

 logger = logging.getLogger(__name__)


 class Erome(BaseDownloader):
-    def __init__(self, directory: pathlib.Path, post: dict):
+    def __init__(self, directory: pathlib.Path, post: Submission):
         super().__init__(directory, post)
         self.download()

     def download(self):
         try:
-            images = self._get_links(self.post['CONTENTURL'])
+            images = self._get_links(self.post.url)
         except urllib.error.HTTPError:
             raise NotADownloadableLinkError("Not a downloadable link")

-        images_length = len(images)
-        how_many_downloaded = len(images)
-        duplicates = 0
-
-        if images_length == 1:
-            """Filenames are declared here"""
-            filename = GLOBAL.config['filename'].format(**self.post) + self.post["EXTENSION"]
+        if len(images) == 1:
             image = images[0]
             if not re.match(r'https?://.*', image):
                 image = "https://" + image
-
-            self._download_resource(filename, self.directory, image)
+            return [self._download_resource(image)]

         else:
-            filename = GLOBAL.config['filename'].format(**self.post)
-            logger.info(filename)
-
-            folder_dir = self.directory / filename
-
-            folder_dir.mkdir(exist_ok=True)
-
+            out = []
             for i, image in enumerate(images):
-                extension = self._get_extension(image)
-                filename = str(i + 1) + extension
-
                 if not re.match(r'https?://.*', image):
                     image = "https://" + image
-
-                logger.info(" ({}/{})".format(i + 1, images_length))
-                logger.info(" {}".format(filename))
-
-                try:
-                    self._download_resource(pathlib.Path(filename), folder_dir, image, indent=2)
-                except FileAlreadyExistsError:
-                    logger.info(" The file already exists" + " " * 10, end="\n\n")
-                    duplicates += 1
-                    how_many_downloaded -= 1
-
-                except Exception as exception:
-                    # raise exception
-                    logger.error("\n Could not get the file")
-                    logger.error(
-                        " "
-                        + "{class_name}: {info}".format(class_name=exception.__class__.__name__, info=str(exception))
-                        + "\n"
-                    )
-                    how_many_downloaded -= 1
-
-            if duplicates == images_length:
-                raise FileAlreadyExistsError
-            elif how_many_downloaded + duplicates < images_length:
-                raise AlbumNotDownloadedCompletely("Album Not Downloaded Completely")
+                out.append(self._download_resource(image))
+            return out

     @staticmethod
     def _get_links(url: str) -> list[str]:
bulkredditdownloader/site_downloaders/gallery.py
@@ -1,26 +1,23 @@
 #!/usr/bin/env python3

 import json
-import pathlib
 import logging
 import urllib.parse
+import pathlib

 import requests
+from praw.models import Submission

+from bulkredditdownloader.errors import ImageNotFound, NotADownloadableLinkError
 from bulkredditdownloader.site_downloaders.base_downloader import BaseDownloader
-from bulkredditdownloader.errors import (AlbumNotDownloadedCompletely, FileAlreadyExistsError, ImageNotFound,
-                                         NotADownloadableLinkError, TypeInSkip)
-from bulkredditdownloader.utils import GLOBAL

 logger = logging.getLogger(__name__)


 class Gallery(BaseDownloader):
-    def __init__(self, directory: pathlib.Path, post):
+    def __init__(self, directory: pathlib.Path, post: Submission):
         super().__init__(directory, post)
-        link = self.post['CONTENTURL']
+        link = self.post.url
         self.raw_data = self._get_data(link)
         self.download()

     def download(self):
         images = {}
@@ -37,7 +34,7 @@ class Gallery(BaseDownloader):
             except KeyError:
                 continue

-        self._download_album(images, count)
+        return [self._download_album(images)]

     @staticmethod
     def _get_data(link: str) -> dict:
@@ -63,44 +60,9 @@ class Gallery(BaseDownloader):
         data = json.loads(page_source[start_index - 1:end_index + 1].strip()[:-1])
         return data

-    def _download_album(self, images: dict, count: int):
-        folder_name = GLOBAL.config['filename'].format(**self.post)
-        folder_dir = self.directory / folder_name
-
-        how_many_downloaded = 0
-        duplicates = 0
-
-        folder_dir.mkdir(exist_ok=True)
-        logger.info(folder_name)
-
+    def _download_album(self, images: dict):
+        out = []
         for i, image in enumerate(images):
-            path = urllib.parse.urlparse(image['url']).path
-            extension = pathlib.Path(path).suffix
-
-            filename = pathlib.Path("_".join([str(i + 1), image['id']]) + extension)
-
-            logger.info("\n ({}/{})".format(i + 1, count))
-
-            try:
-                self._download_resource(filename, folder_dir, image['url'], indent=2)
-                how_many_downloaded += 1
-
-            except FileAlreadyExistsError:
-                logger.info(" The file already exists" + " " * 10, end="\n\n")
-                duplicates += 1
-
-            except TypeInSkip:
-                logger.info(" Skipping...")
-                how_many_downloaded += 1
-
-            except Exception as exception:
-                logger.info("\n Could not get the file")
-                logger.info(" " + "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
-                    class_name=exception.__class__.__name__, info=str(exception)) + "\n"
-                )
-                logger.info(GLOBAL.log_stream.getvalue(), no_print=True)
-
-        if duplicates == count:
-            raise FileAlreadyExistsError
-        elif how_many_downloaded + duplicates < count:
-            raise AlbumNotDownloadedCompletely("Album Not Downloaded Completely")
+            out.append(self._download_resource(image['url']))
+        return out
bulkredditdownloader/site_downloaders/gfycat.py
@@ -6,14 +6,14 @@ import re
 import urllib.request

 from bs4 import BeautifulSoup
+from praw.models import Submission

 from bulkredditdownloader.site_downloaders.gif_delivery_network import GifDeliveryNetwork


 class Gfycat(GifDeliveryNetwork):
-    def __init__(self, directory: pathlib.Path, post: dict):
+    def __init__(self, directory: pathlib.Path, post: Submission):
         super().__init__(directory, post)
         self.download()

     def download(self):
         super().download()
bulkredditdownloader/site_downloaders/gif_delivery_network.py
@@ -4,29 +4,23 @@ import pathlib
 import urllib.request

 from bs4 import BeautifulSoup
+from praw.models import Submission

-from bulkredditdownloader.site_downloaders.base_downloader import BaseDownloader
 from bulkredditdownloader.errors import NotADownloadableLinkError
-from bulkredditdownloader.utils import GLOBAL
+from bulkredditdownloader.site_downloaders.base_downloader import BaseDownloader


 class GifDeliveryNetwork(BaseDownloader):
-    def __init__(self, directory: pathlib.Path, post: dict):
+    def __init__(self, directory: pathlib.Path, post: Submission):
         super().__init__(directory, post)
         self.download()

     def download(self):
         try:
-            self.post['MEDIAURL'] = self._get_link(self.post['CONTENTURL'])
+            media_url = self._get_link(self.post.url)
         except IndexError:
             raise NotADownloadableLinkError("Could not read the page source")

-        self.post['EXTENSION'] = self._get_extension(self.post['MEDIAURL'])
-        self.directory.mkdir(exist_ok=True)
-
-        filename = GLOBAL.config['filename'].format(**self.post) + self.post["EXTENSION"]
-
-        self._download_resource(filename, self.directory, self.post['MEDIAURL'])
+        return [self._download_resource(media_url)]

     @staticmethod
     def _get_link(url: str) -> str:
bulkredditdownloader/site_downloaders/imgur.py
@@ -1,16 +1,15 @@
 #!/usr/bin/env python3

 import json
-import pathlib
 import logging
+import pathlib

 import requests
+from praw.models import Submission

+from bulkredditdownloader.errors import ExtensionError, ImageNotFound, NotADownloadableLinkError
 from bulkredditdownloader.site_downloaders.base_downloader import BaseDownloader
 from bulkredditdownloader.site_downloaders.direct import Direct
-from bulkredditdownloader.errors import (AlbumNotDownloadedCompletely, ExtensionError, FileAlreadyExistsError,
-                                         ImageNotFound, NotADownloadableLinkError, TypeInSkip)
-from bulkredditdownloader.utils import GLOBAL, nameCorrector

 logger = logging.getLogger(__name__)
@@ -19,85 +18,43 @@ class Imgur(BaseDownloader):

     imgur_image_domain = "https://i.imgur.com/"

-    def __init__(self, directory: pathlib.Path, post: dict):
+    def __init__(self, directory: pathlib.Path, post: Submission):
         super().__init__(directory, post)
         self.raw_data = {}
         self.download()

     def download(self):
-        link = self.post['CONTENTURL']
+        link = self.post.url

         if link.endswith(".gifv"):
-            link = link.replace(".gifv", ".mp4")
-            Direct(self.directory, {**self.post, 'CONTENTURL': link})
-            return
+            direct_thing = Direct(self.directory, self.post)
+            return direct_thing.download()

         self.raw_data = self._get_data(link)

         if self._is_album:
             if self.raw_data["album_images"]["count"] != 1:
-                self._download_album(self.raw_data["album_images"])
+                out = self._download_album(self.raw_data["album_images"])
             else:
-                self._download_image(self.raw_data["album_images"]["images"][0])
+                out = self._download_image(self.raw_data["album_images"]["images"][0])
         else:
-            self._download_image(self.raw_data)
+            out = self._download_image(self.raw_data)
+        return out

     def _download_album(self, images: dict):
-        folder_name = GLOBAL.config['filename'].format(**self.post)
-        folder_dir = self.directory / folder_name
-
         images_length = images["count"]
-        how_many_downloaded = 0
-        duplicates = 0
-
-        folder_dir.mkdir(exist_ok=True)
-        logger.info(folder_name)
+        out = []

         for i in range(images_length):
             extension = self._validate_extension(images["images"][i]["ext"])
             image_url = self.imgur_image_domain + images["images"][i]["hash"] + extension
-            filename = pathlib.Path("_".join([str(i + 1),
-                                              nameCorrector(images["images"][i]['title']),
-                                              images["images"][i]['hash']]) + extension)
-
-            logger.info("\n ({}/{})".format(i + 1, images_length))
-
-            try:
-                self._download_resource(filename, folder_dir, image_url, indent=2)
-                how_many_downloaded += 1
-
-            except FileAlreadyExistsError:
-                logger.info(" The file already exists" + " " * 10, end="\n\n")
-                duplicates += 1
-
-            except TypeInSkip:
-                logger.info(" Skipping...")
-                how_many_downloaded += 1
-
-            except Exception as exception:
-                logger.info("\n Could not get the file")
-                logger.info(
-                    " "
-                    + "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
-                        class_name=exception.__class__.__name__,
-                        info=str(exception)
-                    )
-                    + "\n"
-                )
-                logger.info(GLOBAL.log_stream.getvalue(), no_print=True)
-
-        if duplicates == images_length:
-            raise FileAlreadyExistsError
-        elif how_many_downloaded + duplicates < images_length:
-            raise AlbumNotDownloadedCompletely("Album Not Downloaded Completely")
+            out.append(self._download_resource(image_url))
+        return out

     def _download_image(self, image: dict):
         extension = self._validate_extension(image["ext"])
         image_url = self.imgur_image_domain + image["hash"] + extension

-        filename = GLOBAL.config['filename'].format(**self.post) + extension
-
-        self._download_resource(filename, self.directory, image_url)
+        return [self._download_resource(image_url)]

     def _is_album(self) -> bool:
         return "album_images" in self.raw_data
@@ -134,9 +91,8 @@ class Imgur(BaseDownloader):
     @staticmethod
     def _validate_extension(extension_suffix: str) -> str:
         possible_extensions = [".jpg", ".png", ".mp4", ".gif"]

         for extension in possible_extensions:
             if extension in extension_suffix:
                 return extension
-        else:
-            raise ExtensionError(f"\"{extension_suffix}\" is not recognized as a valid extension.")
+        raise ExtensionError(f'"{extension_suffix}" is not recognized as a valid extension for Imgur')
bulkredditdownloader/site_downloaders/redgifs.py
@@ -5,24 +5,22 @@ import pathlib
 import urllib.request

 from bs4 import BeautifulSoup
+from praw.models import Submission

-from bulkredditdownloader.site_downloaders.gif_delivery_network import GifDeliveryNetwork
 from bulkredditdownloader.errors import NotADownloadableLinkError
+from bulkredditdownloader.site_downloaders.gif_delivery_network import GifDeliveryNetwork


 class Redgifs(GifDeliveryNetwork):
-    def __init__(self, directory: pathlib.Path, post: dict):
+    def __init__(self, directory: pathlib.Path, post: Submission):
         super().__init__(directory, post)
         self.download()

     def download(self):
         super().download()

     @staticmethod
     def _get_link(url: str) -> str:
-        """Extract direct link to the video from page's source
-        and return it
-        """
+        """Extract direct link to the video from page's source and return it"""
         if '.webm' in url or '.mp4' in url or '.gif' in url:
             return url
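
Gfycat and Redgifs now contribute nothing beyond their _get_link page scraper: GifDeliveryNetwork.download() does the fetching, and Python resolves the static _get_link through the instance's class, so each subclass only swaps the scraping step. A condensed view of that dispatch (method bodies elided; not literal source):

# Condensed sketch of the template-method arrangement across the three
# classes above; Gfycat mirrors Redgifs with its own scraper.
class GifDeliveryNetwork(BaseDownloader):
    def download(self):
        media_url = self._get_link(self.post.url)  # resolves to subclass override
        return [self._download_resource(media_url)]

    @staticmethod
    def _get_link(url: str) -> str:
        ...  # generic gifdeliverynetwork page scraping


class Redgifs(GifDeliveryNetwork):
    @staticmethod
    def _get_link(url: str) -> str:
        ...  # redgifs-specific page scraping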
bulkredditdownloader/site_downloaders/self_post.py
@@ -1,64 +1,39 @@
 #!/usr/bin/env python3

-import io
 import logging
 import pathlib
-from pathlib import Path

+from praw.models import Submission
+
+from bulkredditdownloader.resource import Resource
 from bulkredditdownloader.site_downloaders.base_downloader import BaseDownloader
-from bulkredditdownloader.errors import FileAlreadyExistsError, TypeInSkip
-from bulkredditdownloader.utils import GLOBAL

 logger = logging.getLogger(__name__)


 class SelfPost(BaseDownloader):
-    def __init__(self, directory: pathlib.Path, post: dict):
+    def __init__(self, directory: pathlib.Path, post: Submission):
         super().__init__(directory, post)
         self.download()

     def download(self):
-        if "self" in GLOBAL.arguments.skip:
-            raise TypeInSkip
-
-        self.directory.mkdir(exist_ok=True)
-        filename = GLOBAL.config['filename'].format(**self.post)
-
-        file_dir = self.directory / (filename + ".md")
-        logger.info(file_dir)
-        logger.info(filename + ".md")
-
-        if Path.is_file(file_dir):
-            raise FileAlreadyExistsError
-
-        try:
-            self._write_to_file(file_dir, self.post)
-        except FileNotFoundError:
-            file_dir = self.post['POSTID'] + ".md"
-            file_dir = self.directory / file_dir
-
-            self._write_to_file(file_dir, self.post)
+        return Resource(self.post, self.post.url, bytes(self.export_to_string()))

-    @staticmethod
-    def _write_to_file(directory: pathlib.Path, post: dict):
+    def export_to_string(self) -> str:
         """Self posts are formatted here"""
         content = ("## ["
-                   + post["TITLE"]
+                   + self.post.fullname
                    + "]("
-                   + post["CONTENTURL"]
+                   + self.post.url
                    + ")\n"
-                   + post["CONTENT"]
+                   + self.post.selftext
                    + "\n\n---\n\n"
                    + "submitted to [r/"
-                   + post["SUBREDDIT"]
+                   + self.post.subreddit.title
                    + "](https://www.reddit.com/r/"
-                   + post["SUBREDDIT"]
+                   + self.post.subreddit.title
                    + ") by [u/"
-                   + post["REDDITOR"]
+                   + self.post.author.name
                    + "](https://www.reddit.com/user/"
-                   + post["REDDITOR"]
+                   + self.post.author.name
                    + ")")

-        with io.open(directory, "w", encoding="utf-8") as FILE:
-            print(content, file=FILE)
-        logger.info("Downloaded")
+        return content
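
For reference, export_to_string renders the self post as a small Markdown document along these lines (all values below are invented for illustration):

## [t3_abc123](https://www.reddit.com/r/example/comments/abc123/a_title/)
The body of the self post...

---

submitted to [r/example](https://www.reddit.com/r/example) by [u/someone](https://www.reddit.com/user/someone)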
bulkredditdownloader/site_downloaders/vreddit.py
@@ -4,61 +4,49 @@ import logging
 import os
 import pathlib
 import subprocess
+import tempfile

 import requests
+from praw.models import Submission

+from bulkredditdownloader.resource import Resource
 from bulkredditdownloader.site_downloaders.base_downloader import BaseDownloader
-from bulkredditdownloader.utils import GLOBAL

 logger = logging.getLogger(__name__)


 class VReddit(BaseDownloader):
-    def __init__(self, directory: pathlib.Path, post: dict):
+    def __init__(self, directory: pathlib.Path, post: Submission):
         super().__init__(directory, post)
         self.download()

     def download(self):
-        extension = ".mp4"
-        self.directory.mkdir(exist_ok=True)
-
-        filename = GLOBAL.config['filename'].format(**self.post) + extension
-
         try:
             fnull = open(os.devnull, 'w')
             subprocess.call("ffmpeg", stdout=fnull, stderr=subprocess.STDOUT)
-        except Exception:
-            self._download_resource(filename, self.directory, self.post['CONTENTURL'])
-            logger.info("FFMPEG library not found, skipping merging video and audio")
+        except subprocess.SubprocessError:
+            return self._download_resource(self.post.url)
         else:
-            video_name = self.post['POSTID'] + "_video"
-            video_url = self.post['CONTENTURL']
-            audio_name = self.post['POSTID'] + "_audio"
+            video_url = self.post.url
             audio_url = video_url[:video_url.rfind('/')] + '/DASH_audio.mp4'
-
-            logger.info(self.directory, filename, sep="\n")
-
-            self._download_resource(video_name, self.directory, video_url, silent=True)
-            self._download_resource(audio_name, self.directory, audio_url, silent=True)
-            try:
-                self._merge_audio(video_name, audio_name, filename, self.directory)
-            except KeyboardInterrupt:
-                (self.directory / filename).unlink()
-                (self.directory / audio_name).unlink()
-                (self.directory / video_name).unlink()
-                (self.directory / filename).unlink()
+            with tempfile.TemporaryDirectory() as temp_dir:
+                video = requests.get(video_url).content
+                audio = requests.get(audio_url).content
+                with open(temp_dir / 'video', 'wb') as file:
+                    file.write(video)
+                with open(temp_dir / 'audio', 'wb') as file:
+                    file.write(audio)
+                self._merge_audio(temp_dir)
+                with open(temp_dir / 'output.mp4', 'rb') as file:
+                    content = file.read()
+            return Resource(self.post, self.post.url, content)

     @staticmethod
-    def _merge_audio(
-            video: pathlib.Path,
-            audio: pathlib.Path,
-            filename: pathlib.Path,
-            directory: pathlib.Path):
-        input_video = str(directory / video)
-        input_audio = str(directory / audio)
+    def _merge_audio(working_directory: pathlib.Path):
+        input_video = working_directory / 'video'
+        input_audio = working_directory / 'audio'

         fnull = open(os.devnull, 'w')
         cmd = "ffmpeg -i {} -i {} -c:v copy -c:a aac -strict experimental {}".format(
-            input_audio, input_video, str(directory / filename))
+            input_audio, input_video, str(working_directory / 'output.mp4'))
         subprocess.call(cmd.split(), stdout=fnull, stderr=subprocess.STDOUT)
-
-        (directory / video).unlink()
-        (directory / audio).unlink()
bulkredditdownloader/site_downloaders/youtube.py
@@ -1,64 +1,37 @@
 #!/usr/bin/env python3

 import logging
-import os
 import pathlib
-import sys
+import tempfile

 import youtube_dl
+from praw.models import Submission

+from bulkredditdownloader.resource import Resource
 from bulkredditdownloader.site_downloaders.base_downloader import BaseDownloader
-from bulkredditdownloader.errors import FileAlreadyExistsError
-from bulkredditdownloader.utils import GLOBAL

 logger = logging.getLogger(__name__)


 class Youtube(BaseDownloader):
-    def __init__(self, directory: pathlib.Path, post: dict):
+    def __init__(self, directory: pathlib.Path, post: Submission):
         super().__init__(directory, post)
         self.download()

     def download(self):
-        self.directory.mkdir(exist_ok=True)
-
-        filename = GLOBAL.config['filename'].format(**self.post)
-        logger.info(filename)
-
-        self._download_video(filename, self.directory, self.post['CONTENTURL'])
-
-    def _download_video(self, filename: str, directory: pathlib.Path, url: str):
-        ydl_opts = {
-            "format": "best",
-            "outtmpl": str(directory / (filename + ".%(ext)s")),
-            "progress_hooks": [self._hook],
-            "playlistend": 1,
-            "nooverwrites": True,
-            "quiet": True
-        }
-        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-            ydl.download([url])
-
-        location = directory / (filename + ".mp4")
-
-        with open(location, 'rb') as file:
-            content = file.read()
-
-        if GLOBAL.arguments.no_dupes:
-            try:
-                file_hash = self._create_hash(content)
-            except FileNotFoundError:
-                return None
-            if file_hash in GLOBAL.downloadedPosts():
-                os.remove(location)
-                raise FileAlreadyExistsError
-            GLOBAL.downloadedPosts.add(file_hash)
-
-    @staticmethod
-    def _hook(d):
-        if d['status'] == 'finished':
-            return logger.info("Downloaded")
-        downloaded_mbs = int(d['downloaded_bytes'] * (10**(-6)))
-        file_size = int(d['total_bytes'] * (10**(-6)))
-        sys.stdout.write("{}Mb/{}Mb\r".format(downloaded_mbs, file_size))
-        sys.stdout.flush()
+        return self._download_video()
+
+    def _download_video(self) -> Resource:
+        with tempfile.TemporaryDirectory() as temp_dir:
+            ydl_opts = {
+                "format": "best",
+                "outtmpl": str(temp_dir / "test.%(ext)s"),
+                "playlistend": 1,
+                "nooverwrites": True,
+                "quiet": True
+            }
+            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
+                ydl.download([self.post.url])
+
+            with open(temp_dir / 'test.mp4', 'rb') as file:
+                content = file.read()
+        return Resource(self.post, self.post.url, content)