Move to different program structure

This commit is contained in:
Serene-Arc
2021-02-11 09:10:40 +10:00
committed by Ali Parlakci
parent a72abd6603
commit a7f1db14e5
24 changed files with 504 additions and 2133 deletions

View File

@@ -4,338 +4,158 @@
This program downloads imgur, gfycat and direct image and video links of
saved posts from a reddit account. It is written in Python 3.
"""
import argparse
import logging
import os
import sys
import time
from io import StringIO
from pathlib import Path
from prawcore.exceptions import InsufficientScope
from bulkredditdownloader.arguments import Arguments
from bulkredditdownloader.config import Config
from bulkredditdownloader.site_downloaders.direct import Direct
from bulkredditdownloader.site_downloaders.erome import Erome
from bulkredditdownloader.site_downloaders.gallery import Gallery
from bulkredditdownloader.site_downloaders.gfycat import Gfycat
from bulkredditdownloader.site_downloaders.gif_delivery_network import GifDeliveryNetwork
from bulkredditdownloader.site_downloaders.imgur import Imgur
from bulkredditdownloader.site_downloaders.redgifs import Redgifs
from bulkredditdownloader.site_downloaders.self_post import SelfPost
from bulkredditdownloader.site_downloaders.vreddit import VReddit
from bulkredditdownloader.site_downloaders.youtube import Youtube
from bulkredditdownloader.errors import (AlbumNotDownloadedCompletely, DomainInSkip, FailedToDownload, FileAlreadyExistsError,
ImgurLimitError, ImgurLoginError, InvalidJSONFile, NoSuitablePost, NotADownloadableLinkError,
TypeInSkip, full_exc_info)
from bulkredditdownloader.json_helper import JsonFile
from bulkredditdownloader.program_mode import ProgramMode
from bulkredditdownloader.reddit import Reddit
from bulkredditdownloader.searcher import getPosts
from bulkredditdownloader.store import Store
from bulkredditdownloader.utils import GLOBAL, createLogFile, nameCorrector, printToFile
from bulkredditdownloader.downloader import RedditDownloader
from bulkredditdownloader.errors import BulkDownloaderException
from time import sleep
__author__ = "Ali Parlakci"
__license__ = "GPL"
__version__ = "1.10.0"
__maintainer__ = "Ali Parlakci"
__email__ = "parlakciali@gmail.com"
logger = logging.getLogger()
parser = argparse.ArgumentParser(allow_abbrev=False,
description="This program downloads media from reddit posts")
def postFromLog(filename):
"""Analyze a log file and return a list of dictionaries containing
submissions
"""
if Path.is_file(Path(filename)):
content = JsonFile(filename).read()
def _add_options():
parser.add_argument("directory",
help="Specifies the directory where posts will be downloaded to",
metavar="DIRECTORY")
parser.add_argument("--verbose", "-v",
help="Verbose Mode",
action="store_true",
default=False)
parser.add_argument("--quit", "-q",
help="Auto quit afer the process finishes",
action="store_true",
default=False)
parser.add_argument("--link", "-l",
help="Get posts from link",
metavar="link")
parser.add_argument("--saved",
action="store_true",
required="--unsave" in sys.argv,
help="Triggers saved mode")
parser.add_argument("--unsave",
action="store_true",
help="Unsaves downloaded posts")
parser.add_argument("--submitted",
action="store_true",
help="Gets posts of --user")
parser.add_argument("--upvoted",
action="store_true",
help="Gets upvoted posts of --user")
parser.add_argument("--log",
help="Takes a log file which created by itself (json files),reads posts and tries "
"downloading them again.",
metavar="LOG FILE")
parser.add_argument("--subreddit",
nargs="+",
help="Triggers subreddit mode and takes subreddit's name without r/. use \"frontpage\" "
"for frontpage",
metavar="SUBREDDIT",
type=str)
parser.add_argument("--multireddit",
help="Triggers multireddit mode and takes multireddit's name without m",
metavar="MULTIREDDIT",
type=str)
parser.add_argument("--user",
help="reddit username if needed. use \"me\" for current user",
required="--multireddit" in sys.argv or "--submitted" in sys.argv,
metavar="redditor",
type=str)
parser.add_argument("--search",
help="Searches for given query in given subreddits",
metavar="query",
type=str)
parser.add_argument("--sort",
help="Either hot, top, new, controversial, rising or relevance default: hot",
choices=["hot", "top", "new", "controversial", "rising", "relevance"],
metavar="SORT TYPE",
type=str)
parser.add_argument("--limit",
help="default: unlimited",
metavar="Limit",
type=int)
parser.add_argument("--time",
help="Either hour, day, week, month, year or all. default: all",
choices=["all", "hour", "day", "week", "month", "year"],
metavar="TIME_LIMIT",
type=str)
parser.add_argument("--skip",
nargs="+",
help="Skip posts with given type",
type=str,
choices=["images", "videos", "gifs", "self"],
default=[])
parser.add_argument("--skip-domain",
nargs="+",
help="Skip posts with given domain",
type=str,
default=[])
parser.add_argument("--set-folderpath",
action="store_true",
help="Set custom folderpath",
default='{SUBREDDIT}'
)
parser.add_argument("--set-filename",
action="store_true",
help="Set custom filename",
default='{REDDITOR}_{TITLE}_{POSTID}'
)
parser.add_argument("--set-default-directory",
action="store_true",
help="Set a default directory to be used in case no directory is given",
)
parser.add_argument("--set-default-options",
action="store_true",
help="Set default options to use everytime program runs",
)
parser.add_argument("--use-local-config",
action="store_true",
help="Creates a config file in the program's directory"
" and uses it. Useful for having multiple configs",
)
parser.add_argument("--no-dupes",
action="store_true",
help="Do not download duplicate posts on different subreddits",
)
parser.add_argument("--downloaded-posts",
help="Use a hash file to keep track of downloaded files",
type=str
)
parser.add_argument("--no-download",
action="store_true",
help="Just saved posts into a the POSTS.json file without downloading"
)
def _setup_logging(verbosity: int):
logger.setLevel(1)
stream = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('[%(asctime)s - %(name)s - %(levelname)s] - %(message)s')
stream.setFormatter(formatter)
logger.addHandler(stream)
    if verbosity < 0:
        stream.setLevel(logging.INFO)
    else:
        stream.setLevel(logging.DEBUG)
    logging.getLogger('praw').setLevel(logging.CRITICAL)
    logging.getLogger('prawcore').setLevel(logging.CRITICAL)
    logging.getLogger('urllib3').setLevel(logging.CRITICAL)
def main(args: argparse.Namespace):
    _setup_logging(args.verbose)
    try:
        reddit_downloader = RedditDownloader(args)
        reddit_downloader.download()
    except BulkDownloaderException as e:
        logger.critical(f'An error occurred {e}')
# remainder of the old postFromLog shown above:
    else:
        print("File not found")
        sys.exit()
    try:
        del content["HEADER"]
    except KeyError:
        pass
    posts = []
    for post in content:
        if not content[post][-1]['TYPE'] is None:
            posts.append(content[post][-1])
    return posts
def isPostExists(post, directory):
"""Figure out a file's name and checks if the file already exists"""
filename = GLOBAL.config['filename'].format(**post)
possible_extensions = [".jpg", ".png", ".mp4", ".gif", ".webm", ".md", ".mkv", ".flv"]
for extension in possible_extensions:
path = directory / Path(filename + extension)
if path.exists():
return True
return False
def downloadPost(submission, directory):
downloaders = {
"imgur": Imgur, "gfycat": Gfycat, "erome": Erome, "direct": Direct, "self": SelfPost,
"redgifs": Redgifs, "gifdeliverynetwork": GifDeliveryNetwork,
"v.redd.it": VReddit, "youtube": Youtube, "gallery": Gallery
}
print()
if submission['TYPE'] in downloaders:
downloaders[submission['TYPE']](directory, submission)
else:
raise NoSuitablePost
def download(submissions):
"""Analyze list of submissions and call the right function
to download each one, catch errors, update the log files
"""
downloaded_count = 0
duplicates = 0
failed_file = createLogFile("FAILED")
if GLOBAL.arguments.unsave:
reddit = Reddit(GLOBAL.config['credentials']['reddit']).begin()
subs_length = len(submissions)
for i in range(len(submissions)):
print(f"\n({i+1}/{subs_length})", end="")
print(submissions[i]['POSTID'],
f"r/{submissions[i]['SUBREDDIT']}",
f"u/{submissions[i]['REDDITOR']}",
submissions[i]['FLAIR'] if submissions[i]['FLAIR'] else "",
sep="",
end="")
print(f" {submissions[i]['TYPE'].upper()}", end="", no_print=True)
directory = GLOBAL.directory / \
GLOBAL.config["folderpath"].format(**submissions[i])
details = {
**submissions[i],
**{"TITLE": nameCorrector(
submissions[i]['TITLE'],
reference=str(directory)
+ GLOBAL.config['filename'].format(**submissions[i])
+ ".ext")}
}
filename = GLOBAL.config['filename'].format(**details)
if isPostExists(details, directory):
print()
print(directory)
print(filename)
print("It already exists")
duplicates += 1
continue
if any(domain in submissions[i]['CONTENTURL'] for domain in GLOBAL.arguments.skip):
print()
print(submissions[i]['CONTENTURL'])
print("Domain found in skip domains, skipping post...")
continue
try:
downloadPost(details, directory)
GLOBAL.downloadedPosts.add(details['POSTID'])
try:
if GLOBAL.arguments.unsave:
reddit.submission(id=details['POSTID']).unsave()
except InsufficientScope:
reddit = Reddit().begin()
reddit.submission(id=details['POSTID']).unsave()
downloaded_count += 1
except FileAlreadyExistsError:
print("It already exists")
GLOBAL.downloadedPosts.add(details['POSTID'])
duplicates += 1
except ImgurLoginError:
print("Imgur login failed. \nQuitting the program as unexpected errors might occur.")
sys.exit()
except ImgurLimitError as exception:
failed_file.add({int(i + 1): [
"{class_name}: {info}".format(class_name=exception.__class__.__name__, info=str(exception)), details
]})
except NotADownloadableLinkError as exception:
print("{class_name}: {info}".format(class_name=exception.__class__.__name__, info=str(exception)))
failed_file.add({int(i + 1): [
"{class_name}: {info}".format(class_name=exception.__class__.__name__, info=str(exception)),
submissions[i]
]})
except TypeInSkip:
print()
print(submissions[i]['CONTENTURL'])
print("Skipping post...")
except DomainInSkip:
print()
print(submissions[i]['CONTENTURL'])
print("Skipping post...")
except NoSuitablePost:
print("No match found, skipping...")
except FailedToDownload:
print("Failed to download the posts, skipping...")
except AlbumNotDownloadedCompletely as exc:
    print("Album was not downloaded completely.")
failed_file.add({int(i + 1): [
"{class_name}: {info}".format(class_name=exc.__class__.__name__, info=str(exc)),
submissions[i]
]})
except Exception as exc:
print("{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
class_name=exc.__class__.__name__, info=str(exc))
)
logging.error(sys.exc_info()[0].__name__, exc_info=full_exc_info(sys.exc_info()))
print(GLOBAL.log_stream.getvalue(), no_print=True)
failed_file.add({int(i + 1): [
"{class_name}: {info}".format(class_name=exc.__class__.__name__, info=str(exc)),
submissions[i]
]})
if duplicates:
print(f"\nThere {'were' if duplicates > 1 else 'was'} {duplicates} duplicate{'s' if duplicates > 1 else ''}")
if downloaded_count == 0:
print("Nothing is downloaded :(")
else:
print(f"Total of {downloaded_count} link{'s' if downloaded_count > 1 else ''} downloaded!")
def printLogo():
VanillaPrint(f"\nBulk Downloader for Reddit v{__version__}\n"
f"Written by Ali PARLAKCI parlakciali@gmail.com\n\n"
f"https://github.com/aliparlakci/bulk-downloader-for-reddit/\n"
)
def main():
if Path("config.json").exists():
GLOBAL.configDirectory = Path("config.json")
else:
if not Path(GLOBAL.defaultConfigDirectory).is_dir():
os.makedirs(GLOBAL.defaultConfigDirectory)
GLOBAL.configDirectory = GLOBAL.defaultConfigDirectory / "config.json"
try:
GLOBAL.config = Config(GLOBAL.configDirectory).generate()
except InvalidJSONFile as exception:
VanillaPrint(str(exception.__class__.__name__), ">>", str(exception))
VanillaPrint("Resolve it or remove it to proceed")
sys.exit()
sys.argv = sys.argv + GLOBAL.config["options"].split()
arguments = Arguments.parse()
GLOBAL.arguments = arguments
if arguments.set_filename:
Config(GLOBAL.configDirectory).setCustomFileName()
sys.exit()
if arguments.set_folderpath:
Config(GLOBAL.configDirectory).setCustomFolderPath()
sys.exit()
if arguments.set_default_directory:
Config(GLOBAL.configDirectory).setDefaultDirectory()
sys.exit()
if arguments.set_default_options:
Config(GLOBAL.configDirectory).setDefaultOptions()
sys.exit()
if arguments.use_local_config:
JsonFile("config.json").add(GLOBAL.config)
sys.exit()
if arguments.directory:
GLOBAL.directory = Path(arguments.directory.strip())
elif "default_directory" in GLOBAL.config and GLOBAL.config["default_directory"] != "":
GLOBAL.directory = Path(
GLOBAL.config["default_directory"].format(time=GLOBAL.RUN_TIME))
else:
GLOBAL.directory = Path(input("\ndownload directory: ").strip())
if arguments.downloaded_posts:
GLOBAL.downloadedPosts = Store(arguments.downloaded_posts)
else:
GLOBAL.downloadedPosts = Store()
printLogo()
print("\n", " ".join(sys.argv), "\n", no_print=True)
if arguments.log is not None:
log_dir = Path(arguments.log)
download(postFromLog(log_dir))
sys.exit()
program_mode = ProgramMode(arguments).generate()
try:
posts = getPosts(program_mode)
except Exception as exc:
logging.error(sys.exc_info()[0].__name__, exc_info=full_exc_info(sys.exc_info()))
print(GLOBAL.log_stream.getvalue(), no_print=True)
print(exc)
sys.exit()
if posts is None:
print("I could not find any posts in that URL")
sys.exit()
if GLOBAL.arguments.no_download:
pass
else:
download(posts)
if __name__ == "__main__":
GLOBAL.log_stream = StringIO()
logging.basicConfig(stream=GLOBAL.log_stream, level=logging.INFO)
try:
VanillaPrint = print
print = printToFile
GLOBAL.RUN_TIME = str(time.strftime("%d-%m-%Y_%H-%M-%S", time.localtime(time.time())))
main()
except KeyboardInterrupt:
if GLOBAL.directory is None:
GLOBAL.directory = Path("../..\\")
except Exception as exception:
if GLOBAL.directory is None:
GLOBAL.directory = Path("../..\\")
logging.error(sys.exc_info()[0].__name__, exc_info=full_exc_info(sys.exc_info()))
print(GLOBAL.log_stream.getvalue())
if not GLOBAL.arguments.quit:
input("\nPress enter to quit\n")
if __name__ == '__main__':
_add_options()
args = parser.parse_args()
main(args)
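For orientation, a minimal driver for the restructured entry point might look like the sketch below (illustrative only: it assumes this file is the package's __main__ module, the directory and option values are invented, and _add_options, parser and main are the names defined above):

_add_options()
args = parser.parse_args(["./downloads", "--subreddit", "pics", "--limit", "10", "--verbose"])
main(args)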

View File

@@ -1,153 +0,0 @@
import argparse
import sys
class Arguments:
@staticmethod
def parse(arguments=None):
"""Initialize argparse and add arguments"""
if arguments is None:
arguments = []
parser = argparse.ArgumentParser(allow_abbrev=False,
description="This program downloads media from reddit posts")
parser.add_argument("--directory", "-d",
help="Specifies the directory where posts will be downloaded to",
metavar="DIRECTORY")
parser.add_argument("--verbose", "-v",
help="Verbose Mode",
action="store_true",
default=False)
parser.add_argument("--quit", "-q",
help="Auto quit afer the process finishes",
action="store_true",
default=False)
parser.add_argument("--link", "-l",
help="Get posts from link",
metavar="link")
parser.add_argument("--saved",
action="store_true",
required="--unsave" in sys.argv,
help="Triggers saved mode")
parser.add_argument("--unsave",
action="store_true",
help="Unsaves downloaded posts")
parser.add_argument("--submitted",
action="store_true",
help="Gets posts of --user")
parser.add_argument("--upvoted",
action="store_true",
help="Gets upvoted posts of --user")
parser.add_argument("--log",
help="Takes a log file which created by itself (json files),reads posts and tries "
"downloading them again.",
# type=argparse.FileType('r'),
metavar="LOG FILE")
parser.add_argument("--subreddit",
nargs="+",
help="Triggers subreddit mode and takes subreddit's name without r/. use \"frontpage\" "
"for frontpage",
metavar="SUBREDDIT",
type=str)
parser.add_argument("--multireddit",
help="Triggers multireddit mode and takes multireddit's name without m",
metavar="MULTIREDDIT",
type=str)
parser.add_argument("--user",
help="reddit username if needed. use \"me\" for current user",
required="--multireddit" in sys.argv or "--submitted" in sys.argv,
metavar="redditor",
type=str)
parser.add_argument(
"--search",
help="Searches for given query in given subreddits",
metavar="query",
type=str)
parser.add_argument("--sort",
help="Either hot, top, new, controversial, rising or relevance default: hot",
choices=["hot", "top", "new", "controversial", "rising", "relevance"],
metavar="SORT TYPE",
type=str)
parser.add_argument("--limit",
help="default: unlimited",
metavar="Limit",
type=int)
parser.add_argument("--time",
help="Either hour, day, week, month, year or all. default: all",
choices=["all", "hour", "day", "week", "month", "year"],
metavar="TIME_LIMIT",
type=str)
parser.add_argument("--skip",
nargs="+",
help="Skip posts with given type",
type=str,
choices=["images", "videos", "gifs", "self"],
default=[])
parser.add_argument("--skip-domain",
nargs="+",
help="Skip posts with given domain",
type=str,
default=[])
parser.add_argument("--set-folderpath",
action="store_true",
help="Set custom folderpath"
)
parser.add_argument("--set-filename",
action="store_true",
help="Set custom filename",
)
parser.add_argument("--set-default-directory",
action="store_true",
help="Set a default directory to be used in case no directory is given",
)
parser.add_argument("--set-default-options",
action="store_true",
help="Set default options to use everytime program runs",
)
parser.add_argument("--use-local-config",
action="store_true",
help="Creates a config file in the program's directory"
" and uses it. Useful for having multiple configs",
)
parser.add_argument("--no-dupes",
action="store_true",
help="Do not download duplicate posts on different subreddits",
)
parser.add_argument("--downloaded-posts",
help="Use a hash file to keep track of downloaded files",
type=str
)
parser.add_argument("--no-download",
action="store_true",
help="Just saved posts into a the POSTS.json file without downloading"
)
if not arguments:
return parser.parse_args()
else:
return parser.parse_args(arguments)

View File

@@ -1,109 +0,0 @@
from bulkredditdownloader.reddit import Reddit
from bulkredditdownloader.json_helper import JsonFile
from bulkredditdownloader.utils import nameCorrector
class Config:
def __init__(self, filename: str):
self.filename = filename
self.file = JsonFile(self.filename)
def generate(self) -> dict:
self._validateCredentials()
self._readCustomFileName()
self._readCustomFolderPath()
self._readDefaultOptions()
return self.file.read()
def setCustomFileName(self):
print("""
IMPORTANT: Do not change the filename structure frequently.
If you do, the program cannot detect duplicates and
will download already-downloaded files again.
This will not create duplicates in the directory, but
the program will not be as snappy as it should be.
Type a template file name for each post.
You can use SUBREDDIT, REDDITOR, POSTID, TITLE, UPVOTES, FLAIR, DATE in curly braces.
The text in curly braces will be replaced with the corresponding property of each post.
For example: {FLAIR}_{SUBREDDIT}_{REDDITOR}
Existing filename template:""", None if "filename" not in self.file.read() else self.file.read()["filename"])
filename = nameCorrector(input(">> ").upper())
self.file.add({"filename": filename})
def _readCustomFileName(self):
content = self.file.read()
if "filename" not in content:
self.file.add({"filename": "{REDDITOR}_{TITLE}_{POSTID}"})
content = self.file.read()
if "{POSTID}" not in content["filename"]:
self.file.add({"filename": content["filename"] + "_{POSTID}"})
def setCustomFolderPath(self):
print("""
Type a folder structure (generic folder path)
Use slash or DOUBLE backslash to separate folders
You can use SUBREDDIT, REDDITOR, POSTID, TITLE, UPVOTES, FLAIR, DATE in curly braces
The text in curly braces will be replaced with the corresponding property of each post
For example: {REDDITOR}/{SUBREDDIT}/{FLAIR}
Existing folder structure""", None if "folderpath" not in self.file.read() else self.file.read()["folderpath"])
folderpath = nameCorrector(input(">> ").strip("\\").strip("/").upper())
self.file.add({"folderpath": folderpath})
def _readCustomFolderPath(self, path=None):
content = self.file.read()
if "folderpath" not in content:
self.file.add({"folderpath": "{SUBREDDIT}"})
def setDefaultOptions(self):
print("""
Type options to be used every time the script runs
For example: --no-dupes --quit --limit 100 --skip youtube.com
Existing default options:""", None if "options" not in self.file.read() else self.file.read()["options"])
options = input(">> ").strip("")
self.file.add({"options": options})
def _readDefaultOptions(self):
content = self.file.read()
if "options" not in content:
self.file.add({"options": ""})
def _validateCredentials(self):
"""Read credentials from config.json file"""
try:
content = self.file.read()["credentials"]
except BaseException:
self.file.add({"credentials": {}})
content = self.file.read()["credentials"]
if "reddit" in content and len(content["reddit"]) != 0:
pass
else:
Reddit().begin()
print()
def setDefaultDirectory(self):
print("""Set a default directory to use in case no directory is given
Leave blank to reset it. You can use {time} in folder names to timestamp it
For example: D:/archive/BDFR_{time}
""")
print("Current default directory:", self.file.read()[
"default_directory"] if "default_directory" in self.file.read() else "")
self.file.add({"default_directory": input(">> ")})

View File

@@ -0,0 +1,184 @@
#!/usr/bin/env python3
# coding=utf-8
import argparse
import configparser
import logging
import socket
from datetime import datetime
from enum import Enum, auto
from pathlib import Path
import appdirs
import praw
import praw.models
from bulkredditdownloader.download_filter import DownloadFilter
from bulkredditdownloader.errors import NotADownloadableLinkError, RedditAuthenticationError
from bulkredditdownloader.file_name_formatter import FileNameFormatter
from bulkredditdownloader.site_downloaders.download_factory import DownloadFactory
logger = logging.getLogger(__name__)
class RedditTypes:
class SortType(Enum):
HOT = auto()
RISING = auto()
CONTROVERSIAL = auto()
NEW = auto()
RELEVENCE = auto()
class TimeType(Enum):
HOUR = auto()
DAY = auto()
WEEK = auto()
MONTH = auto()
YEAR = auto()
ALL = auto()
class RedditDownloader:
def __init__(self, args: argparse.Namespace):
self.config_directories = appdirs.AppDirs('bulk_reddit_downloader')
self.run_time = datetime.now().isoformat()
self._setup_internal_objects(args)
self.reddit_lists = self._retrieve_reddit_lists(args)
def _setup_internal_objects(self, args: argparse.Namespace):
self.download_filter = RedditDownloader._create_download_filter(args)
self.time_filter = RedditDownloader._create_time_filter(args)
self.sort_filter = RedditDownloader._create_sort_filter(args)
self.file_name_formatter = RedditDownloader._create_file_name_formatter(args)
self._determine_directories(args)
self.master_hash_list = []
self._load_config(args)
if self.cfg_parser.has_option('DEFAULT', 'username') and self.cfg_parser.has_option('DEFAULT', 'password'):
self.authenticated = True
self.reddit_instance = praw.Reddit(client_id=self.cfg_parser.get('DEFAULT', 'client_id'),
client_secret=self.cfg_parser.get('DEFAULT', 'client_secret'),
user_agent=socket.gethostname(),
username=self.cfg_parser.get('DEFAULT', 'username'),
password=self.cfg_parser.get('DEFAULT', 'password'))
else:
self.authenticated = False
self.reddit_instance = praw.Reddit(client_id=self.cfg_parser.get('DEFAULT', 'client_id'),
client_secret=self.cfg_parser.get('DEFAULT', 'client_secret'),
user_agent=socket.gethostname())
def _retrieve_reddit_lists(self, args: argparse.Namespace) -> list[praw.models.ListingGenerator]:
master_list = []
master_list.extend(self._get_subreddits(args))
master_list.extend(self._get_multireddits(args))
master_list.extend(self._get_user_data(args))
return master_list
def _determine_directories(self, args: argparse.Namespace):
self.download_directory = Path(args.directory)
self.logfile_directory = self.download_directory / 'LOG_FILES'
self.config_directory = self.config_directories.user_config_dir
def _load_config(self, args: argparse.Namespace):
self.cfg_parser = configparser.ConfigParser()
if args.use_local_config and Path('./config.cfg').exists():
self.cfg_parser.read(Path('./config.cfg'))
else:
self.cfg_parser.read(Path('./default_config.cfg').resolve())
def _get_subreddits(self, args: argparse.Namespace) -> list[praw.models.ListingGenerator]:
if args.subreddit:
subreddits = [self.reddit_instance.subreddit(chosen_subreddit) for chosen_subreddit in args.subreddit]
if self.sort_filter is RedditTypes.SortType.NEW:
sort_function = praw.models.Subreddit.new
elif self.sort_filter is RedditTypes.SortType.RISING:
sort_function = praw.models.Subreddit.rising
elif self.sort_filter is RedditTypes.SortType.CONTROVERSIAL:
sort_function = praw.models.Subreddit.controversial
else:
sort_function = praw.models.Subreddit.hot
return [sort_function(reddit) for reddit in subreddits]
else:
return []
def _get_multireddits(self, args: argparse.Namespace) -> list[praw.models.ListingGenerator]:
if args.multireddit:
if self.authenticated:
return [self.reddit_instance.multireddit(m_reddit_choice) for m_reddit_choice in args.multireddit]
else:
raise RedditAuthenticationError('Accessing multireddits requires authentication')
else:
return []
def _get_user_data(self, args: argparse.Namespace) -> list[praw.models.ListingGenerator]:
if any((args.upvoted, args.submitted, args.saved)):
if self.authenticated:
generators = []
if args.upvoted:
generators.append(self.reddit_instance.redditor(args.user).upvoted)
if args.submitted:
generators.append(self.reddit_instance.redditor(args.user).submissions)
if args.saved:
generators.append(self.reddit_instance.redditor(args.user).saved)
return generators
else:
raise RedditAuthenticationError('Accessing user lists requires authentication')
else:
return []
@staticmethod
def _create_file_name_formatter(args: argparse.Namespace) -> FileNameFormatter:
return FileNameFormatter(args.set_filename, args.set_folderpath)
@staticmethod
def _create_time_filter(args: argparse.Namespace) -> RedditTypes.TimeType:
try:
return RedditTypes.TimeType[args.time.upper()]
except (KeyError, AttributeError):
return RedditTypes.TimeType.ALL
@staticmethod
def _create_sort_filter(args: argparse.Namespace) -> RedditTypes.SortType:
try:
return RedditTypes.SortType[args.sort.upper()]
except (KeyError, AttributeError):
return RedditTypes.SortType.HOT
@staticmethod
def _create_download_filter(args: argparse.Namespace) -> DownloadFilter:
formats = {
"videos": [".mp4", ".webm"],
"images": [".jpg", ".jpeg", ".png", ".bmp"],
"gifs": [".gif"],
"self": []
}
excluded_extensions = [extension for ext_type in args.skip for extension in formats.get(ext_type, ())]
return DownloadFilter(excluded_extensions, args.skip_domain)
def download(self):
for generator in self.reddit_lists:
for submission in generator:
self._download_submission(submission)
def _download_submission(self, submission: praw.models.Submission):
# TODO: check existence here
if self.download_filter.check_url(submission.url):
try:
downloader_class = DownloadFactory.pull_lever(submission.url)
downloader = downloader_class(self.download_directory, submission)
content = downloader.download()
for res in content:
destination = self.file_name_formatter.format_path(res, self.download_directory)
if res.hash.hexdigest() not in self.master_hash_list:
destination.parent.mkdir(parents=True, exist_ok=True)
with open(destination, 'wb') as file:
file.write(res.content)
logger.debug('Written file to {}'.format(destination))
self.master_hash_list.append(res.hash.hexdigest())
logger.debug('Hash added to master list: {}'.format(res.hash.hexdigest()))
logger.info('Downloaded submission {}'.format(submission.name))
except NotADownloadableLinkError as e:
logger.error('Could not download submission {}: {}'.format(submission.name, e))
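A rough sketch of how the pieces above fit together (illustrative: the submission and directory are supplied by the caller, the Resource attributes are the ones used in _download_submission, and DownloadFactory.pull_lever is the selector imported at the top of this file):

from bulkredditdownloader.site_downloaders.download_factory import DownloadFactory

def download_one(submission, download_directory):
    # Pick a site-specific downloader for the submission's URL, let it fetch the
    # media as Resource objects, and report each resource's hash and size.
    downloader_class = DownloadFactory.pull_lever(submission.url)
    for res in downloader_class(download_directory, submission).download():
        print(res.hash.hexdigest(), len(res.content))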

View File

@@ -1,137 +1,28 @@
import sys
#!/usr/bin/env python3
def full_exc_info(exc_info):
    """Like sys.exc_info, but includes the full traceback."""
    def current_stack(skip=0):
        try:
            1 / 0
        except ZeroDivisionError:
            f = sys.exc_info()[2].tb_frame
        for i in range(skip + 2):
            f = f.f_back
        lst = []
        while f is not None:
            lst.append((f, f.f_lineno))
            f = f.f_back
        return lst
    def extend_traceback(tb, stack):
        """Extend traceback with stack info."""
        class FauxTb():
            def __init__(self, tb_frame, tb_lineno, tb_next):
                self.tb_frame = tb_frame
                self.tb_lineno = tb_lineno
                self.tb_next = tb_next
        head = tb
        for tb_frame, tb_lineno in stack:
            head = FauxTb(tb_frame, tb_lineno, head)
        return head
    t, v, tb = exc_info
    full_tb = extend_traceback(tb, current_stack(1))
    return t, v, full_tb
# exception classes removed by this commit:
class RedditLoginFailed(Exception):
    pass
class ImgurLoginError(Exception):
    pass
class FileAlreadyExistsError(Exception):
    pass
class NotADownloadableLinkError(Exception):
    pass
class AlbumNotDownloadedCompletely(Exception):
    pass
class FileNameTooLong(Exception):
    pass
class InvalidRedditLink(Exception):
    pass
class ProgramModeError(Exception):
    pass
class SearchModeError(Exception):
    pass
class RedditorNameError(Exception):
    pass
class NoMatchingSubmissionFound(Exception):
    pass
class NoPrawSupport(Exception):
    pass
class NoRedditSupport(Exception):
    pass
class MultiredditNotFound(Exception):
    pass
class InsufficientPermission(Exception):
    pass
class InvalidSortingType(Exception):
    pass
class NoSuitablePost(Exception):
    pass
class ImgurLimitError(Exception):
    pass
class DirectLinkNotFound(Exception):
    pass
class InvalidJSONFile(Exception):
    pass
class FailedToDownload(Exception):
    pass
class TypeInSkip(Exception):
    pass
class DomainInSkip(Exception):
    pass
class ImageNotFound(Exception):
    pass
class ExtensionError(Exception):
    pass
# new exception hierarchy added by this commit:
class BulkDownloaderException(Exception):
    pass
class NotADownloadableLinkError(BulkDownloaderException):
    pass
class RedditAuthenticationError(BulkDownloaderException):
    pass
class InvalidJSONFile(BulkDownloaderException):
    pass
class FailedToDownload(BulkDownloaderException):
    pass
class ImageNotFound(BulkDownloaderException):
    pass
class ExtensionError(BulkDownloaderException):
    pass
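The practical effect of the new hierarchy is that callers can catch BulkDownloaderException in one place, as the new main() above already does. A minimal sketch (the safe_download helper is hypothetical):

from bulkredditdownloader.errors import BulkDownloaderException, NotADownloadableLinkError

def safe_download(downloader):
    try:
        return downloader.download()
    except NotADownloadableLinkError:
        return []  # no site downloader understands this link
    except BulkDownloaderException as e:
        print(f'An error occurred: {e}')  # any other library-defined error lands here
        return []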

View File

@@ -1,234 +0,0 @@
from pprint import pprint
try:
from bulkredditdownloader.errors import InvalidRedditLink
except ModuleNotFoundError:
from errors import InvalidRedditLink
def QueryParser(passed_queries: str) -> dict:
extracted_queries = {}
question_mark_index = passed_queries.index("?")
header = passed_queries[:question_mark_index]
extracted_queries["HEADER"] = header
queries = passed_queries[question_mark_index + 1:]
parsed_queries = queries.split("&")
for query in parsed_queries:
query = query.split("=")
extracted_queries[query[0]] = query[1]
if extracted_queries["HEADER"] == "search":
extracted_queries["q"] = extracted_queries["q"].replace("%20", " ")
return extracted_queries
def LinkParser(link: str) -> dict:
result = {}
short_link = False
if "reddit.com" not in link:
raise InvalidRedditLink("Invalid reddit link")
splitted_link = link.split("/")
if splitted_link[0] == "https:" or splitted_link[0] == "http:":
splitted_link = splitted_link[2:]
try:
if (splitted_link[-2].endswith("reddit.com") and
splitted_link[-1] == "") or splitted_link[-1].endswith("reddit.com"):
result["sort"] = "best"
return result
except IndexError:
if splitted_link[0].endswith("reddit.com"):
result["sort"] = "best"
return result
if "redd.it" in splitted_link:
short_link = True
if splitted_link[0].endswith("reddit.com"):
splitted_link = splitted_link[1:]
if "comments" in splitted_link:
result = {"post": link}
return result
elif "me" in splitted_link or \
"u" in splitted_link or \
"user" in splitted_link or \
"r" in splitted_link or \
"m" in splitted_link:
if "r" in splitted_link:
result["subreddit"] = splitted_link[splitted_link.index("r") + 1]
elif "m" in splitted_link:
result["multireddit"] = splitted_link[splitted_link.index("m") + 1]
result["user"] = splitted_link[splitted_link.index("m") - 1]
else:
for index in range(len(splitted_link)):
if splitted_link[index] == "u" or splitted_link[index] == "user":
result["user"] = splitted_link[index + 1]
elif splitted_link[index] == "me":
result["user"] = "me"
for index in range(len(splitted_link)):
if splitted_link[index] in ["hot", "top", "new", "controversial", "rising"]:
result["sort"] = splitted_link[index]
if index == 0:
result["subreddit"] = "frontpage"
elif splitted_link[index] in ["submitted", "saved", "posts", "upvoted"]:
if splitted_link[index] == "submitted" or splitted_link[index] == "posts":
result["submitted"] = {}
elif splitted_link[index] == "saved":
result["saved"] = True
elif splitted_link[index] == "upvoted":
result["upvoted"] = True
elif "?" in splitted_link[index]:
parsed_query = QueryParser(splitted_link[index])
if parsed_query["HEADER"] == "search":
del parsed_query["HEADER"]
result["search"] = parsed_query
elif parsed_query["HEADER"] == "submitted" or \
parsed_query["HEADER"] == "posts":
del parsed_query["HEADER"]
result["submitted"] = parsed_query
else:
del parsed_query["HEADER"]
result["queries"] = parsed_query
if not ("upvoted" in result or
"saved" in result or
"submitted" in result or
"multireddit" in result) and "user" in result:
result["submitted"] = {}
return result
def LinkDesigner(link) -> dict:
attributes = LinkParser(link)
mode = {}
if "post" in attributes:
mode["post"] = attributes["post"]
mode["sort"] = ""
mode["time"] = ""
return mode
elif "search" in attributes:
mode["search"] = attributes["search"]["q"]
if "restrict_sr" in attributes["search"]:
if not (attributes["search"]["restrict_sr"] == 0 or
attributes["search"]["restrict_sr"] == "off" or
attributes["search"]["restrict_sr"] == ""):
if "subreddit" in attributes:
mode["subreddit"] = attributes["subreddit"]
elif "multireddit" in attributes:
mode["multreddit"] = attributes["multireddit"]
mode["user"] = attributes["user"]
else:
mode["subreddit"] = "all"
else:
mode["subreddit"] = "all"
if "t" in attributes["search"]:
mode["time"] = attributes["search"]["t"]
else:
mode["time"] = "all"
if "sort" in attributes["search"]:
mode["sort"] = attributes["search"]["sort"]
else:
mode["sort"] = "relevance"
if "include_over_18" in attributes["search"]:
if attributes["search"]["include_over_18"] == 1 or attributes["search"]["include_over_18"] == "on":
mode["nsfw"] = True
else:
mode["nsfw"] = False
else:
if "queries" in attributes:
if not ("submitted" in attributes or "posts" in attributes):
if "t" in attributes["queries"]:
mode["time"] = attributes["queries"]["t"]
else:
mode["time"] = "day"
else:
if "t" in attributes["queries"]:
mode["time"] = attributes["queries"]["t"]
else:
mode["time"] = "all"
if "sort" in attributes["queries"]:
mode["sort"] = attributes["queries"]["sort"]
else:
mode["sort"] = "new"
else:
mode["time"] = "day"
if "subreddit" in attributes and "search" not in attributes:
mode["subreddit"] = attributes["subreddit"]
elif "user" in attributes and "search" not in attributes:
mode["user"] = attributes["user"]
if "submitted" in attributes:
mode["submitted"] = True
if "sort" in attributes["submitted"]:
mode["sort"] = attributes["submitted"]["sort"]
elif "sort" in mode:
pass
else:
mode["sort"] = "new"
if "t" in attributes["submitted"]:
mode["time"] = attributes["submitted"]["t"]
else:
mode["time"] = "all"
elif "saved" in attributes:
mode["saved"] = True
elif "upvoted" in attributes:
mode["upvoted"] = True
elif "multireddit" in attributes:
mode["multireddit"] = attributes["multireddit"]
if "sort" in attributes:
mode["sort"] = attributes["sort"]
elif "sort" in mode:
pass
else:
mode["sort"] = "hot"
return mode
if __name__ == "__main__":
while True:
link = input("> ")
pprint(LinkDesigner(link))
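For example, feeding a subreddit URL through the interactive loop above should produce a mode dictionary along these lines (approximate; this module is removed by the commit, so the import only works against the old tree):

from bulkredditdownloader.parser import LinkDesigner

mode = LinkDesigner("https://www.reddit.com/r/pics/top/?t=week")
print(mode)  # expected to be roughly {'subreddit': 'pics', 'sort': 'top', 'time': 'week'}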

View File

@@ -1,241 +0,0 @@
import sys
from pathlib import Path
from bulkredditdownloader.errors import InvalidSortingType, ProgramModeError, RedditorNameError, SearchModeError
from bulkredditdownloader.parser import LinkDesigner
import argparse
class ProgramMode:
def __init__(self, arguments: argparse.Namespace):
self.arguments = arguments
def generate(self) -> dict:
try:
self._validateProgramMode()
except ProgramModeError:
self._promptUser()
program_mode = {}
if self.arguments.user is not None:
program_mode["user"] = self.arguments.user
if self.arguments.search is not None:
program_mode["search"] = self.arguments.search
if self.arguments.sort == "hot" or \
self.arguments.sort == "controversial" or \
self.arguments.sort == "rising":
self.arguments.sort = "relevance"
if self.arguments.sort is not None:
program_mode["sort"] = self.arguments.sort
else:
if self.arguments.submitted:
program_mode["sort"] = "new"
else:
program_mode["sort"] = "hot"
if self.arguments.time is not None:
program_mode["time"] = self.arguments.time
else:
program_mode["time"] = "all"
if self.arguments.link is not None:
self.arguments.link = self.arguments.link.strip("\"")
program_mode = LinkDesigner(self.arguments.link)
if self.arguments.search is not None:
program_mode["search"] = self.arguments.search
if self.arguments.sort is not None:
program_mode["sort"] = self.arguments.sort
if self.arguments.time is not None:
program_mode["time"] = self.arguments.time
elif self.arguments.subreddit is not None:
if isinstance(self.arguments.subreddit, list):
self.arguments.subreddit = "+".join(self.arguments.subreddit)
program_mode["subreddit"] = self.arguments.subreddit
elif self.arguments.multireddit is not None:
program_mode["multireddit"] = self.arguments.multireddit
elif self.arguments.saved is True:
program_mode["saved"] = True
elif self.arguments.upvoted is True:
program_mode["upvoted"] = True
elif self.arguments.submitted is not None:
program_mode["submitted"] = True
if self.arguments.sort == "rising":
raise InvalidSortingType("Invalid sorting type was given")
program_mode["limit"] = self.arguments.limit
return program_mode
@staticmethod
def _chooseFrom(choices: list[str]):
print()
choices_by_index = list(str(x) for x in range(len(choices) + 1))
for i in range(len(choices)):
print("{indent}[{order}] {mode}".format(indent=" " * 4, order=i + 1, mode=choices[i]))
print(" " * 4 + "[0] exit\n")
choice = input("> ")
while not choice.lower() in choices + choices_by_index + ["exit"]:
print("Invalid input\n")
input("> ")
if choice == "0" or choice == "exit":
sys.exit()
elif choice in choices_by_index:
return choices[int(choice) - 1]
else:
return choice
def _promptUser(self):
print("select program mode:")
program_modes = ["search", "subreddit", "multireddit", "submitted", "upvoted", "saved", "log"]
program_mode = self._chooseFrom(program_modes)
if program_mode == "search":
self.arguments.search = input("\nquery: ")
self.arguments.subreddit = input("\nsubreddit: ")
print("\nselect sort type:")
sort_types = ["relevance", "top", "new"]
sort_type = self._chooseFrom(sort_types)
self.arguments.sort = sort_type
print("\nselect time filter:")
time_filters = ["hour", "day", "week", "month", "year", "all"]
time_filter = self._chooseFrom(time_filters)
self.arguments.time = time_filter
if program_mode == "subreddit":
subreddit_input = input("(type frontpage for all subscribed subreddits,\n"
" use plus to seperate multi subreddits:"
" pics+funny+me_irl etc.)\n\n"
"subreddit: ")
self.arguments.subreddit = subreddit_input
if " " in self.arguments.subreddit:
self.arguments.subreddit = "+".join(
self.arguments.subreddit.split())
# DELETE THE PLUS (+) AT THE END
if not subreddit_input.lower() == "frontpage" and self.arguments.subreddit[-1] == "+":
self.arguments.subreddit = self.arguments.subreddit[:-1]
print("\nselect sort type:")
sort_types = ["hot", "top", "new", "rising", "controversial"]
sort_type = self._chooseFrom(sort_types)
self.arguments.sort = sort_type
if sort_type in ["top", "controversial"]:
print("\nselect time filter:")
time_filters = ["hour", "day", "week", "month", "year", "all"]
time_filter = self._chooseFrom(time_filters)
self.arguments.time = time_filter
else:
self.arguments.time = "all"
elif program_mode == "multireddit":
self.arguments.user = input("\nmultireddit owner: ")
self.arguments.multireddit = input("\nmultireddit: ")
print("\nselect sort type:")
sort_types = ["hot", "top", "new", "rising", "controversial"]
sort_type = self._chooseFrom(sort_types)
self.arguments.sort = sort_type
if sort_type in ["top", "controversial"]:
print("\nselect time filter:")
time_filters = ["hour", "day", "week", "month", "year", "all"]
time_filter = self._chooseFrom(time_filters)
self.arguments.time = time_filter
else:
self.arguments.time = "all"
elif program_mode == "submitted":
self.arguments.submitted = True
self.arguments.user = input("\nredditor: ")
print("\nselect sort type:")
sort_types = ["hot", "top", "new", "controversial"]
sort_type = self._chooseFrom(sort_types)
self.arguments.sort = sort_type
if sort_type == "top":
print("\nselect time filter:")
time_filters = ["hour", "day", "week", "month", "year", "all"]
time_filter = self._chooseFrom(time_filters)
self.arguments.time = time_filter
else:
self.arguments.time = "all"
elif program_mode == "upvoted":
self.arguments.upvoted = True
self.arguments.user = input("\nredditor: ")
elif program_mode == "saved":
self.arguments.saved = True
elif program_mode == "log":
while True:
self.arguments.log = input("\nlog file directory:")
if Path(self.arguments.log).is_file():
break
while True:
try:
self.arguments.limit = int(input("\nlimit (0 for none): "))
if self.arguments.limit == 0:
self.arguments.limit = None
break
except ValueError:
pass
def _validateProgramMode(self):
"""Check if command-line self.arguments are given correcly,
if not, raise errors
"""
if self.arguments.user is None:
user = 0
else:
user = 1
search = 1 if self.arguments.search else 0
modes = ["saved", "subreddit", "submitted", "log", "link", "upvoted", "multireddit"]
values = {x: 0 if getattr(self.arguments, x) is None or
getattr(self.arguments, x) is False
else 1
for x in modes
}
if not sum(values[x] for x in values) == 1:
raise ProgramModeError("Invalid program mode")
if search + values["saved"] == 2:
raise SearchModeError("You cannot search in your saved posts")
if search + values["submitted"] == 2:
raise SearchModeError("You cannot search in submitted posts")
if search + values["upvoted"] == 2:
raise SearchModeError("You cannot search in upvoted posts")
if search + values["log"] == 2:
raise SearchModeError("You cannot search in log files")
if values["upvoted"] + values["submitted"] == 1 and user == 0:
raise RedditorNameError("No redditor name given")

View File

@@ -1,91 +0,0 @@
import random
import socket
import webbrowser
import praw
from prawcore.exceptions import ResponseException
from bulkredditdownloader.errors import RedditLoginFailed
from bulkredditdownloader.json_helper import JsonFile
from bulkredditdownloader.utils import GLOBAL
class Reddit:
def __init__(self, refresh_token: str = None):
self.SCOPES = ['identity', 'history', 'read', 'save']
self.PORT = 7634
self.refresh_token = refresh_token
self.redditInstance = None
self.arguments = {
"client_id": GLOBAL.reddit_client_id,
"client_secret": GLOBAL.reddit_client_secret,
"user_agent": str(socket.gethostname())
}
def begin(self) -> praw.Reddit:
if self.refresh_token:
self.arguments["refresh_token"] = self.refresh_token
self.redditInstance = praw.Reddit(**self.arguments)
try:
self.redditInstance.auth.scopes()
return self.redditInstance
except ResponseException:
self.arguments["redirect_uri"] = "http://localhost:" + \
str(self.PORT)
self.redditInstance = praw.Reddit(**self.arguments)
reddit, refresh_token = self.getRefreshToken(*self.SCOPES)
else:
self.arguments["redirect_uri"] = "http://localhost:" + \
str(self.PORT)
self.redditInstance = praw.Reddit(**self.arguments)
reddit, refresh_token = self.getRefreshToken(*self.SCOPES)
JsonFile(GLOBAL.configDirectory).add({"reddit_username": str(
reddit.user.me()), "reddit": refresh_token}, "credentials")
return self.redditInstance
def recieve_connection(self) -> socket:
"""Wait for and then return a connected socket..
Opens a TCP connection on port 8080, and waits for a single client.
"""
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
server.bind(('0.0.0.0', self.PORT))
server.listen(1)
client = server.accept()[0]
server.close()
return client
def send_message(self, client: socket, message: str):
"""Send message to client and close the connection."""
client.send('HTTP/1.1 200 OK\r\n\r\n{}'.format(message).encode('utf-8'))
client.close()
def getRefreshToken(self, scopes: list[str]) -> tuple[praw.Reddit, str]:
state = str(random.randint(0, 65000))
url = self.redditInstance.auth.url(scopes, state, 'permanent')
print("---Setting up the Reddit API---\n")
print("Go to this URL and login to reddit:\n", url, sep="\n", end="\n\n")
webbrowser.open(url, new=2)
client = self.recieve_connection()
data = client.recv(1024).decode('utf-8')
str(data)
param_tokens = data.split(' ', 2)[1].split('?', 1)[1].split('&')
params = {key: value for (key, value) in [token.split('=') for token in param_tokens]}
if state != params['state']:
self.send_message(client, 'State mismatch. Expected: {} Received: {}'.format(state, params['state']))
raise RedditLoginFailed
if 'error' in params:
self.send_message(client, params['error'])
raise RedditLoginFailed
refresh_token = self.redditInstance.auth.authorize(params['code'])
self.send_message(client,
"<script>"
"alert(\"You can go back to terminal window now.\");"
"</script>"
)
return self.redditInstance, refresh_token

View File

@@ -1,341 +0,0 @@
import sys
import time
import urllib.request
from urllib.error import HTTPError
from prawcore.exceptions import Forbidden, NotFound
from bulkredditdownloader.errors import (InsufficientPermission, InvalidSortingType, MultiredditNotFound, NoMatchingSubmissionFound,
                                         NoPrawSupport, NoRedditSupport)
from bulkredditdownloader.reddit import Reddit
from praw.models.listing.generator import ListingGenerator
from bulkredditdownloader.utils import GLOBAL, createLogFile, printToFile
from praw.models import Submission
print = printToFile
def getPosts(program_mode: dict) -> list[dict]:
"""Call PRAW regarding to arguments and pass it to extractDetails.
Return what extractDetails has returned.
"""
reddit = Reddit(GLOBAL.config["credentials"]["reddit"]).begin()
if program_mode["sort"] == "best":
raise NoPrawSupport("PRAW does not support that")
if "subreddit" in program_mode:
if "search" in program_mode:
if program_mode["subreddit"] == "frontpage":
program_mode["subreddit"] = "all"
if "user" in program_mode:
if program_mode["user"] == "me":
program_mode["user"] = str(reddit.user.me())
if "search" not in program_mode:
if program_mode["sort"] == "top" or program_mode["sort"] == "controversial":
keyword_params = {"time_filter": program_mode["time"], "limit": program_mode["limit"]}
# OTHER SORT TYPES DON'T TAKE TIME_FILTER
else:
keyword_params = {"limit": program_mode["limit"]}
else:
keyword_params = {"time_filter": program_mode["time"], "limit": program_mode["limit"]}
if "search" in program_mode:
if program_mode["sort"] in ["hot", "rising", "controversial"]:
raise InvalidSortingType("Invalid sorting type was given")
if "subreddit" in program_mode:
print(
"search for \"{search}\" in\n"
"subreddit: {subreddit}\nsort: {sort}\n"
"time: {time}\nlimit: {limit}\n".format(
search=program_mode["search"],
limit=program_mode["limit"],
sort=program_mode["sort"],
subreddit=program_mode["subreddit"],
time=program_mode["time"]
).upper(), no_print=True
)
return extractDetails(
reddit.subreddit(program_mode["subreddit"]).search(
program_mode["search"],
limit=program_mode["limit"],
sort=program_mode["sort"],
time_filter=program_mode["time"]
)
)
elif "multireddit" in program_mode:
raise NoPrawSupport("PRAW does not support that")
elif "user" in program_mode:
raise NoPrawSupport("PRAW does not support that")
elif "saved" in program_mode:
raise ("Reddit does not support that")
if program_mode["sort"] == "relevance":
raise InvalidSortingType("Invalid sorting type was given")
if "saved" in program_mode:
print("saved posts\nuser:{username}\nlimit={limit}\n".format(
username=reddit.user.me(),
limit=program_mode["limit"]).upper(),
no_print=True
)
return extractDetails(reddit.user.me().saved(limit=program_mode["limit"]))
if "subreddit" in program_mode:
if program_mode["subreddit"] == "frontpage":
print(
"subreddit: {subreddit}\nsort: {sort}\n"
"time: {time}\nlimit: {limit}\n".format(
limit=program_mode["limit"],
sort=program_mode["sort"],
subreddit=program_mode["subreddit"],
time=program_mode["time"]).upper(),
no_print=True
)
return extractDetails(getattr(reddit.front, program_mode["sort"])(**keyword_params))
else:
print(
"subreddit: {subreddit}\nsort: {sort}\n"
"time: {time}\nlimit: {limit}\n".format(
limit=program_mode["limit"],
sort=program_mode["sort"],
subreddit=program_mode["subreddit"],
time=program_mode["time"]).upper(),
no_print=True
)
return extractDetails(
getattr(reddit.subreddit(program_mode["subreddit"]), program_mode["sort"])(**keyword_params)
)
elif "multireddit" in program_mode:
print(
"user: {user}\n"
"multireddit: {multireddit}\nsort: {sort}\n"
"time: {time}\nlimit: {limit}\n".format(
user=program_mode["user"],
limit=program_mode["limit"],
sort=program_mode["sort"],
multireddit=program_mode["multireddit"],
time=program_mode["time"]).upper(),
no_print=True
)
try:
return extractDetails(
getattr(reddit.multireddit(program_mode["user"], program_mode["multireddit"]),
program_mode["sort"]
)(**keyword_params)
)
except NotFound:
raise MultiredditNotFound("Multireddit not found")
elif "submitted" in program_mode:
print(
"submitted posts of {user}\nsort: {sort}\n"
"time: {time}\nlimit: {limit}\n".format(
limit=program_mode["limit"],
sort=program_mode["sort"],
user=program_mode["user"],
time=program_mode["time"]).upper(),
no_print=True
)
return extractDetails(
getattr(reddit.redditor(program_mode["user"]).submissions, program_mode["sort"])(**keyword_params)
)
elif "upvoted" in program_mode:
print(
"upvoted posts of {user}\nlimit: {limit}\n".format(
user=program_mode["user"],
limit=program_mode["limit"]).upper(),
no_print=True
)
try:
return extractDetails(reddit.redditor(program_mode["user"]).upvoted(limit=program_mode["limit"]))
except Forbidden:
raise InsufficientPermission(
"You do not have permission to do that")
elif "post" in program_mode:
print("post: {post}\n".format(post=program_mode["post"]).upper(), no_print=True)
return extractDetails(reddit.submission(url=program_mode["post"]), single_post=True)
def extractDetails(posts: (ListingGenerator, Submission), single_post=False) -> list[dict]:
"""Check posts and decide if it can be downloaded.
If so, create a dictionary with post details and append them to a list.
Write all of posts to file. Return the list
"""
post_list = []
post_count = 1
all_posts = {}
print("\nGETTING POSTS")
posts_file = createLogFile("POSTS")
if single_post:
submission = posts
post_count += 1
try:
details = {'POSTID': submission.id,
'TITLE': submission.title,
'REDDITOR': str(submission.author),
'TYPE': None,
'CONTENTURL': submission.url,
'SUBREDDIT': submission.subreddit.display_name,
'UPVOTES': submission.score,
'FLAIR': submission.link_flair_text,
'DATE': str(time.strftime("%Y-%m-%d_%H-%M", time.localtime(submission.created_utc)))
}
except AttributeError:
pass
if not any(
domain in submission.domain for domain in GLOBAL.arguments.skip_domain):
result = matchWithDownloader(submission)
if result is not None:
details = {**details, **result}
post_list.append(details)
posts_file.add({post_count: details})
else:
try:
for submission in posts:
if post_count % 100 == 0:
sys.stdout.write("")
sys.stdout.flush()
if post_count % 1000 == 0:
sys.stdout.write("\n" + " " * 14)
sys.stdout.flush()
try:
details = {'POSTID': submission.id,
'TITLE': submission.title,
'REDDITOR': str(submission.author),
'TYPE': None,
'CONTENTURL': submission.url,
'SUBREDDIT': submission.subreddit.display_name,
'UPVOTES': submission.score,
'FLAIR': submission.link_flair_text,
'DATE': str(time.strftime("%Y-%m-%d_%H-%M", time.localtime(submission.created_utc)))
}
except AttributeError:
continue
if details['POSTID'] in GLOBAL.downloadedPosts():
continue
if not any(
domain in submission.domain for domain in GLOBAL.arguments.skip_domain):
result = matchWithDownloader(submission)
if result is not None:
details = {**details, **result}
post_list.append(details)
all_posts[post_count] = details
post_count += 1
except KeyboardInterrupt:
print("\nKeyboardInterrupt", no_print=True)
posts_file.add(all_posts)
if not len(post_list) == 0:
print()
return post_list
else:
raise NoMatchingSubmissionFound("No matching submission was found")
def matchWithDownloader(submission: Submission) -> dict[str, str]:
direct_link = extractDirectLink(submission.url)
if direct_link:
return {'TYPE': 'direct', 'CONTENTURL': direct_link}
if 'v.redd.it' in submission.domain:
bitrates = ["DASH_1080", "DASH_720", "DASH_600", "DASH_480", "DASH_360", "DASH_240"]
for bitrate in bitrates:
video_url = submission.url + "/" + bitrate + ".mp4"
try:
response_code = urllib.request.urlopen(video_url).getcode()
except urllib.error.HTTPError:
response_code = 0
if response_code == 200:
return {'TYPE': 'v.redd.it', 'CONTENTURL': video_url}
if 'gfycat' in submission.domain:
return {'TYPE': 'gfycat'}
if 'youtube' in submission.domain and 'watch' in submission.url:
return {'TYPE': 'youtube'}
if 'youtu.be' in submission.domain:
url = urllib.request.urlopen(submission.url).geturl()
if 'watch' in url:
return {'TYPE': 'youtube'}
elif 'imgur' in submission.domain:
return {'TYPE': 'imgur'}
elif 'erome' in submission.domain:
return {'TYPE': 'erome'}
elif 'redgifs' in submission.domain:
return {'TYPE': 'redgifs'}
elif 'gifdeliverynetwork' in submission.domain:
return {'TYPE': 'gifdeliverynetwork'}
if 'reddit.com/gallery' in submission.url:
return {'TYPE': 'gallery'}
elif submission.is_self and 'self' not in GLOBAL.arguments.skip:
return {'TYPE': 'self',
'CONTENT': submission.selftext}
def extractDirectLink(url: str) -> (bool, str):
"""Check if link is a direct image link.
If so, return URL,
if not, return False
"""
image_types = ['jpg', 'jpeg', 'png', 'mp4', 'webm', 'gif']
if url[-1] == "/":
url = url[:-1]
if "i.reddituploads.com" in url:
return url
for extension in image_types:
if extension == url.split(".")[-1]:
return url
else:
return None
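To make the helper concrete (illustrative URLs; extractDirectLink is the function defined just above):

print(extractDirectLink("https://i.redd.it/example.png"))  # the URL itself: extension matches
print(extractDirectLink("https://imgur.com/a/abcdef"))     # None: no direct media extension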

View File

@@ -1,105 +1,46 @@
#!/usr/bin/env python3
# coding=utf-8
import hashlib
import logging
import re
from abc import ABC, abstractmethod
from pathlib import Path
import requests
from praw.models import Submission
from bulkredditdownloader.errors import DomainInSkip, FailedToDownload, FileAlreadyExistsError, TypeInSkip
from bulkredditdownloader.utils import GLOBAL
from bulkredditdownloader.errors import FailedToDownload
from bulkredditdownloader.resource import Resource
logger = logging.getLogger(__name__)
class BaseDownloader(ABC):
def __init__(self, directory: Path, post: dict):
def __init__(self, directory: Path, post: Submission):
self.directory = directory
self.post = post
self.hashes = []
@abstractmethod
def download(self):
def download(self) -> list[Resource]:
raise NotImplementedError
@staticmethod
def _create_hash(content: bytes) -> str:
hash_md5 = hashlib.md5(content)
return hash_md5.hexdigest()
@staticmethod
    def _download_resource(filename: Path, folder_dir: Path, image_url: str, indent: int = 0, silent: bool = False):
        formats = {
            "videos": [".mp4", ".webm"],
            "images": [".jpg", ".jpeg", ".png", ".bmp"],
            "gifs": [".gif"],
            "self": []
        }
        for file_type in GLOBAL.arguments.skip:
            for extension in formats[file_type]:
                if extension in filename:
                    raise TypeInSkip
        if any(domain in image_url for domain in GLOBAL.arguments.skip_domain):
            raise DomainInSkip
        headers = [
            ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
             "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "
             "Safari/537.36 OPR/54.0.2952.64"),
            ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"),
            ("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"),
            ("Accept-Encoding", "none"),
            ("Accept-Language", "en-US,en;q=0.8"),
            ("Connection", "keep-alive")
        ]
        folder_dir.mkdir(exist_ok=True)
        if "imgur" not in image_url:
            addheaders = headers
        else:
            addheaders = None
        if not silent:
            logger.info(" " * indent + str(folder_dir), " " * indent + str(filename), sep="\n")
        # Loop to attempt download 3 times
        for i in range(3):
            file_path = Path(folder_dir) / filename
            if file_path.is_file():
                raise FileAlreadyExistsError
            else:
                try:
                    download_content = requests.get(image_url, headers=addheaders).content
                except ConnectionResetError:
                    raise FailedToDownload
                file_hash = BaseDownloader._create_hash(download_content)
                if GLOBAL.arguments.no_dupes:
                    if file_hash in GLOBAL.downloadedPosts():
                        raise FileAlreadyExistsError
                GLOBAL.downloadedPosts.add(file_hash)
                with open(file_path, 'wb') as file:
                    file.write(download_content)
                if not silent:
                    logger.info(" " * indent + "Downloaded" + " " * 10)
                return
        raise FailedToDownload
    def _download_resource(self, resource_url: str):
        headers = {
            "User-Agent":
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "
                "Safari/537.36 OPR/54.0.2952.64",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
            "Accept-Encoding": "none",
            "Accept-Language": "en-US,en;q=0.8",
            "Connection": "keep-alive",
        }
        try:
            download_content = requests.get(resource_url, headers=headers).content
        except ConnectionResetError:
            raise FailedToDownload
        return Resource(self.post, resource_url, download_content)
@staticmethod
def _get_extension(url: str) -> str:
pattern = re.compile(r'(\.(jpg|jpeg|png|mp4|webm|gif))')
if results := re.search(pattern, url):
if len(results.groups()) > 1:
return results[0]
if "v.redd.it" not in url:
return '.jpg'
else:
return '.mp4'
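A rough sketch of the new downloader contract, with an illustrative class name; the real subclasses below and the test suite follow the same shape. A subclass fetches its media and returns Resource objects instead of writing files itself:

from pathlib import Path
from praw.models import Submission
from bulkredditdownloader.resource import Resource
from bulkredditdownloader.site_downloaders.base_downloader import BaseDownloader

class ExampleDownloader(BaseDownloader):
    def __init__(self, directory: Path, post: Submission):
        super().__init__(directory, post)

    def download(self) -> list[Resource]:
        # one Resource per downloaded URL; persisting to disk is now the caller's job
        return [self._download_resource(self.post.url)]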

View File

@@ -2,18 +2,14 @@
import pathlib
from praw.models import Submission
from bulkredditdownloader.site_downloaders.base_downloader import BaseDownloader
from bulkredditdownloader.utils import GLOBAL
class Direct(BaseDownloader):
def __init__(self, directory: pathlib.Path, post: dict):
def __init__(self, directory: pathlib.Path, post: Submission):
super().__init__(directory, post)
self.download()
def download(self):
self.post['EXTENSION'] = self._get_extension(self.post['CONTENTURL'])
self.directory.mkdir(exist_ok=True)
filename = GLOBAL.config['filename'].format(**self.post) + self.post["EXTENSION"]
self._download_resource(pathlib.Path(filename), self.directory, self.post['CONTENTURL'])
return [self._download_resource(self.post.url)]

View File

@@ -7,77 +7,39 @@ import urllib.error
import urllib.request
from html.parser import HTMLParser
from praw.models import Submission
from bulkredditdownloader.errors import NotADownloadableLinkError
from bulkredditdownloader.site_downloaders.base_downloader import BaseDownloader
from bulkredditdownloader.errors import AlbumNotDownloadedCompletely, FileAlreadyExistsError, NotADownloadableLinkError
from bulkredditdownloader.utils import GLOBAL
logger = logging.getLogger(__name__)
class Erome(BaseDownloader):
def __init__(self, directory: pathlib.Path, post: dict):
def __init__(self, directory: pathlib.Path, post: Submission):
super().__init__(directory, post)
self.download()
def download(self):
try:
images = self._get_links(self.post['CONTENTURL'])
images = self._get_links(self.post.url)
except urllib.error.HTTPError:
raise NotADownloadableLinkError("Not a downloadable link")
images_length = len(images)
how_many_downloaded = len(images)
duplicates = 0
if images_length == 1:
"""Filenames are declared here"""
filename = GLOBAL.config['filename'].format(**self.post) + self.post["EXTENSION"]
if len(images) == 1:
image = images[0]
if not re.match(r'https?://.*', image):
image = "https://" + image
self._download_resource(filename, self.directory, image)
return [self._download_resource(image)]
else:
filename = GLOBAL.config['filename'].format(**self.post)
logger.info(filename)
folder_dir = self.directory / filename
folder_dir.mkdir(exist_ok=True)
out = []
for i, image in enumerate(images):
extension = self._get_extension(image)
filename = str(i + 1) + extension
if not re.match(r'https?://.*', image):
image = "https://" + image
logger.info(" ({}/{})".format(i + 1, images_length))
logger.info(" {}".format(filename))
try:
self._download_resource(pathlib.Path(filename), folder_dir, image, indent=2)
except FileAlreadyExistsError:
logger.info(" The file already exists" + " " * 10, end="\n\n")
duplicates += 1
how_many_downloaded -= 1
except Exception as exception:
# raise exception
logger.error("\n Could not get the file")
logger.error(
" "
+ "{class_name}: {info}".format(class_name=exception.__class__.__name__, info=str(exception))
+ "\n"
)
how_many_downloaded -= 1
if duplicates == images_length:
raise FileAlreadyExistsError
elif how_many_downloaded + duplicates < images_length:
raise AlbumNotDownloadedCompletely("Album Not Downloaded Completely")
out.append(self._download_resource(image))
return out
@staticmethod
def _get_links(url: str) -> list[str]:

View File

@@ -1,26 +1,23 @@
#!/usr/bin/env python3
import json
import pathlib
import logging
import urllib.parse
import pathlib
import requests
from praw.models import Submission
from bulkredditdownloader.errors import ImageNotFound, NotADownloadableLinkError
from bulkredditdownloader.site_downloaders.base_downloader import BaseDownloader
from bulkredditdownloader.errors import (AlbumNotDownloadedCompletely, FileAlreadyExistsError, ImageNotFound,
NotADownloadableLinkError, TypeInSkip)
from bulkredditdownloader.utils import GLOBAL
logger = logging.getLogger(__name__)
class Gallery(BaseDownloader):
def __init__(self, directory: pathlib.Path, post):
def __init__(self, directory: pathlib.Path, post: Submission):
super().__init__(directory, post)
link = self.post['CONTENTURL']
link = self.post.url
self.raw_data = self._get_data(link)
self.download()
def download(self):
images = {}
@@ -37,7 +34,7 @@ class Gallery(BaseDownloader):
except KeyError:
continue
self._download_album(images, count)
return [self._download_album(images)]
@staticmethod
def _get_data(link: str) -> dict:
@@ -63,44 +60,9 @@ class Gallery(BaseDownloader):
data = json.loads(page_source[start_index - 1:end_index + 1].strip()[:-1])
return data
def _download_album(self, images: dict, count: int):
folder_name = GLOBAL.config['filename'].format(**self.post)
folder_dir = self.directory / folder_name
how_many_downloaded = 0
duplicates = 0
folder_dir.mkdir(exist_ok=True)
logger.info(folder_name)
def _download_album(self, images: dict):
out = []
for i, image in enumerate(images):
path = urllib.parse.urlparse(image['url']).path
extension = pathlib.Path(path).suffix
out.append(self._download_resource(image['url']))
filename = pathlib.Path("_".join([str(i + 1), image['id']]) + extension)
logger.info("\n ({}/{})".format(i + 1, count))
try:
self._download_resource(filename, folder_dir, image['url'], indent=2)
how_many_downloaded += 1
except FileAlreadyExistsError:
logger.info(" The file already exists" + " " * 10, end="\n\n")
duplicates += 1
except TypeInSkip:
logger.info(" Skipping...")
how_many_downloaded += 1
except Exception as exception:
logger.info("\n Could not get the file")
logger.info(" " + "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
class_name=exception.__class__.__name__, info=str(exception)) + "\n"
)
logger.info(GLOBAL.log_stream.getvalue(), no_print=True)
if duplicates == count:
raise FileAlreadyExistsError
elif how_many_downloaded + duplicates < count:
raise AlbumNotDownloadedCompletely("Album Not Downloaded Completely")
return out

View File

@@ -6,14 +6,14 @@ import re
import urllib.request
from bs4 import BeautifulSoup
from praw.models import Submission
from bulkredditdownloader.site_downloaders.gif_delivery_network import GifDeliveryNetwork
class Gfycat(GifDeliveryNetwork):
def __init__(self, directory: pathlib.Path, post: dict):
def __init__(self, directory: pathlib.Path, post: Submission):
super().__init__(directory, post)
self.download()
def download(self):
super().download()

View File

@@ -4,29 +4,23 @@ import pathlib
import urllib.request
from bs4 import BeautifulSoup
from praw.models import Submission
from bulkredditdownloader.site_downloaders.base_downloader import BaseDownloader
from bulkredditdownloader.errors import NotADownloadableLinkError
from bulkredditdownloader.utils import GLOBAL
from bulkredditdownloader.site_downloaders.base_downloader import BaseDownloader
class GifDeliveryNetwork(BaseDownloader):
def __init__(self, directory: pathlib.Path, post: dict):
def __init__(self, directory: pathlib.Path, post: Submission):
super().__init__(directory, post)
self.download()
def download(self):
try:
self.post['MEDIAURL'] = self._get_link(self.post['CONTENTURL'])
media_url = self._get_link(self.post.url)
except IndexError:
raise NotADownloadableLinkError("Could not read the page source")
self.post['EXTENSION'] = self._get_extension(self.post['MEDIAURL'])
self.directory.mkdir(exist_ok=True)
filename = GLOBAL.config['filename'].format(**self.post) + self.post["EXTENSION"]
self._download_resource(filename, self.directory, self.post['MEDIAURL'])
return [self._download_resource(media_url)]
@staticmethod
def _get_link(url: str) -> str:

View File

@@ -1,16 +1,15 @@
#!/usr/bin/env python3
import json
import pathlib
import logging
import pathlib
import requests
from praw.models import Submission
from bulkredditdownloader.errors import ExtensionError, ImageNotFound, NotADownloadableLinkError
from bulkredditdownloader.site_downloaders.base_downloader import BaseDownloader
from bulkredditdownloader.site_downloaders.direct import Direct
from bulkredditdownloader.errors import (AlbumNotDownloadedCompletely, ExtensionError, FileAlreadyExistsError,
ImageNotFound, NotADownloadableLinkError, TypeInSkip)
from bulkredditdownloader.utils import GLOBAL, nameCorrector
logger = logging.getLogger(__name__)
@@ -19,85 +18,43 @@ class Imgur(BaseDownloader):
imgur_image_domain = "https://i.imgur.com/"
def __init__(self, directory: pathlib.Path, post: dict):
def __init__(self, directory: pathlib.Path, post: Submission):
super().__init__(directory, post)
self.raw_data = {}
self.download()
def download(self):
link = self.post['CONTENTURL']
link = self.post.url
if link.endswith(".gifv"):
link = link.replace(".gifv", ".mp4")
Direct(self.directory, {**self.post, 'CONTENTURL': link})
return
direct_thing = Direct(self.directory, self.post)
return direct_thing.download()
self.raw_data = self._get_data(link)
if self._is_album:
if self.raw_data["album_images"]["count"] != 1:
self._download_album(self.raw_data["album_images"])
out = self._download_album(self.raw_data["album_images"])
else:
self._download_image(self.raw_data["album_images"]["images"][0])
out = self._download_image(self.raw_data["album_images"]["images"][0])
else:
self._download_image(self.raw_data)
out = self._download_image(self.raw_data)
return out
def _download_album(self, images: dict):
folder_name = GLOBAL.config['filename'].format(**self.post)
folder_dir = self.directory / folder_name
images_length = images["count"]
how_many_downloaded = 0
duplicates = 0
folder_dir.mkdir(exist_ok=True)
logger.info(folder_name)
out = []
for i in range(images_length):
extension = self._validate_extension(images["images"][i]["ext"])
image_url = self.imgur_image_domain + images["images"][i]["hash"] + extension
filename = pathlib.Path("_".join([str(i + 1),
nameCorrector(images["images"][i]['title']),
images["images"][i]['hash']]) + extension)
logger.info("\n ({}/{})".format(i + 1, images_length))
try:
self._download_resource(filename, folder_dir, image_url, indent=2)
how_many_downloaded += 1
except FileAlreadyExistsError:
logger.info(" The file already exists" + " " * 10, end="\n\n")
duplicates += 1
except TypeInSkip:
logger.info(" Skipping...")
how_many_downloaded += 1
except Exception as exception:
logger.info("\n Could not get the file")
logger.info(
" "
+ "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
class_name=exception.__class__.__name__,
info=str(exception)
)
+ "\n"
)
logger.info(GLOBAL.log_stream.getvalue(), no_print=True)
if duplicates == images_length:
raise FileAlreadyExistsError
elif how_many_downloaded + duplicates < images_length:
raise AlbumNotDownloadedCompletely("Album Not Downloaded Completely")
out.append(self._download_resource(image_url))
return out
def _download_image(self, image: dict):
extension = self._validate_extension(image["ext"])
image_url = self.imgur_image_domain + image["hash"] + extension
filename = GLOBAL.config['filename'].format(**self.post) + extension
self._download_resource(filename, self.directory, image_url)
return [self._download_resource(image_url)]
def _is_album(self) -> bool:
return "album_images" in self.raw_data
@@ -134,9 +91,8 @@ class Imgur(BaseDownloader):
@staticmethod
def _validate_extension(extension_suffix: str) -> str:
possible_extensions = [".jpg", ".png", ".mp4", ".gif"]
for extension in possible_extensions:
if extension in extension_suffix:
return extension
else:
raise ExtensionError(f"\"{extension_suffix}\" is not recognized as a valid extension.")
raise ExtensionError(f'"{extension_suffix}" is not recognized as a valid extension for Imgur')

View File

@@ -5,24 +5,22 @@ import pathlib
import urllib.request
from bs4 import BeautifulSoup
from praw.models import Submission
from bulkredditdownloader.site_downloaders.gif_delivery_network import GifDeliveryNetwork
from bulkredditdownloader.errors import NotADownloadableLinkError
from bulkredditdownloader.site_downloaders.gif_delivery_network import GifDeliveryNetwork
class Redgifs(GifDeliveryNetwork):
def __init__(self, directory: pathlib.Path, post: dict):
def __init__(self, directory: pathlib.Path, post: Submission):
super().__init__(directory, post)
self.download()
def download(self):
super().download()
@staticmethod
def _get_link(url: str) -> str:
"""Extract direct link to the video from page's source
and return it
"""
"""Extract direct link to the video from page's source and return it"""
if '.webm' in url or '.mp4' in url or '.gif' in url:
return url

View File

@@ -1,64 +1,39 @@
#!/usr/bin/env python3
import io
import logging
import pathlib
from pathlib import Path
from praw.models import Submission
from bulkredditdownloader.resource import Resource
from bulkredditdownloader.site_downloaders.base_downloader import BaseDownloader
from bulkredditdownloader.errors import FileAlreadyExistsError, TypeInSkip
from bulkredditdownloader.utils import GLOBAL
logger = logging.getLogger(__name__)
class SelfPost(BaseDownloader):
def __init__(self, directory: pathlib.Path, post: dict):
def __init__(self, directory: pathlib.Path, post: Submission):
super().__init__(directory, post)
self.download()
def download(self):
if "self" in GLOBAL.arguments.skip:
raise TypeInSkip
return Resource(self.post, self.post.url, bytes(self.export_to_string()))
self.directory.mkdir(exist_ok=True)
filename = GLOBAL.config['filename'].format(**self.post)
file_dir = self.directory / (filename + ".md")
logger.info(file_dir)
logger.info(filename + ".md")
if Path.is_file(file_dir):
raise FileAlreadyExistsError
try:
self._write_to_file(file_dir, self.post)
except FileNotFoundError:
file_dir = self.post['POSTID'] + ".md"
file_dir = self.directory / file_dir
self._write_to_file(file_dir, self.post)
@staticmethod
def _write_to_file(directory: pathlib.Path, post: dict):
def export_to_string(self) -> str:
"""Self posts are formatted here"""
content = ("## ["
+ post["TITLE"]
+ self.post.fullname
+ "]("
+ post["CONTENTURL"]
+ self.post.url
+ ")\n"
+ post["CONTENT"]
+ self.post.selftext
+ "\n\n---\n\n"
+ "submitted to [r/"
+ post["SUBREDDIT"]
+ self.post.subreddit.title
+ "](https://www.reddit.com/r/"
+ post["SUBREDDIT"]
+ self.post.subreddit.title
+ ") by [u/"
+ post["REDDITOR"]
+ self.post.author.name
+ "](https://www.reddit.com/user/"
+ post["REDDITOR"]
+ self.post.author.name
+ ")")
with io.open(directory, "w", encoding="utf-8") as FILE:
print(content, file=FILE)
logger.info("Downloaded")
return content
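For a hypothetical submission (fullname t3_abc123, selftext "Hello world", posted by u/example_user to a subreddit titled test), export_to_string would produce Markdown along these lines:

## [t3_abc123](https://www.reddit.com/r/test/comments/abc123/example/)
Hello world

---

submitted to [r/test](https://www.reddit.com/r/test) by [u/example_user](https://www.reddit.com/user/example_user)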

View File

@@ -4,61 +4,49 @@ import logging
import os
import pathlib
import subprocess
import tempfile
import requests
from praw.models import Submission
from bulkredditdownloader.resource import Resource
from bulkredditdownloader.site_downloaders.base_downloader import BaseDownloader
from bulkredditdownloader.utils import GLOBAL
logger = logging.getLogger(__name__)
class VReddit(BaseDownloader):
def __init__(self, directory: pathlib.Path, post: dict):
def __init__(self, directory: pathlib.Path, post: Submission):
super().__init__(directory, post)
self.download()
def download(self):
extension = ".mp4"
self.directory.mkdir(exist_ok=True)
filename = GLOBAL.config['filename'].format(**self.post) + extension
try:
fnull = open(os.devnull, 'w')
subprocess.call("ffmpeg", stdout=fnull, stderr=subprocess.STDOUT)
except Exception:
self._download_resource(filename, self.directory, self.post['CONTENTURL'])
logger.info("FFMPEG library not found, skipping merging video and audio")
except subprocess.SubprocessError:
return self._download_resource(self.post.url)
else:
video_name = self.post['POSTID'] + "_video"
video_url = self.post['CONTENTURL']
audio_name = self.post['POSTID'] + "_audio"
video_url = self.post.url
audio_url = video_url[:video_url.rfind('/')] + '/DASH_audio.mp4'
logger.info(self.directory, filename, sep="\n")
self._download_resource(video_name, self.directory, video_url, silent=True)
self._download_resource(audio_name, self.directory, audio_url, silent=True)
try:
self._merge_audio(video_name, audio_name, filename, self.directory)
except KeyboardInterrupt:
(self.directory / filename).unlink()
(self.directory / audio_name).unlink()
(self.directory / video_name).unlink()
(self.directory / filename).unlink()
with tempfile.TemporaryDirectory() as temp_dir:
temp_dir = pathlib.Path(temp_dir)  # TemporaryDirectory yields a str path; wrap it so the '/' operator below works
video = requests.get(video_url).content
audio = requests.get(audio_url).content
with open(temp_dir / 'video', 'wb') as file:
file.write(video)
with open(temp_dir / 'audio', 'wb') as file:
file.write(audio)
self._merge_audio(temp_dir)
with open(temp_dir / 'output.mp4', 'rb') as file:
content = file.read()
return Resource(self.post, self.post.url, content)
@staticmethod
def _merge_audio(
video: pathlib.Path,
audio: pathlib.Path,
filename: pathlib.Path,
directory: pathlib.Path):
input_video = str(directory / video)
input_audio = str(directory / audio)
def _merge_audio(working_directory: pathlib.Path):
input_video = working_directory / 'video'
input_audio = working_directory / 'audio'
fnull = open(os.devnull, 'w')
cmd = "ffmpeg -i {} -i {} -c:v copy -c:a aac -strict experimental {}".format(
input_audio, input_video, str(directory / filename))
input_audio, input_video, str(working_directory / 'output.mp4'))
subprocess.call(cmd.split(), stdout=fnull, stderr=subprocess.STDOUT)
(directory / video).unlink()
(directory / audio).unlink()
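Concretely, for a v.redd.it video URL like https://v.redd.it/abc123/DASH_720.mp4 (hypothetical), the audio URL becomes https://v.redd.it/abc123/DASH_audio.mp4, and with a temporary directory of /tmp/bdfr the merge step boils down to:

ffmpeg -i /tmp/bdfr/audio -i /tmp/bdfr/video -c:v copy -c:a aac -strict experimental /tmp/bdfr/output.mp4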

View File

@@ -1,64 +1,37 @@
#!/usr/bin/env python3
import logging
import os
import pathlib
import sys
import tempfile
import youtube_dl
from praw.models import Submission
from bulkredditdownloader.resource import Resource
from bulkredditdownloader.site_downloaders.base_downloader import BaseDownloader
from bulkredditdownloader.errors import FileAlreadyExistsError
from bulkredditdownloader.utils import GLOBAL
logger = logging.getLogger(__name__)
class Youtube(BaseDownloader):
def __init__(self, directory: pathlib.Path, post: dict):
def __init__(self, directory: pathlib.Path, post: Submission):
super().__init__(directory, post)
self.download()
def download(self):
self.directory.mkdir(exist_ok=True)
return self._download_video()
filename = GLOBAL.config['filename'].format(**self.post)
logger.info(filename)
def _download_video(self) -> Resource:
with tempfile.TemporaryDirectory() as temp_dir:
ydl_opts = {
"format": "best",
"outtmpl": str(temp_dir / "test.%(ext)s"),
"playlistend": 1,
"nooverwrites": True,
"quiet": True
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download([self.post.url])
self._download_video(filename, self.directory, self.post['CONTENTURL'])
def _download_video(self, filename: str, directory: pathlib.Path, url: str):
ydl_opts = {
"format": "best",
"outtmpl": str(directory / (filename + ".%(ext)s")),
"progress_hooks": [self._hook],
"playlistend": 1,
"nooverwrites": True,
"quiet": True
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download([url])
location = directory / (filename + ".mp4")
with open(location, 'rb') as file:
content = file.read()
if GLOBAL.arguments.no_dupes:
try:
file_hash = self._create_hash(content)
except FileNotFoundError:
return None
if file_hash in GLOBAL.downloadedPosts():
os.remove(location)
raise FileAlreadyExistsError
GLOBAL.downloadedPosts.add(file_hash)
@staticmethod
def _hook(d):
if d['status'] == 'finished':
return logger.info("Downloaded")
downloaded_mbs = int(d['downloaded_bytes'] * (10**(-6)))
file_size = int(d['total_bytes'] * (10**(-6)))
sys.stdout.write("{}Mb/{}Mb\r".format(downloaded_mbs, file_size))
sys.stdout.flush()
with open(pathlib.Path(temp_dir) / 'test.mp4', 'rb') as file:
content = file.read()
return Resource(self.post, self.post.url, content)

View File

@@ -1,3 +1,5 @@
#!/usr/bin/env python3
from os import path

View File

@@ -1,42 +1,30 @@
#!/uasr/bin/env python3
#!/usr/bin/env python3
# coding=utf-8
from pathlib import Path
from unittest.mock import Mock
import pytest
from bulkredditdownloader.resource import Resource
from bulkredditdownloader.site_downloaders.base_downloader import BaseDownloader
@pytest.mark.parametrize(('test_bytes', 'expected'), ((b'test', '098f6bcd4621d373cade4e832627b4f6'),
(b'test2', 'ad0234829205b9033196ba818f7a872b')))
def test_create_hash(test_bytes: bytes, expected: str):
result = BaseDownloader._create_hash(test_bytes)
assert result == expected
class BlankDownloader(BaseDownloader):
def __init__(self, directory, post):
super().__init__(directory, post)
def download(self) -> list[Resource]:
return [self._download_resource(self.post.url)]
@pytest.mark.parametrize(('test_url', 'expected'), (('test.png', '.png'),
('random.jpg', '.jpg'),
('http://random.com/test.png', '.png'),
('https://example.net/picture.jpg', '.jpg'),
('https://v.redd.it/picture', '.mp4'),
('https://v.redd.it/picture.jpg', '.jpg'),
('https:/random.url', '.jpg')
))
def test_get_extension(test_url: str, expected: str):
result = BaseDownloader._get_extension(test_url)
assert result == expected
@pytest.mark.skip
@pytest.mark.parametrize(('test_url', 'expected_hash'), (('https://www.iana.org/_img/2013.1/iana-logo-header.svg', ''),
('', '')
))
def test_download_resource(test_url: str, expected_hash: str, tmp_path: Path):
test_file = tmp_path / 'test'
BaseDownloader._download_resource(test_file, tmp_path, test_url)
assert test_file.exists()
with open(test_file, 'rb') as file:
content = file.read()
hash_result = BaseDownloader._create_hash(content)
assert hash_result == expected_hash
@pytest.mark.parametrize(('test_url', 'expected_hash'), (
('https://docs.python.org/3/_static/py.png', 'a721fc7ec672275e257bbbfde49a4d4e'),
))
def test_get_resource(test_url: str, expected_hash: str):
mock_submission = Mock()
mock_submission.url = test_url
downloader = BlankDownloader(Path('.'), mock_submission)
result = downloader.download()
assert isinstance(result[0], Resource)
assert result[0].hash.hexdigest() == expected_hash
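Note that test_get_resource hits the network (it fetches py.png from docs.python.org), so a minimal run might look like this, assuming pytest is installed:

python -m pytest -k test_get_resource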

View File

@@ -1,90 +0,0 @@
import io
import sys
from os import makedirs, path
from pathlib import Path
from typing import Optional
from bulkredditdownloader.json_helper import JsonFile
class GLOBAL:
"""Declare global variables"""
RUN_TIME = ""
config = {'imgur_client_id': None, 'imgur_client_secret': None}
arguments = None
directory = None
defaultConfigDirectory = Path.home() / "Bulk Downloader for Reddit"
configDirectory = ""
reddit_client_id = "U-6gk4ZCh3IeNQ"
reddit_client_secret = "7CZHY6AmKweZME5s50SfDGylaPg"
printVanilla = print
log_stream = None
@staticmethod
def downloadedPosts() -> list:
return []
def createLogFile(title: str) -> JsonFile:
"""Create a log file with given name
inside a folder time-stamped in its name and
put given arguments inside \"HEADER\" key
"""
folder_directory = GLOBAL.directory / "LOG_FILES" / GLOBAL.RUN_TIME
log_filename = title.upper() + '.json'
if not path.exists(folder_directory):
makedirs(folder_directory)
file = JsonFile(folder_directory / Path(log_filename))
header = " ".join(sys.argv)
file.add({"HEADER": header})
return file
def printToFile(*args, no_print=False, **kwargs):
"""Print to both CONSOLE and
CONSOLE LOG file in a folder time-stamped in the name
"""
folder_directory = GLOBAL.directory / Path("LOG_FILES") / Path(GLOBAL.RUN_TIME)
if not no_print or GLOBAL.arguments.verbose or "file" in kwargs:
print(*args, **kwargs)
if not path.exists(folder_directory):
makedirs(folder_directory)
if "file" not in kwargs:
with io.open(folder_directory / "CONSOLE_LOG.txt", "a", encoding="utf-8") as FILE:
print(*args, file=FILE, **kwargs)
def nameCorrector(string: str, reference: Optional[str] = None) -> str:
"""Swap strange characters from given string
with underscore (_) and shorten it.
Return the string
"""
limit = 247
string_length = len(string)
if reference:
reference_length = len(reference)
total_lenght = reference_length
else:
total_lenght = string_length
if total_lenght > limit:
limit -= reference_length
string = string[:limit - 1]
string = string.replace(" ", "_")
if len(string.split('\n')) > 1:
string = "".join(string.split('\n'))
bad_chars = ['\\', '/', ':', '*', '?', '"', '<', '>', '|', '#', '.', '@', '', '', '\'', '!']
string = "".join([i if i not in bad_chars else "_" for i in string])
return string
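For reference, the removed helper behaved roughly like this (hypothetical input):

nameCorrector("What's up: a /test?")   # "What_s_up__a__test_" — spaces and problem characters become underscores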

View File

@@ -1,13 +1,15 @@
#!C:\Users\Ali\AppData\Local\Programs\Python\Python36\python.exe
## python setup.py build
# python setup.py build
import sys
from cx_Freeze import setup, Executable
from cx_Freeze import Executable, setup
from bulkredditdownloader.__main__ import __version__
options = {
"build_exe": {
"packages":[
"packages": [
"idna", "praw", "requests", "multiprocessing"
]
}
@@ -15,7 +17,7 @@ options = {
if sys.platform == "win32":
executables = [Executable(
"script.py",
"script.py",
targetName="bulk-downloader-for-reddit.exe",
shortcutName="Bulk Downloader for Reddit",
shortcutDir="DesktopFolder"
@@ -23,28 +25,26 @@ if sys.platform == "win32":
elif sys.platform == "linux":
executables = [Executable(
"script.py",
"script.py",
targetName="bulk-downloader-for-reddit",
shortcutName="Bulk Downloader for Reddit",
shortcutDir="DesktopFolder"
)]
setup(
name = "Bulk Downloader for Reddit",
version = __version__,
description = "Bulk Downloader for Reddit",
author = "Ali Parlakci",
name="Bulk Downloader for Reddit",
version=__version__,
description="Bulk Downloader for Reddit",
author="Ali Parlakci",
author_email="parlakciali@gmail.com",
url="https://github.com/aliparlakci/bulk-downloader-for-reddit",
classifiers=(
"Programming Language :: Python :: 3",
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)"
"Natural Language :: English",
"Environment :: Console",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)"
"Natural Language :: English",
"Environment :: Console",
"Operating System :: OS Independent",
),
executables = executables,
options = options
executables=executables,
options=options
)