Move to different program structure

Serene-Arc
2021-02-11 09:10:40 +10:00
committed by Ali Parlakci
parent a72abd6603
commit a7f1db14e5
24 changed files with 504 additions and 2133 deletions


@@ -4,338 +4,158 @@
This program downloads imgur, gfycat, and direct image and video links
from the saved posts of a reddit account. It is written in Python 3.
"""
import argparse
import logging
import os
import sys
import time
from io import StringIO
from pathlib import Path
from prawcore.exceptions import InsufficientScope
from bulkredditdownloader.arguments import Arguments
from bulkredditdownloader.config import Config
from bulkredditdownloader.site_downloaders.direct import Direct
from bulkredditdownloader.site_downloaders.erome import Erome
from bulkredditdownloader.site_downloaders.gallery import Gallery
from bulkredditdownloader.site_downloaders.gfycat import Gfycat
from bulkredditdownloader.site_downloaders.gif_delivery_network import GifDeliveryNetwork
from bulkredditdownloader.site_downloaders.imgur import Imgur
from bulkredditdownloader.site_downloaders.redgifs import Redgifs
from bulkredditdownloader.site_downloaders.self_post import SelfPost
from bulkredditdownloader.site_downloaders.vreddit import VReddit
from bulkredditdownloader.site_downloaders.youtube import Youtube
from bulkredditdownloader.errors import (AlbumNotDownloadedCompletely, DomainInSkip, FailedToDownload,
                                         FileAlreadyExistsError, ImgurLimitError, ImgurLoginError, InvalidJSONFile,
                                         NoSuitablePost, NotADownloadableLinkError, TypeInSkip, full_exc_info)
from bulkredditdownloader.json_helper import JsonFile
from bulkredditdownloader.program_mode import ProgramMode
from bulkredditdownloader.reddit import Reddit
from bulkredditdownloader.searcher import getPosts
from bulkredditdownloader.store import Store
from bulkredditdownloader.utils import GLOBAL, createLogFile, nameCorrector, printToFile
from bulkredditdownloader.downloader import RedditDownloader
from bulkredditdownloader.errors import BulkDownloaderException
from time import sleep
__author__ = "Ali Parlakci"
__license__ = "GPL"
__version__ = "1.10.0"
__maintainer__ = "Ali Parlakci"
__email__ = "parlakciali@gmail.com"
logger = logging.getLogger()
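# Module-level parser shared by _add_options() and the __main__ block at the bottom of the file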
parser = argparse.ArgumentParser(allow_abbrev=False,
                                 description="This program downloads media from reddit posts")
def postFromLog(filename):
    """Analyze a log file and return a list of dictionaries containing
    submissions
    """
    if Path.is_file(Path(filename)):
        content = JsonFile(filename).read()
    else:
        print("File not found")
        sys.exit()

    # The HEADER entry is metadata, not a submission
    try:
        del content["HEADER"]
    except KeyError:
        pass

    posts = []
    for post in content:
        if content[post][-1]['TYPE'] is not None:
            posts.append(content[post][-1])
    return posts
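# Register every CLI flag on the shared parser; the required= expressions make
# some flags conditionally mandatory based on other entries in sys.argv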
def _add_options():
    parser.add_argument("directory",
                        help="Specifies the directory where posts will be downloaded to",
                        metavar="DIRECTORY")
    parser.add_argument("--verbose", "-v",
                        help="Verbose mode",
                        action="store_true",
                        default=False)
    parser.add_argument("--quit", "-q",
                        help="Auto quit after the process finishes",
                        action="store_true",
                        default=False)
    parser.add_argument("--link", "-l",
                        help="Get posts from link",
                        metavar="link")
    parser.add_argument("--saved",
                        action="store_true",
                        required="--unsave" in sys.argv,
                        help="Triggers saved mode")
    parser.add_argument("--unsave",
                        action="store_true",
                        help="Unsaves downloaded posts")
    parser.add_argument("--submitted",
                        action="store_true",
                        help="Gets posts of --user")
    parser.add_argument("--upvoted",
                        action="store_true",
                        help="Gets upvoted posts of --user")
    parser.add_argument("--log",
                        help="Takes a log file created by the program itself (a JSON file), reads the posts "
                             "and tries downloading them again.",
                        metavar="LOG FILE")
    parser.add_argument("--subreddit",
                        nargs="+",
                        help="Triggers subreddit mode and takes subreddit names without the r/ prefix. Use "
                             "\"frontpage\" for the frontpage",
                        metavar="SUBREDDIT",
                        type=str)
    parser.add_argument("--multireddit",
                        help="Triggers multireddit mode and takes the multireddit's name without the m/ prefix",
                        metavar="MULTIREDDIT",
                        type=str)
    parser.add_argument("--user",
                        help="Reddit username if needed. Use \"me\" for the current user",
                        required="--multireddit" in sys.argv or "--submitted" in sys.argv,
                        metavar="redditor",
                        type=str)
    parser.add_argument("--search",
                        help="Searches for the given query in the given subreddits",
                        metavar="query",
                        type=str)
    parser.add_argument("--sort",
                        help="Either hot, top, new, controversial, rising or relevance. Default: hot",
                        choices=["hot", "top", "new", "controversial", "rising", "relevance"],
                        metavar="SORT TYPE",
                        type=str)
    parser.add_argument("--limit",
                        help="Default: unlimited",
                        metavar="Limit",
                        type=int)
    parser.add_argument("--time",
                        help="Either hour, day, week, month, year or all. Default: all",
                        choices=["all", "hour", "day", "week", "month", "year"],
                        metavar="TIME_LIMIT",
                        type=str)
    parser.add_argument("--skip",
                        nargs="+",
                        help="Skip posts of the given types",
                        type=str,
                        choices=["images", "videos", "gifs", "self"],
                        default=[])
    parser.add_argument("--skip-domain",
                        nargs="+",
                        help="Skip posts from the given domains",
                        type=str,
                        default=[])
    # store_true flags must not carry truthy string defaults: the set-* branches
    # in main() below would otherwise fire even when the flag is absent
    parser.add_argument("--set-folderpath",
                        action="store_true",
                        help="Set a custom folder path")
    parser.add_argument("--set-filename",
                        action="store_true",
                        help="Set a custom filename")
    parser.add_argument("--set-default-directory",
                        action="store_true",
                        help="Set a default directory to be used in case no directory is given")
    parser.add_argument("--set-default-options",
                        action="store_true",
                        help="Set default options to use every time the program runs")
    parser.add_argument("--use-local-config",
                        action="store_true",
                        help="Creates a config file in the program's directory and uses it. "
                             "Useful for having multiple configs")
    parser.add_argument("--no-dupes",
                        action="store_true",
                        help="Do not download duplicate posts from different subreddits")
    parser.add_argument("--downloaded-posts",
                        help="Use a hash file to keep track of downloaded files",
                        type=str)
    parser.add_argument("--no-download",
                        action="store_true",
                        help="Just saves posts into the POSTS.json file without downloading")
def _setup_logging(verbosity: bool):
    logger.setLevel(1)
    stream = logging.StreamHandler(sys.stdout)
    formatter = logging.Formatter('[%(asctime)s - %(name)s - %(levelname)s] - %(message)s')
    stream.setFormatter(formatter)
    logger.addHandler(stream)
    # --verbose is a store_true flag, so any truthy value means verbose output
    if verbosity:
        stream.setLevel(logging.DEBUG)
    else:
        stream.setLevel(logging.INFO)
    # Silence the noisy third-party loggers
    logging.getLogger('praw').setLevel(logging.CRITICAL)
    logging.getLogger('prawcore').setLevel(logging.CRITICAL)
    logging.getLogger('urllib3').setLevel(logging.CRITICAL)
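# New top-level flow: configure logging, then delegate the whole run to RedditDownloader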
def main(args: argparse.Namespace):
    _setup_logging(args.verbose)
    try:
        reddit_downloader = RedditDownloader(args)
        reddit_downloader.download()
    except BulkDownloaderException as e:
        logger.critical(f'An error occurred: {e}')
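# The final extension is only known after download, so probe every plausible one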
def isPostExists(post, directory):
    """Work out a file's name and check whether the file already exists"""
    filename = GLOBAL.config['filename'].format(**post)
    possible_extensions = [".jpg", ".png", ".mp4", ".gif", ".webm", ".md", ".mkv", ".flv"]
    for extension in possible_extensions:
        path = directory / Path(filename + extension)
        if path.exists():
            return True
    return False
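# Dispatch a submission to the downloader class matching its TYPE; instantiating
# the class performs the download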
def downloadPost(submission, directory):
    downloaders = {
        "imgur": Imgur, "gfycat": Gfycat, "erome": Erome, "direct": Direct, "self": SelfPost,
        "redgifs": Redgifs, "gifdeliverynetwork": GifDeliveryNetwork,
        "v.redd.it": VReddit, "youtube": Youtube, "gallery": Gallery
    }

    print()
    if submission['TYPE'] in downloaders:
        downloaders[submission['TYPE']](directory, submission)
    else:
        raise NoSuitablePost
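# Core loop of the old structure: iterate the submission list, download each
# post, and record duplicates and failures keyed by 1-based index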
def download(submissions):
    """Analyze a list of submissions, call the right downloader for each one,
    catch errors and update the log files
    """
    downloaded_count = 0
    duplicates = 0
    failed_file = createLogFile("FAILED")

    if GLOBAL.arguments.unsave:
        reddit = Reddit(GLOBAL.config['credentials']['reddit']).begin()

    subs_length = len(submissions)
    for i in range(subs_length):
        print(f"\n({i+1}/{subs_length})", end="")
        print(submissions[i]['POSTID'],
              f"r/{submissions[i]['SUBREDDIT']}",
              f"u/{submissions[i]['REDDITOR']}",
              submissions[i]['FLAIR'] if submissions[i]['FLAIR'] else "",
              sep=" ",
              end="")
        print(f" {submissions[i]['TYPE'].upper()}", end="", no_print=True)

        directory = GLOBAL.directory / GLOBAL.config["folderpath"].format(**submissions[i])
        details = {
            **submissions[i],
            **{"TITLE": nameCorrector(
                submissions[i]['TITLE'],
                reference=str(directory)
                + GLOBAL.config['filename'].format(**submissions[i])
                + ".ext")}
        }
        filename = GLOBAL.config['filename'].format(**details)

        if isPostExists(details, directory):
            print()
            print(directory)
            print(filename)
            print("It already exists")
            duplicates += 1
            continue

        if any(domain in submissions[i]['CONTENTURL'] for domain in GLOBAL.arguments.skip_domain):
            print()
            print(submissions[i]['CONTENTURL'])
            print("Domain found in skip domains, skipping post...")
            continue

        try:
            downloadPost(details, directory)
            GLOBAL.downloadedPosts.add(details['POSTID'])
            try:
                if GLOBAL.arguments.unsave:
                    reddit.submission(id=details['POSTID']).unsave()
            except InsufficientScope:
                # The cached token lacks the needed scope; re-authenticate and retry
                reddit = Reddit().begin()
                reddit.submission(id=details['POSTID']).unsave()
            downloaded_count += 1

        except FileAlreadyExistsError:
            print("It already exists")
            GLOBAL.downloadedPosts.add(details['POSTID'])
            duplicates += 1

        except ImgurLoginError:
            print("Imgur login failed.\nQuitting the program as unexpected errors might occur.")
            sys.exit()

        except ImgurLimitError as exception:
            failed_file.add({int(i + 1): [
                "{class_name}: {info}".format(class_name=exception.__class__.__name__, info=str(exception)), details
            ]})

        except NotADownloadableLinkError as exception:
            print("{class_name}: {info}".format(class_name=exception.__class__.__name__, info=str(exception)))
            failed_file.add({int(i + 1): [
                "{class_name}: {info}".format(class_name=exception.__class__.__name__, info=str(exception)),
                submissions[i]
            ]})

        except TypeInSkip:
            print()
            print(submissions[i]['CONTENTURL'])
            print("Skipping post...")

        except DomainInSkip:
            print()
            print(submissions[i]['CONTENTURL'])
            print("Skipping post...")

        except NoSuitablePost:
            print("No match found, skipping...")

        except FailedToDownload:
            print("Failed to download the post, skipping...")

        except AlbumNotDownloadedCompletely as exc:
            print("Album was not downloaded completely.")
            failed_file.add({int(i + 1): [
                "{class_name}: {info}".format(class_name=exc.__class__.__name__, info=str(exc)),
                submissions[i]
            ]})

        except Exception as exc:
            print("{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
                class_name=exc.__class__.__name__, info=str(exc))
            )
            logging.error(sys.exc_info()[0].__name__, exc_info=full_exc_info(sys.exc_info()))
            print(GLOBAL.log_stream.getvalue(), no_print=True)
            failed_file.add({int(i + 1): [
                "{class_name}: {info}".format(class_name=exc.__class__.__name__, info=str(exc)),
                submissions[i]
            ]})

    if duplicates:
        print(f"\nThere {'were' if duplicates > 1 else 'was'} {duplicates} duplicate{'s' if duplicates > 1 else ''}")
    if downloaded_count == 0:
        print("Nothing was downloaded :(")
    else:
        print(f"Total of {downloaded_count} link{'s' if downloaded_count > 1 else ''} downloaded!")
def printLogo():
    VanillaPrint(f"\nBulk Downloader for Reddit v{__version__}\n"
                 f"Written by Ali PARLAKCI parlakciali@gmail.com\n\n"
                 f"https://github.com/aliparlakci/bulk-downloader-for-reddit/\n")
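# Old entry point: resolve the config file, parse arguments, then fetch and download posts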
def main():
    if Path("config.json").exists():
        GLOBAL.configDirectory = Path("config.json")
    else:
        if not Path(GLOBAL.defaultConfigDirectory).is_dir():
            os.makedirs(GLOBAL.defaultConfigDirectory)
        GLOBAL.configDirectory = GLOBAL.defaultConfigDirectory / "config.json"
    try:
        GLOBAL.config = Config(GLOBAL.configDirectory).generate()
    except InvalidJSONFile as exception:
        VanillaPrint(str(exception.__class__.__name__), ">>", str(exception))
        VanillaPrint("Resolve it or remove it to proceed")
        sys.exit()

    # Append the saved default options from the config to the command-line arguments
    sys.argv = sys.argv + GLOBAL.config["options"].split()
    arguments = Arguments.parse()
    GLOBAL.arguments = arguments

    if arguments.set_filename:
        Config(GLOBAL.configDirectory).setCustomFileName()
        sys.exit()

    if arguments.set_folderpath:
        Config(GLOBAL.configDirectory).setCustomFolderPath()
        sys.exit()

    if arguments.set_default_directory:
        Config(GLOBAL.configDirectory).setDefaultDirectory()
        sys.exit()

    if arguments.set_default_options:
        Config(GLOBAL.configDirectory).setDefaultOptions()
        sys.exit()

    if arguments.use_local_config:
        JsonFile("config.json").add(GLOBAL.config)
        sys.exit()

    if arguments.directory:
        GLOBAL.directory = Path(arguments.directory.strip())
    elif "default_directory" in GLOBAL.config and GLOBAL.config["default_directory"] != "":
        GLOBAL.directory = Path(GLOBAL.config["default_directory"].format(time=GLOBAL.RUN_TIME))
    else:
        GLOBAL.directory = Path(input("\ndownload directory: ").strip())

    if arguments.downloaded_posts:
        GLOBAL.downloadedPosts = Store(arguments.downloaded_posts)
    else:
        GLOBAL.downloadedPosts = Store()

    printLogo()
    print("\n", " ".join(sys.argv), "\n", no_print=True)

    if arguments.log is not None:
        log_dir = Path(arguments.log)
        download(postFromLog(log_dir))
        sys.exit()

    program_mode = ProgramMode(arguments).generate()

    try:
        posts = getPosts(program_mode)
    except Exception as exc:
        logging.error(sys.exc_info()[0].__name__, exc_info=full_exc_info(sys.exc_info()))
        print(GLOBAL.log_stream.getvalue(), no_print=True)
        print(exc)
        sys.exit()

    if posts is None:
        print("I could not find any posts at that URL")
        sys.exit()

    if not GLOBAL.arguments.no_download:
        download(posts)
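# Old bootstrap block: buffer log output in memory and mirror prints to a file via printToFile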
if __name__ == "__main__":
GLOBAL.log_stream = StringIO()
logging.basicConfig(stream=GLOBAL.log_stream, level=logging.INFO)
try:
VanillaPrint = print
print = printToFile
GLOBAL.RUN_TIME = str(time.strftime("%d-%m-%Y_%H-%M-%S", time.localtime(time.time())))
main()
except KeyboardInterrupt:
if GLOBAL.directory is None:
GLOBAL.directory = Path("../..\\")
except Exception as exception:
if GLOBAL.directory is None:
GLOBAL.directory = Path("../..\\")
logging.error(sys.exc_info()[0].__name__, exc_info=full_exc_info(sys.exc_info()))
print(GLOBAL.log_stream.getvalue())
if not GLOBAL.arguments.quit:
input("\nPress enter to quit\n")
if __name__ == '__main__':
    _add_options()
    args = parser.parse_args()
    main(args)
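
For reference, a minimal sketch of driving the restructured entry point programmatically, using only the parser, _add_options() and main() defined above; the directory and flag values are illustrative:

# Equivalent to running the module with:
#   ./downloads --subreddit pics --limit 10 -v
_add_options()
args = parser.parse_args(["./downloads", "--subreddit", "pics", "--limit", "10", "-v"])
main(args)  # builds RedditDownloader(args) and calls download()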