From 4143b864678dbd3f44179c41970b532ba0f10500 Mon Sep 17 00:00:00 2001 From: Serene <33189705+Serene-Arc@users.noreply.github.com> Date: Sat, 6 Feb 2021 18:35:50 +1000 Subject: [PATCH] Pep8 format (#184) * Format file to be PEP8 compliant * Remove unused imports * Format file to PEP8 * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Format file to PEP8 * Format file to PEP8 * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Remove unused imports * Condense spacing --- script.py | 183 +++++++--------- src/arguments.py | 113 ++++------ src/config.py | 43 +--- src/downloaders/Direct.py | 12 +- src/downloaders/Erome.py | 96 ++++---- src/downloaders/Gallery.py | 115 ---------- src/downloaders/Gfycat.py | 28 +-- src/downloaders/Imgur.py | 118 +++++----- src/downloaders/downloaderUtils.py | 78 +++---- src/downloaders/gallery.py | 110 ++++++++++ src/downloaders/gifDeliveryNetwork.py | 27 +-- src/downloaders/redgifs.py | 25 +-- src/downloaders/selfPost.py | 17 +- src/downloaders/vreddit.py | 47 ++-- src/downloaders/youtube.py | 19 +- src/errors.py | 3 - src/jsonHelper.py | 30 ++- src/parser.py | 251 +++++++++++---------- src/programMode.py | 189 +++++++--------- src/reddit.py | 36 +-- src/searcher.py | 303 ++++++++++++-------------- src/utils.py | 67 +++--- 22 files changed, 836 insertions(+), 1074 deletions(-) delete mode 100644 src/downloaders/Gallery.py create mode 100644 src/downloaders/gallery.py diff --git a/script.py b/script.py index 6ace7f8..87800b9 100644 --- a/script.py +++ b/script.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """ -This program downloads imgur, gfycat and direct image and video links of +This program downloads imgur, gfycat and direct image and video links of saved posts from a reddit account. It is written in Python 3. 
""" import logging @@ -12,26 +12,28 @@ from io import StringIO from pathlib import Path from prawcore.exceptions import InsufficientScope +from src.arguments import Arguments +from src.config import Config from src.downloaders.Direct import Direct from src.downloaders.Erome import Erome +from src.downloaders.gallery import Gallery from src.downloaders.Gfycat import Gfycat +from src.downloaders.gifDeliveryNetwork import GifDeliveryNetwork from src.downloaders.Imgur import Imgur from src.downloaders.Gallery import Gallery from src.downloaders.redgifs import Redgifs from src.downloaders.selfPost import SelfPost from src.downloaders.vreddit import VReddit from src.downloaders.youtube import Youtube -from src.downloaders.gifDeliveryNetwork import GifDeliveryNetwork -from src.errors import ImgurLimitError, FileAlreadyExistsError, ImgurLoginError, NotADownloadableLinkError, NoSuitablePost, InvalidJSONFile, FailedToDownload, TypeInSkip, DomainInSkip, AlbumNotDownloadedCompletely, full_exc_info -from src.searcher import getPosts -from src.utils import (GLOBAL, createLogFile, nameCorrector, - printToFile) +from src.errors import (AlbumNotDownloadedCompletely, DomainInSkip, FailedToDownload, FileAlreadyExistsError, + ImgurLimitError, ImgurLoginError, InvalidJSONFile, NoSuitablePost, NotADownloadableLinkError, + TypeInSkip, full_exc_info) from src.jsonHelper import JsonFile -from src.config import Config -from src.arguments import Arguments from src.programMode import ProgramMode from src.reddit import Reddit +from src.searcher import getPosts from src.store import Store +from src.utils import GLOBAL, createLogFile, nameCorrector, printToFile from time import sleep @@ -42,12 +44,12 @@ __maintainer__ = "Ali Parlakci" __email__ = "parlakciali@gmail.com" -def postFromLog(fileName): +def postFromLog(filename): """Analyze a log file and return a list of dictionaries containing submissions """ - if Path.is_file(Path(fileName)): - content = JsonFile(fileName).read() + if Path.is_file(Path(filename)): + content = JsonFile(filename).read() else: print("File not found") sys.exit() @@ -60,23 +62,22 @@ def postFromLog(fileName): posts = [] for post in content: - if content[post][-1]['TYPE'] is not None: + if not content[post][-1]['TYPE'] is None: posts.append(content[post][-1]) return posts -def isPostExists(POST, directory): +def isPostExists(post, directory): """Figure out a file's name and checks if the file already exists""" - filename = GLOBAL.config['filename'].format(**POST) + filename = GLOBAL.config['filename'].format(**post) - possibleExtensions = [".jpg", ".png", ".mp4", - ".gif", ".webm", ".md", ".mkv", ".flv"] + possible_extensions = [".jpg", ".png", ".mp4", ".gif", ".webm", ".md", ".mkv", ".flv"] - for extension in possibleExtensions: + for extension in possible_extensions: - path = directory / Path(filename+extension) + path = directory / Path(filename + extension) if path.exists(): return True @@ -84,58 +85,57 @@ def isPostExists(POST, directory): return False -def downloadPost(SUBMISSION, directory): + +def downloadPost(submission, directory): downloaders = { - "imgur":Imgur,"gfycat":Gfycat,"erome":Erome,"direct":Direct,"self":SelfPost, - "redgifs":Redgifs, "gifdeliverynetwork": GifDeliveryNetwork, + "imgur": Imgur, "gfycat": Gfycat, "erome": Erome, "direct": Direct, "self": SelfPost, + "redgifs": Redgifs, "gifdeliverynetwork": GifDeliveryNetwork, "v.redd.it": VReddit, "youtube": Youtube, "gallery": Gallery } print() - if SUBMISSION['TYPE'] in downloaders: - 
downloaders[SUBMISSION['TYPE']](directory, SUBMISSION) + if submission['TYPE'] in downloaders: + downloaders[submission['TYPE']](directory, submission) else: raise NoSuitablePost + def download(submissions): """Analyze list of submissions and call the right function to download each one, catch errors, update the log files """ - downloadedCount = 0 + downloaded_count = 0 duplicates = 0 - FAILED_FILE = createLogFile("FAILED") + failed_file = createLogFile("FAILED") if GLOBAL.arguments.unsave: reddit = Reddit(GLOBAL.config['credentials']['reddit']).begin() - subsLenght = len(submissions) + subs_length = len(submissions) for i in range(len(submissions)): - print(f"\n({i+1}/{subsLenght})", end=" — ") + print(f"\n({i+1}/{subs_length})", end=" — ") print(submissions[i]['POSTID'], f"r/{submissions[i]['SUBREDDIT']}", f"u/{submissions[i]['REDDITOR']}", submissions[i]['FLAIR'] if submissions[i]['FLAIR'] else "", sep=" — ", end="") - print(f" – {submissions[i]['TYPE'].upper()}", end="", noPrint=True) + print(f" – {submissions[i]['TYPE'].upper()}", end="", no_print=True) directory = GLOBAL.directory / \ GLOBAL.config["folderpath"].format(**submissions[i]) details = { **submissions[i], - **{ - "TITLE": nameCorrector( - submissions[i]['TITLE'], - reference=str(directory) - + GLOBAL.config['filename'].format(**submissions[i]) - + ".ext" - ) - } + **{"TITLE": nameCorrector( + submissions[i]['TITLE'], + reference=str(directory) + + GLOBAL.config['filename'].format(**submissions[i]) + + ".ext")} } filename = GLOBAL.config['filename'].format(**details) @@ -164,11 +164,7 @@ def download(submissions): reddit = Reddit().begin() reddit.submission(id=details['POSTID']).unsave() - if GLOBAL.arguments.download_delay: - print(f"Delaying next download for {GLOBAL.arguments.download_delay} seconds...") - sleep(GLOBAL.arguments.download_delay) - - downloadedCount += 1 + downloaded_count += 1 except FileAlreadyExistsError: print("It already exists") @@ -176,33 +172,18 @@ def download(submissions): duplicates += 1 except ImgurLoginError: - print( - "Imgur login failed. \nQuitting the program " - "as unexpected errors might occur." - ) + print("Imgur login failed. 
\nQuitting the program as unexpected errors might occur.") sys.exit() except ImgurLimitError as exception: - FAILED_FILE.add({int(i+1): [ - "{class_name}: {info}".format( - class_name=exception.__class__.__name__, info=str( - exception) - ), - details + failed_file.add({int(i + 1): [ + "{class_name}: {info}".format(class_name=exception.__class__.__name__, info=str(exception)), details ]}) except NotADownloadableLinkError as exception: - print( - "{class_name}: {info}".format( - class_name=exception.__class__.__name__, info=str( - exception) - ) - ) - FAILED_FILE.add({int(i+1): [ - "{class_name}: {info}".format( - class_name=exception.__class__.__name__, info=str( - exception) - ), + print("{class_name}: {info}".format(class_name=exception.__class__.__name__, info=str(exception))) + failed_file.add({int(i + 1): [ + "{class_name}: {info}".format(class_name=exception.__class__.__name__, info=str(exception)), submissions[i] ]}) @@ -223,60 +204,48 @@ def download(submissions): print("Failed to download the posts, skipping...") except AlbumNotDownloadedCompletely: print("Album did not downloaded completely.") - FAILED_FILE.add({int(i+1): [ - "{class_name}: {info}".format( - class_name=exc.__class__.__name__, info=str(exc) - ), + failed_file.add({int(i + 1): [ + "{class_name}: {info}".format(class_name=exc.__class__.__name__, info=str(exc)), submissions[i] ]}) except Exception as exc: - print( - "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format( - class_name=exc.__class__.__name__, info=str(exc) - ) + print("{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format( + class_name=exc.__class__.__name__, info=str(exc)) ) - logging.error(sys.exc_info()[0].__name__, - exc_info=full_exc_info(sys.exc_info())) - print(GLOBAL.log_stream.getvalue(), noPrint=True) + logging.error(sys.exc_info()[0].__name__, exc_info=full_exc_info(sys.exc_info())) + print(GLOBAL.log_stream.getvalue(), no_print=True) - FAILED_FILE.add({int(i+1): [ - "{class_name}: {info}".format( - class_name=exc.__class__.__name__, info=str(exc) - ), + failed_file.add({int(i + 1): [ + "{class_name}: {info}".format(class_name=exc.__class__.__name__, info=str(exc)), submissions[i] ]}) if duplicates: - print(f"\nThere {'were' if duplicates > 1 else 'was'} " - f"{duplicates} duplicate{'s' if duplicates > 1 else ''}") + print(f"\nThere {'were' if duplicates > 1 else 'was'} {duplicates} duplicate{'s' if duplicates > 1 else ''}") - if downloadedCount: - print(f"Total of {downloadedCount} " - f"link{'s' if downloadedCount > 1 else ''} downloaded!") + if downloaded_count == 0: + print("Nothing is downloaded :(") else: - print("Nothing is downloaded :(") + print(f"Total of {downloaded_count} link{'s' if downloaded_count > 1 else ''} downloaded!") def printLogo(): + VanillaPrint(f"\nBulk Downloader for Reddit v{__version__}\n" + f"Written by Ali PARLAKCI – parlakciali@gmail.com\n\n" + f"https://github.com/aliparlakci/bulk-downloader-for-reddit/\n" + ) - VanillaPrint( - f"\nBulk Downloader for Reddit v{__version__}\n" - f"Written by Ali PARLAKCI – parlakciali@gmail.com\n\n" - f"https://github.com/aliparlakci/bulk-downloader-for-reddit/\n" - ) def main(): - - if not Path(GLOBAL.defaultConfigDirectory).is_dir(): - os.makedirs(GLOBAL.defaultConfigDirectory) - if Path("config.json").exists(): GLOBAL.configDirectory = Path("config.json") else: - GLOBAL.configDirectory = GLOBAL.defaultConfigDirectory / "config.json" + if not Path(GLOBAL.defaultConfigDirectory).is_dir(): + os.makedirs(GLOBAL.defaultConfigDirectory) + 
GLOBAL.configDirectory = GLOBAL.defaultConfigDirectory / "config.json"
     try:
         GLOBAL.config = Config(GLOBAL.configDirectory).generate()
     except InvalidJSONFile as exception:
@@ -307,6 +276,7 @@ def main():
 
     if arguments.use_local_config:
         JsonFile("config.json").add(GLOBAL.config)
+        sys.exit()
 
     if arguments.directory:
         GLOBAL.directory = Path(arguments.directory.strip())
@@ -322,21 +292,20 @@ def main():
     GLOBAL.downloadedPosts = Store()
 
     printLogo()
-    print("\n", " ".join(sys.argv), "\n", noPrint=True)
+    print("\n", " ".join(sys.argv), "\n", no_print=True)
 
     if arguments.log is not None:
-        logDir = Path(arguments.log)
-        download(postFromLog(logDir))
+        log_dir = Path(arguments.log)
+        download(postFromLog(log_dir))
         sys.exit()
 
-    programMode = ProgramMode(arguments).generate()
+    program_mode = ProgramMode(arguments).generate()
 
     try:
-        posts = getPosts(programMode)
+        posts = getPosts(program_mode)
     except Exception as exc:
-        logging.error(sys.exc_info()[0].__name__,
-                      exc_info=full_exc_info(sys.exc_info()))
-        print(GLOBAL.log_stream.getvalue(), noPrint=True)
+        logging.error(sys.exc_info()[0].__name__, exc_info=full_exc_info(sys.exc_info()))
+        print(GLOBAL.log_stream.getvalue(), no_print=True)
         print(exc)
         sys.exit()
 
@@ -358,10 +327,7 @@ if __name__ == "__main__":
     try:
         VanillaPrint = print
         print = printToFile
-        GLOBAL.RUN_TIME = str(time.strftime(
-            "%d-%m-%Y_%H-%M-%S",
-            time.localtime(time.time())
-        ))
+        GLOBAL.RUN_TIME = str(time.strftime("%d-%m-%Y_%H-%M-%S", time.localtime(time.time())))
         main()
 
     except KeyboardInterrupt:
@@ -371,9 +337,8 @@ if __name__ == "__main__":
     except Exception as exception:
         if GLOBAL.directory is None:
             GLOBAL.directory = Path("..\\")
-        logging.error(sys.exc_info()[0].__name__,
-                      exc_info=full_exc_info(sys.exc_info()))
+        logging.error(sys.exc_info()[0].__name__, exc_info=full_exc_info(sys.exc_info()))
         print(GLOBAL.log_stream.getvalue())
 
-        if not GLOBAL.arguments.quit: input("\nPress enter to quit\n")
-
+        if not GLOBAL.arguments.quit:
+            input("\nPress enter to quit\n")
diff --git a/src/arguments.py b/src/arguments.py
index 8bdcf9d..cbf72c7 100644
--- a/src/arguments.py
+++ b/src/arguments.py
@@ -10,12 +10,9 @@ class Arguments:
             arguments = []
 
         parser = argparse.ArgumentParser(allow_abbrev=False,
-                                         description="This program downloads "
-                                                     "media from reddit "
-                                                     "posts")
+                                         description="This program downloads media from reddit posts")
 
         parser.add_argument("--directory", "-d",
-                            help="Specifies the directory where posts will be "
-                                 "downloaded to",
+                            help="Specifies the directory where posts will be downloaded to",
                             metavar="DIRECTORY")
 
         parser.add_argument("--verbose", "-v",
@@ -50,31 +47,26 @@ class Arguments:
                             help="Gets upvoted posts of --user")
 
         parser.add_argument("--log",
-                            help="Takes a log file which created by itself "
-                                 "(json files), reads posts and tries downloadin"
-                                 "g them again.",
+                            help="Takes a log file which was created by itself (json files), reads posts and tries "
+                                 "downloading them again.",
                             # type=argparse.FileType('r'),
                             metavar="LOG FILE")
 
-        parser.add_argument(
-            "--subreddit",
-            nargs="+",
-            help="Triggers subreddit mode and takes subreddit's "
-                 "name without r/. use \"frontpage\" for frontpage",
-            metavar="SUBREDDIT",
-            type=str)
+        parser.add_argument("--subreddit",
+                            nargs="+",
+                            help="Triggers subreddit mode and takes subreddit's name without r/. use \"frontpage\" "
+                                 "for frontpage",
+                            metavar="SUBREDDIT",
+                            type=str)
 
         parser.add_argument("--multireddit",
-                            help="Triggers multireddit mode and takes "
-                                 "multireddit's name without m/",
+                            help="Triggers multireddit mode and takes multireddit's name without m/",
                             metavar="MULTIREDDIT",
                             type=str)
 
         parser.add_argument("--user",
-                            help="reddit username if needed. use \"me\" for "
-                                 "current user",
-                            required="--multireddit" in sys.argv or
-                                     "--submitted" in sys.argv,
+                            help="reddit username if needed. use \"me\" for current user",
+                            required="--multireddit" in sys.argv or "--submitted" in sys.argv,
                             metavar="redditor",
                             type=str)
 
@@ -85,12 +77,8 @@ class Arguments:
                             type=str)
 
         parser.add_argument("--sort",
-                            help="Either hot, top, new, controversial, rising "
-                                 "or relevance default: hot",
-                            choices=[
-                                "hot", "top", "new", "controversial", "rising",
-                                "relevance"
-                            ],
+                            help="Either hot, top, new, controversial, rising or relevance default: hot",
+                            choices=["hot", "top", "new", "controversial", "rising", "relevance"],
                             metavar="SORT TYPE",
                             type=str)
 
@@ -100,10 +88,8 @@ class Arguments:
                             type=int)
 
         parser.add_argument("--time",
-                            help="Either hour, day, week, month, year or all."
-                                 " default: all",
-                            choices=["all", "hour", "day",
-                                     "week", "month", "year"],
+                            help="Either hour, day, week, month, year or all. default: all",
+                            choices=["all", "hour", "day", "week", "month", "year"],
                             metavar="TIME_LIMIT",
                             type=str)
 
@@ -130,47 +116,38 @@ class Arguments:
                             help="Set custom filename",
                             )
 
-        parser.add_argument(
-            "--set-default-directory",
-            action="store_true",
-            help="Set a default directory to be used in case no directory is given",
-        )
+        parser.add_argument("--set-default-directory",
+                            action="store_true",
+                            help="Set a default directory to be used in case no directory is given",
+                            )
 
-        parser.add_argument(
-            "--set-default-options",
-            action="store_true",
-            help="Set default options to use everytime program runs",
-        )
+        parser.add_argument("--set-default-options",
+                            action="store_true",
+                            help="Set default options to use every time the program runs",
+                            )
 
-        parser.add_argument(
-            "--use-local-config",
-            action="store_true",
-            help="Creates a config file in the program's directory and uses it. Useful for having multiple configs",
-        )
+        parser.add_argument("--use-local-config",
+                            action="store_true",
+                            help="Creates a config file in the program's directory"
+                                 " and uses it. Useful for having multiple configs",
+                            )
 
-        parser.add_argument(
-            "--no-dupes",
-            action="store_true",
-            help="Do not download duplicate posts on different subreddits",
-        )
+        parser.add_argument("--no-dupes",
+                            action="store_true",
+                            help="Do not download duplicate posts on different subreddits",
+                            )
 
-        parser.add_argument(
-            "--downloaded-posts",
-            help="Use a hash file to keep track of downloaded files",
-            type=str)
+        parser.add_argument("--downloaded-posts",
+                            help="Use a hash file to keep track of downloaded files",
+                            type=str
+                            )
 
-        parser.add_argument(
-            "--no-download",
-            action="store_true",
-            help="Just saved posts into a the POSTS.json file without downloading")
+        parser.add_argument("--no-download",
+                            action="store_true",
+                            help="Just saves posts into the POSTS.json file without downloading"
+                            )
 
-        parser.add_argument(
-            "--download-delay",
-            metavar="DELAY",
-            type=int,
-            help="Amount, in seconds, to delay before beginning the next item in the download queue")
-
-
-        if arguments == []:
+        if not arguments:
             return parser.parse_args()
-        return parser.parse_args(arguments)
+        else:
+            return parser.parse_args(arguments)
diff --git a/src/config.py b/src/config.py
index 662f91a..3f9f17a 100644
--- a/src/config.py
+++ b/src/config.py
@@ -1,10 +1,9 @@
-
 from src.reddit import Reddit
 from src.jsonHelper import JsonFile
 from src.utils import nameCorrector
 
 
-class Config():
+class Config:
     def __init__(self, filename):
         self.filename = filename
 
@@ -35,23 +34,17 @@ For example: {FLAIR}_{SUBREDDIT}_{REDDITOR}
 Existing filename template:""", None if "filename" not in self.file.read() else self.file.read()["filename"])
 
         filename = nameCorrector(input(">> ").upper())
-        self.file.add({
-            "filename": filename
-        })
+        self.file.add({"filename": filename})
 
     def _readCustomFileName(self):
         content = self.file.read()
 
         if "filename" not in content:
-            self.file.add({
-                "filename": "{REDDITOR}_{TITLE}_{POSTID}"
-            })
+            self.file.add({"filename": "{REDDITOR}_{TITLE}_{POSTID}"})
             content = self.file.read()
 
-        if not "{POSTID}" in content["filename"]:
-            self.file.add({
-                "filename": content["filename"] + "_{POSTID}"
-            })
+        if "{POSTID}" not in content["filename"]:
+            self.file.add({"filename": content["filename"] + "_{POSTID}"})
 
     def setCustomFolderPath(self):
         print("""
@@ -68,16 +61,12 @@ Existing folder structure""", None if "folderpath" not in self.file.read() else
 
         folderpath = nameCorrector(input(">> ").strip("\\").strip("/").upper())
 
-        self.file.add({
-            "folderpath": folderpath
-        })
+        self.file.add({"folderpath": folderpath})
 
     def _readCustomFolderPath(self, path=None):
         content = self.file.read()
         if "folderpath" not in content:
-            self.file.add({
-                "folderpath": "{SUBREDDIT}"
-            })
+            self.file.add({"folderpath": "{SUBREDDIT}"})
 
     def setDefaultOptions(self):
         print("""
@@ -89,33 +78,25 @@ Existing default options:""", None if "options" not in self.file.read() else sel
 
         options = input(">> ").strip("")
 
-        self.file.add({
-            "options": options
-        })
+        self.file.add({"options": options})
 
     def _readDefaultOptions(self, path=None):
         content = self.file.read()
         if "options" not in content:
-            self.file.add({
-                "options": ""
-            })
+            self.file.add({"options": ""})
 
     def _validateCredentials(self):
         """Read credentials from config.json file"""
-
         try:
             content = self.file.read()["credentials"]
         except BaseException:
-            self.file.add({
-                "credentials": {}
-            })
+            self.file.add({"credentials": {}})
             content = self.file.read()["credentials"]
 
         if "reddit" in content and len(content["reddit"]) != 0:
             pass
         else:
             Reddit().begin()
-            print()
 
     def setDefaultDirectory(self):
@@ -125,6 +106,4 @@ For example: D:/archive/BDFR_{time}
 """)
         print("Current default directory:", self.file.read()[
               "default_directory"] if "default_directory" in self.file.read() else "")
-        self.file.add({
-            "default_directory": input(">> ")
-        })
+        self.file.add({"default_directory": input(">> ")})
diff --git a/src/downloaders/Direct.py b/src/downloaders/Direct.py
index e22c8a3..44bbe61 100644
--- a/src/downloaders/Direct.py
+++ b/src/downloaders/Direct.py
@@ -1,16 +1,16 @@
 import os
 
-from src.downloaders.downloaderUtils import getFile, getExtension
+from src.downloaders.downloaderUtils import getExtension, getFile
 from src.utils import GLOBAL
 
 
 class Direct:
-    def __init__(self, directory, POST):
-        POST['EXTENSION'] = getExtension(POST['CONTENTURL'])
+    def __init__(self, directory, post):
+        post['EXTENSION'] = getExtension(post['CONTENTURL'])
         if not os.path.exists(directory):
             os.makedirs(directory)
 
-        filename = GLOBAL.config['filename'].format(**POST) + POST["EXTENSION"]
-        shortFilename = POST['POSTID'] + POST['EXTENSION']
+        filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
+        short_filename = post['POSTID'] + post['EXTENSION']
 
-        getFile(filename, shortFilename, directory, POST['CONTENTURL'])
+        getFile(filename, short_filename, directory, post['CONTENTURL'])
diff --git a/src/downloaders/Erome.py b/src/downloaders/Erome.py
index 4c7ef5c..9283131 100644
--- a/src/downloaders/Erome.py
+++ b/src/downloaders/Erome.py
@@ -1,12 +1,10 @@
 import os
+import urllib.error
 import urllib.request
 from html.parser import HTMLParser
 
-from src.downloaders.downloaderUtils import getFile
-from src.downloaders.downloaderUtils import getExtension
-
-from src.errors import (AlbumNotDownloadedCompletely,
-                        NotADownloadableLinkError, FileAlreadyExistsError)
+from src.downloaders.downloaderUtils import getExtension, getFile
+from src.errors import AlbumNotDownloadedCompletely, FileAlreadyExistsError, NotADownloadableLinkError
 from src.utils import GLOBAL
 from src.utils import printToFile as print
 
@@ -14,88 +12,77 @@ from src.utils import printToFile as print
 class Erome:
     def __init__(self, directory, post):
         try:
-            IMAGES = self.getLinks(post['CONTENTURL'])
+            images = self.getLinks(post['CONTENTURL'])
         except urllib.error.HTTPError:
             raise NotADownloadableLinkError("Not a downloadable link")
 
-        imagesLenght = len(IMAGES)
-        howManyDownloaded = imagesLenght
+        images_length = len(images)
+        how_many_downloaded = images_length
         duplicates = 0
 
-        if imagesLenght == 1:
-
-            extension = getExtension(IMAGES[0])
+        if images_length == 1:
+            extension = getExtension(images[0])
 
             """Filenames are declared here"""
+            filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
+            short_filename = post['POSTID'] + extension
 
-            filename = GLOBAL.config['filename'].format(
-                **post) + post["EXTENSION"]
-            shortFilename = post['POSTID'] + extension
+            image_url = images[0]
+            if 'https://' not in image_url and 'http://' not in image_url:
+                image_url = "https://" + image_url
 
-            imageURL = IMAGES[0]
-            if 'https://' not in imageURL or 'http://' not in imageURL:
-                imageURL = "https://" + imageURL
-
-            getFile(filename, shortFilename, directory, imageURL)
+            getFile(filename, short_filename, directory, image_url)
 
         else:
             filename = GLOBAL.config['filename'].format(**post)
-
             print(filename)
 
-            folderDir = directory / filename
+            folder_dir = directory / filename
 
             try:
-                if not os.path.exists(folderDir):
-                    os.makedirs(folderDir)
+                if not os.path.exists(folder_dir):
+                    os.makedirs(folder_dir)
            except
FileNotFoundError: - folderDir = directory / post['POSTID'] - os.makedirs(folderDir) + folder_dir = directory / post['POSTID'] + os.makedirs(folder_dir) - for i in range(imagesLenght): - - extension = getExtension(IMAGES[i]) + for i in range(images_length): + extension = getExtension(images[i]) filename = str(i + 1) + extension - imageURL = IMAGES[i] - if 'https://' not in imageURL and 'http://' not in imageURL: - imageURL = "https://" + imageURL + image_url = images[i] + if 'https://' not in image_url and 'http://' not in image_url: + image_url = "https://" + image_url - print(" ({}/{})".format(i + 1, imagesLenght)) + print(" ({}/{})".format(i + 1, images_length)) print(" {}".format(filename)) try: - getFile(filename, filename, folderDir, imageURL, indent=2) + getFile(filename, filename, folder_dir, image_url, indent=2) print() except FileAlreadyExistsError: print(" The file already exists" + " " * 10, end="\n\n") duplicates += 1 - howManyDownloaded -= 1 + how_many_downloaded -= 1 except Exception as exception: # raise exception print("\n Could not get the file") print( " " - + "{class_name}: {info}".format( - class_name=exception.__class__.__name__, - info=str(exception) - ) + + "{class_name}: {info}".format(class_name=exception.__class__.__name__, info=str(exception)) + "\n" ) - howManyDownloaded -= 1 + how_many_downloaded -= 1 - if duplicates == imagesLenght: + if duplicates == images_length: raise FileAlreadyExistsError - if howManyDownloaded + duplicates < imagesLenght: - raise AlbumNotDownloadedCompletely( - "Album Not Downloaded Completely" - ) - - def getLinks(self, url, lineNumber=129): + elif how_many_downloaded + duplicates < images_length: + raise AlbumNotDownloadedCompletely("Album Not Downloaded Completely") + def getLinks(self, url): content = [] - lineNumber = None + line_number = None class EromeParser(HTMLParser): tag = None @@ -103,22 +90,22 @@ class Erome: def handle_starttag(self, tag, attrs): self.tag = {tag: {attr[0]: attr[1] for attr in attrs}} - pageSource = (urllib.request.urlopen(url).read().decode().split('\n')) + page_source = (urllib.request.urlopen(url).read().decode().split('\n')) """ FIND WHERE ALBUM STARTS IN ORDER NOT TO GET WRONG LINKS""" - for i in range(len(pageSource)): + for i in range(len(page_source)): obj = EromeParser() - obj.feed(pageSource[i]) + obj.feed(page_source[i]) tag = obj.tag if tag is not None: if "div" in tag: if "id" in tag["div"]: if tag["div"]["id"] == "album": - lineNumber = i + line_number = i break - for line in pageSource[lineNumber:]: + for line in page_source[line_number:]: obj = EromeParser() obj.feed(line) tag = obj.tag @@ -130,7 +117,4 @@ class Erome: elif "source" in tag: content.append(tag["source"]["src"]) - return [ - link for link in content - if link.endswith("_480p.mp4") or not link.endswith(".mp4") - ] + return [link for link in content if link.endswith("_480p.mp4") or not link.endswith(".mp4")] diff --git a/src/downloaders/Gallery.py b/src/downloaders/Gallery.py deleted file mode 100644 index cef73f1..0000000 --- a/src/downloaders/Gallery.py +++ /dev/null @@ -1,115 +0,0 @@ -import os -import json -import urllib -import requests - -from src.utils import GLOBAL -from src.utils import printToFile as print -from src.downloaders.downloaderUtils import getFile -from src.errors import FileNotFoundError, FileAlreadyExistsError, AlbumNotDownloadedCompletely, ImageNotFound, NotADownloadableLinkError, TypeInSkip - - -class Gallery: - def __init__(self, directory, post): - - links = post['CONTENTURL'] - - images = {} - count 
= 0 - for link in links: - path = urllib.parse.urlparse(link).path - base = os.path.basename(path) - name = os.path.splitext(base)[0] - images[count] = {'id': name, 'url': link} - count = count + 1 - - self.directory = directory - self.post = post - - self.downloadAlbum(images, count) - - @staticmethod - def getData(link): - - headers = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64", - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8", - } - res = requests.get(link, headers=headers) - if res.status_code != 200: - raise ImageNotFound( - f"Server responded with {res.status_code} to {link}") - pageSource = res.text - - STARTING_STRING = "_r = {" - ENDING_STRING = "" - - STARTING_STRING_LENGHT = len(STARTING_STRING) - try: - startIndex = pageSource.index( - STARTING_STRING) + STARTING_STRING_LENGHT - endIndex = pageSource.index(ENDING_STRING, startIndex) - except ValueError: - raise NotADownloadableLinkError( - f"Could not read the page source on {link}") - - data = json.loads(pageSource[startIndex - 1:endIndex + 1].strip()[:-1]) - return data - - def downloadAlbum(self, images, count): - folderName = GLOBAL.config['filename'].format(**self.post) - folderDir = self.directory / folderName - - howManyDownloaded = 0 - duplicates = 0 - - try: - if not os.path.exists(folderDir): - os.makedirs(folderDir) - except FileNotFoundError: - folderDir = self.directory / self.post['POSTID'] - os.makedirs(folderDir) - - print(folderName) - - for i in range(count): - path = urllib.parse.urlparse(images[i]['url']).path - extension = os.path.splitext(path)[1] - - filename = "_".join([ - str(i + 1), images[i]['id'] - ]) + extension - shortFilename = str(i + 1) + "_" + images[i]['id'] - - print("\n ({}/{})".format(i + 1, count)) - - try: - getFile(filename, shortFilename, folderDir, - images[i]['url'], indent=2) - howManyDownloaded += 1 - print() - - except FileAlreadyExistsError: - print(" The file already exists" + " " * 10, end="\n\n") - duplicates += 1 - - except TypeInSkip: - print(" Skipping...") - howManyDownloaded += 1 - - except Exception as exception: - print("\n Could not get the file") - print( - " " + - "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format( - class_name=exception.__class__.__name__, - info=str(exception)) + - "\n") - print(GLOBAL.log_stream.getvalue(), noPrint=True) - - if duplicates == count: - raise FileAlreadyExistsError - if howManyDownloaded + duplicates < count: - raise AlbumNotDownloadedCompletely( - "Album Not Downloaded Completely" - ) diff --git a/src/downloaders/Gfycat.py b/src/downloaders/Gfycat.py index 6366329..3cea2c2 100644 --- a/src/downloaders/Gfycat.py +++ b/src/downloaders/Gfycat.py @@ -1,37 +1,38 @@ import json import os import urllib.request + from bs4 import BeautifulSoup -from src.downloaders.downloaderUtils import getFile, getExtension -from src.errors import (NotADownloadableLinkError) -from src.utils import GLOBAL +from src.downloaders.downloaderUtils import getExtension, getFile from src.downloaders.gifDeliveryNetwork import GifDeliveryNetwork +from src.errors import NotADownloadableLinkError +from src.utils import GLOBAL + class Gfycat: - def __init__(self, directory, POST): + def __init__(self, directory, post): try: - POST['MEDIAURL'] = self.getLink(POST['CONTENTURL']) + post['MEDIAURL'] = self.getLink(post['CONTENTURL']) except IndexError: raise NotADownloadableLinkError("Could not 
read the page source") - POST['EXTENSION'] = getExtension(POST['MEDIAURL']) + post['EXTENSION'] = getExtension(post['MEDIAURL']) if not os.path.exists(directory): os.makedirs(directory) - filename = GLOBAL.config['filename'].format(**POST) + POST["EXTENSION"] - shortFilename = POST['POSTID'] + POST['EXTENSION'] + filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"] + short_filename = post['POSTID'] + post['EXTENSION'] - getFile(filename, shortFilename, directory, POST['MEDIAURL']) + getFile(filename, short_filename, directory, post['MEDIAURL']) @staticmethod def getLink(url): """Extract direct link to the video from page's source and return it """ - if '.webm' in url or '.mp4' in url or '.gif' in url: return url @@ -40,11 +41,10 @@ class Gfycat: url = "https://gfycat.com/" + url.split('/')[-1] - pageSource = (urllib.request.urlopen(url).read().decode()) + page_source = (urllib.request.urlopen(url).read().decode()) - soup = BeautifulSoup(pageSource, "html.parser") - attributes = {"data-react-helmet": "true", - "type": "application/ld+json"} + soup = BeautifulSoup(page_source, "html.parser") + attributes = {"data-react-helmet": "true", "type": "application/ld+json"} content = soup.find("script", attrs=attributes) if content is None: diff --git a/src/downloaders/Imgur.py b/src/downloaders/Imgur.py index 5a38cde..239fc2d 100644 --- a/src/downloaders/Imgur.py +++ b/src/downloaders/Imgur.py @@ -2,19 +2,19 @@ import json import os import requests -from src.utils import GLOBAL, nameCorrector -from src.utils import printToFile as print from src.downloaders.Direct import Direct from src.downloaders.downloaderUtils import getFile -from src.errors import FileNotFoundError, FileAlreadyExistsError, AlbumNotDownloadedCompletely, ImageNotFound, ExtensionError, NotADownloadableLinkError, TypeInSkip +from src.errors import (AlbumNotDownloadedCompletely, ExtensionError, FileAlreadyExistsError, ImageNotFound, + NotADownloadableLinkError, TypeInSkip) +from src.utils import GLOBAL, nameCorrector +from src.utils import printToFile as print class Imgur: - IMGUR_IMAGE_DOMAIN = "https://i.imgur.com/" + imgur_image_domain = "https://i.imgur.com/" def __init__(self, directory, post): - link = post['CONTENTURL'] if link.endswith(".gifv"): @@ -22,53 +22,49 @@ class Imgur: Direct(directory, {**post, 'CONTENTURL': link}) return None - self.rawData = self.getData(link) + self.raw_data = self.getData(link) self.directory = directory self.post = post if self.isAlbum: - if self.rawData["album_images"]["count"] != 1: - self.downloadAlbum(self.rawData["album_images"]) + if self.raw_data["album_images"]["count"] != 1: + self.downloadAlbum(self.raw_data["album_images"]) else: - self.download(self.rawData["album_images"]["images"][0]) + self.download(self.raw_data["album_images"]["images"][0]) else: - self.download(self.rawData) + self.download(self.raw_data) def downloadAlbum(self, images): - folderName = GLOBAL.config['filename'].format(**self.post) - folderDir = self.directory / folderName + folder_name = GLOBAL.config['filename'].format(**self.post) + folder_dir = self.directory / folder_name - imagesLenght = images["count"] - howManyDownloaded = 0 + images_length = images["count"] + how_many_downloaded = 0 duplicates = 0 try: - if not os.path.exists(folderDir): - os.makedirs(folderDir) + if not os.path.exists(folder_dir): + os.makedirs(folder_dir) except FileNotFoundError: - folderDir = self.directory / self.post['POSTID'] - os.makedirs(folderDir) + folder_dir = self.directory / self.post['POSTID'] + 
os.makedirs(folder_dir)
 
-        print(folderName)
-
-        for i in range(imagesLenght):
+        print(folder_name)
 
+        for i in range(images_length):
             extension = self.validateExtension(images["images"][i]["ext"])
-
-            imageURL = self.IMGUR_IMAGE_DOMAIN + \
-                images["images"][i]["hash"] + extension
-
+            image_url = self.imgur_image_domain + images["images"][i]["hash"] + extension
             filename = "_".join([str(i + 1),
                                  nameCorrector(images["images"][i]['title']),
                                  images["images"][i]['hash']]) + extension
-            shortFilename = str(i + 1) + "_" + images["images"][i]['hash']
+            short_filename = str(i + 1) + "_" + images["images"][i]['hash']
 
-            print("\n  ({}/{})".format(i + 1, imagesLenght))
+            print("\n  ({}/{})".format(i + 1, images_length))
 
             try:
-                getFile(filename, shortFilename, folderDir, imageURL, indent=2)
-                howManyDownloaded += 1
+                getFile(filename, short_filename, folder_dir, image_url, indent=2)
+                how_many_downloaded += 1
                 print()
 
             except FileAlreadyExistsError:
@@ -77,7 +73,7 @@ class Imgur:
 
             except TypeInSkip:
                 print("  Skipping...")
-                howManyDownloaded += 1
+                how_many_downloaded += 1
 
             except Exception as exception:
                 print("\n  Could not get the file")
@@ -85,69 +81,65 @@ class Imgur:
                     "  "
                     + "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
                         class_name=exception.__class__.__name__,
-                        info=str(exception)) +
-                    "\n")
-                print(GLOBAL.log_stream.getvalue(), noPrint=True)
+                        info=str(exception)
+                    )
+                    + "\n"
+                )
+                print(GLOBAL.log_stream.getvalue(), no_print=True)
 
-        if duplicates == imagesLenght:
+        if duplicates == images_length:
             raise FileAlreadyExistsError
-        if howManyDownloaded + duplicates < imagesLenght:
-            raise AlbumNotDownloadedCompletely(
-                "Album Not Downloaded Completely"
-            )
+        elif how_many_downloaded + duplicates < images_length:
+            raise AlbumNotDownloadedCompletely("Album Not Downloaded Completely")
 
     def download(self, image):
         extension = self.validateExtension(image["ext"])
-        imageURL = self.IMGUR_IMAGE_DOMAIN + image["hash"] + extension
+        image_url = self.imgur_image_domain + image["hash"] + extension
 
         filename = GLOBAL.config['filename'].format(**self.post) + extension
-        shortFilename = self.post['POSTID'] + extension
+        short_filename = self.post['POSTID'] + extension
 
-        getFile(filename, shortFilename, self.directory, imageURL)
+        getFile(filename, short_filename, self.directory, image_url)
 
     @property
     def isAlbum(self):
-        return "album_images" in self.rawData
+        return "album_images" in self.raw_data
 
     @staticmethod
     def getData(link):
-
         cookies = {"over18": "1", "postpagebeta": "0"}
         res = requests.get(link, cookies=cookies)
         if res.status_code != 200:
-            raise ImageNotFound(
-                f"Server responded with {res.status_code} to {link}")
-        pageSource = requests.get(link, cookies=cookies).text
+            raise ImageNotFound(f"Server responded with {res.status_code} to {link}")
+        page_source = requests.get(link, cookies=cookies).text
 
-        STARTING_STRING = "image : "
-        ENDING_STRING = "group :"
+        starting_string = "image : "
+        ending_string = "group :"
 
-        STARTING_STRING_LENGHT = len(STARTING_STRING)
+        starting_string_length = len(starting_string)
         try:
-            startIndex = pageSource.index(
-                STARTING_STRING) + STARTING_STRING_LENGHT
-            endIndex = pageSource.index(ENDING_STRING, startIndex)
+            start_index = page_source.index(starting_string) + starting_string_length
+            end_index = page_source.index(ending_string, start_index)
         except ValueError:
             raise NotADownloadableLinkError(
                 f"Could not read the page source on {link}")
 
-        while pageSource[endIndex] != "}":
-            endIndex = endIndex - 1
+        while page_source[end_index] != "}":
+            end_index -= 1
         try:
-            data = pageSource[startIndex:endIndex + 2].strip()[:-1]
-        except BaseException:
-            pageSource[endIndex + 1] = '}'
-            data = pageSource[startIndex:endIndex + 3].strip()[:-1]
+            data = page_source[start_index:end_index + 2].strip()[:-1]
+        except Exception:
+            page_source = page_source[:end_index + 1] + '}' + page_source[end_index + 2:]
+            data = page_source[start_index:end_index + 3].strip()[:-1]
 
         return json.loads(data)
 
     @staticmethod
     def validateExtension(string):
-        POSSIBLE_EXTENSIONS = [".jpg", ".png", ".mp4", ".gif"]
+        possible_extensions = [".jpg", ".png", ".mp4", ".gif"]
 
-        for extension in POSSIBLE_EXTENSIONS:
+        for extension in possible_extensions:
             if extension in string:
                 return extension
-
-        raise ExtensionError(
-            f"\"{string}\" is not recognized as a valid extension.")
+        else:
+            raise ExtensionError(f"\"{string}\" is not recognized as a valid extension.")
diff --git a/src/downloaders/downloaderUtils.py b/src/downloaders/downloaderUtils.py
index 3bd4605..110e971 100644
--- a/src/downloaders/downloaderUtils.py
+++ b/src/downloaders/downloaderUtils.py
@@ -1,21 +1,20 @@
-import sys
+import hashlib
 import os
+import sys
 import urllib.request
 from pathlib import Path
-import hashlib
 
+from src.errors import DomainInSkip, FailedToDownload, FileAlreadyExistsError, TypeInSkip
 from src.utils import GLOBAL
 from src.utils import printToFile as print
-from src.errors import FileAlreadyExistsError, FailedToDownload, TypeInSkip, DomainInSkip
 
 
-def dlProgress(count, blockSize, totalSize):
+def dlProgress(count, block_size, total_size):
     """Function for writing download progress to console
     """
-
-    downloadedMbs = int(count * blockSize * (10**(-6)))
-    fileSize = int(totalSize * (10**(-6)))
-    sys.stdout.write("{}Mb/{}Mb\r".format(downloadedMbs, fileSize))
+    download_mbs = int(count * block_size * (10 ** (-6)))
+    file_size = int(total_size * (10 ** (-6)))
+    sys.stdout.write("{}Mb/{}Mb\r".format(download_mbs, file_size))
     sys.stdout.flush()
 
 
@@ -23,39 +22,32 @@ def getExtension(link):
     """Extract file extension from image link.
        If didn't find any, return '.jpg'
     """
-
-    imageTypes = ['jpg', 'png', 'mp4', 'webm', 'gif']
+    image_types = ['jpg', 'png', 'mp4', 'webm', 'gif']
     parsed = link.split('.')
 
-    for fileType in imageTypes:
+    for fileType in image_types:
         if fileType in parsed:
             return "." 
+ parsed[-1] - - if "v.redd.it" not in link: - return '.jpg' - return '.mp4' + else: + if "v.redd.it" not in link: + return '.jpg' + else: + return '.mp4' -def getFile( - filename, - shortFilename, - folderDir, - imageURL, - indent=0, - silent=False): - - FORMATS = { +def getFile(filename, short_filename, folder_dir, image_url, indent=0, silent=False): + formats = { "videos": [".mp4", ".webm"], "images": [".jpg", ".jpeg", ".png", ".bmp"], "gifs": [".gif"], "self": [] } - for type in GLOBAL.arguments.skip: - for extension in FORMATS[type]: + for file_type in GLOBAL.arguments.skip: + for extension in formats[file_type]: if extension in filename: raise TypeInSkip - if any(domain in imageURL for domain in GLOBAL.arguments.skip_domain): + if any(domain in image_url for domain in GLOBAL.arguments.skip_domain): raise DomainInSkip headers = [ @@ -70,44 +62,40 @@ def getFile( ("Connection", "keep-alive") ] - if not os.path.exists(folderDir): - os.makedirs(folderDir) + if not os.path.exists(folder_dir): + os.makedirs(folder_dir) opener = urllib.request.build_opener() - if "imgur" not in imageURL: + if "imgur" not in image_url: opener.addheaders = headers urllib.request.install_opener(opener) if not silent: - print(" " * indent + str(folderDir), - " " * indent + str(filename), - sep="\n") + print(" " * indent + str(folder_dir), " " * indent + str(filename), sep="\n") for i in range(3): - fileDir = Path(folderDir) / filename - tempDir = Path(folderDir) / (filename + ".tmp") + file_dir = Path(folder_dir) / filename + temp_dir = Path(folder_dir) / (filename + ".tmp") - if not (os.path.isfile(fileDir)): + if not (os.path.isfile(file_dir)): try: - urllib.request.urlretrieve(imageURL, - tempDir, - reporthook=dlProgress) + urllib.request.urlretrieve(image_url, temp_dir, reporthook=dlProgress) - fileHash = createHash(tempDir) + file_hash = createHash(temp_dir) if GLOBAL.arguments.no_dupes: - if fileHash in GLOBAL.downloadedPosts(): - os.remove(tempDir) + if file_hash in GLOBAL.downloadedPosts(): + os.remove(temp_dir) raise FileAlreadyExistsError - GLOBAL.downloadedPosts.add(fileHash) + GLOBAL.downloadedPosts.add(file_hash) - os.rename(tempDir, fileDir) + os.rename(temp_dir, file_dir) if not silent: print(" " * indent + "Downloaded" + " " * 10) return None except ConnectionResetError: raise FailedToDownload except FileNotFoundError: - filename = shortFilename + filename = short_filename else: raise FileAlreadyExistsError raise FailedToDownload diff --git a/src/downloaders/gallery.py b/src/downloaders/gallery.py new file mode 100644 index 0000000..d5cbac4 --- /dev/null +++ b/src/downloaders/gallery.py @@ -0,0 +1,110 @@ +import json +import os +import urllib + +import requests + +from src.downloaders.downloaderUtils import getFile +from src.errors import (AlbumNotDownloadedCompletely, FileAlreadyExistsError, ImageNotFound, NotADownloadableLinkError, + TypeInSkip) +from src.utils import GLOBAL +from src.utils import printToFile as print + + +class Gallery: + def __init__(self, directory, post): + link = post['CONTENTURL'] + self.raw_data = self.getData(link) + + self.directory = directory + self.post = post + + images = {} + count = 0 + for model in self.raw_data['posts']['models']: + try: + for item in self.raw_data['posts']['models'][model]['media']['gallery']['items']: + try: + images[count] = {'id': item['mediaId'], 'url': self.raw_data['posts'] + ['models'][model]['media']['mediaMetadata'][item['mediaId']]['s']['u']} + count += 1 + except Exception: + continue + except Exception: + continue + + 
self.downloadAlbum(images, count)
+
+    @staticmethod
+    def getData(link):
+        headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64",
+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
+        }
+        res = requests.get(link, headers=headers)
+        if res.status_code != 200:
+            raise ImageNotFound(f"Server responded with {res.status_code} to {link}")
+        page_source = res.text
+
+        starting_string = "_r = {"
+        ending_string = ""
+
+        starting_string_length = len(starting_string)
+        try:
+            start_index = page_source.index(starting_string) + starting_string_length
+            end_index = page_source.index(ending_string, start_index)
+        except ValueError:
+            raise NotADownloadableLinkError(f"Could not read the page source on {link}")
+
+        data = json.loads(page_source[start_index - 1:end_index + 1].strip()[:-1])
+        return data
+
+    def downloadAlbum(self, images, count):
+        folder_name = GLOBAL.config['filename'].format(**self.post)
+        folder_dir = self.directory / folder_name
+
+        how_many_downloaded = 0
+        duplicates = 0
+
+        try:
+            if not os.path.exists(folder_dir):
+                os.makedirs(folder_dir)
+        except FileNotFoundError:
+            folder_dir = self.directory / self.post['POSTID']
+            os.makedirs(folder_dir)
+
+        print(folder_name)
+
+        for i in range(count):
+            path = urllib.parse.urlparse(images[i]['url']).path
+            extension = os.path.splitext(path)[1]
+
+            filename = "_".join([str(i + 1), images[i]['id']]) + extension
+            short_filename = str(i + 1) + "_" + images[i]['id']
+
+            print("\n  ({}/{})".format(i + 1, count))
+
+            try:
+                getFile(filename, short_filename, folder_dir, images[i]['url'], indent=2)
+                how_many_downloaded += 1
+                print()
+
+            except FileAlreadyExistsError:
+                print("  The file already exists" + " " * 10, end="\n\n")
+                duplicates += 1
+
+            except TypeInSkip:
+                print("  Skipping...")
+                how_many_downloaded += 1
+
+            except Exception as exception:
+                print("\n  Could not get the file")
+                print("  " + "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
+                    class_name=exception.__class__.__name__, info=str(exception)) + "\n"
+                )
+                print(GLOBAL.log_stream.getvalue(), no_print=True)
+
+        if duplicates == count:
+            raise FileAlreadyExistsError
+        elif how_many_downloaded + duplicates < count:
+            raise AlbumNotDownloadedCompletely("Album Not Downloaded Completely")
diff --git a/src/downloaders/gifDeliveryNetwork.py b/src/downloaders/gifDeliveryNetwork.py
index bd6002b..a41b7ea 100644
--- a/src/downloaders/gifDeliveryNetwork.py
+++ b/src/downloaders/gifDeliveryNetwork.py
@@ -1,52 +1,49 @@
 import os
 import urllib.request
+
 from bs4 import BeautifulSoup
 
-from src.downloaders.downloaderUtils import getFile, getExtension
-from src.errors import (NotADownloadableLinkError)
+from src.downloaders.downloaderUtils import getExtension, getFile
+from src.errors import NotADownloadableLinkError
 from src.utils import GLOBAL
 
 
 class GifDeliveryNetwork:
-    def __init__(self, directory, POST):
+    def __init__(self, directory, post):
         try:
-            POST['MEDIAURL'] = self.getLink(POST['CONTENTURL'])
+            post['MEDIAURL'] = self.getLink(post['CONTENTURL'])
         except IndexError:
             raise NotADownloadableLinkError("Could not read the page source")
 
-        POST['EXTENSION'] = getExtension(POST['MEDIAURL'])
+        post['EXTENSION'] = getExtension(post['MEDIAURL'])
 
         if not os.path.exists(directory):
             os.makedirs(directory)
 
-        filename = GLOBAL.config['filename'].format(**POST) + POST["EXTENSION"]
-        shortFilename = POST['POSTID'] + 
POST['EXTENSION'] + filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"] + short_filename = post['POSTID'] + post['EXTENSION'] - getFile(filename, shortFilename, directory, POST['MEDIAURL']) + getFile(filename, short_filename, directory, post['MEDIAURL']) @staticmethod def getLink(url): """Extract direct link to the video from page's source and return it """ - - if '.webm' in url.split( - '/')[-1] or '.mp4' in url.split('/')[-1] or '.gif' in url.split('/')[-1]: + if '.webm' in url.split('/')[-1] or '.mp4' in url.split('/')[-1] or '.gif' in url.split('/')[-1]: return url if url[-1:] == '/': url = url[:-1] url = "https://www.gifdeliverynetwork.com/" + url.split('/')[-1] + page_source = (urllib.request.urlopen(url).read().decode()) - pageSource = (urllib.request.urlopen(url).read().decode()) - - soup = BeautifulSoup(pageSource, "html.parser") + soup = BeautifulSoup(page_source, "html.parser") attributes = {"id": "mp4Source", "type": "video/mp4"} content = soup.find("source", attrs=attributes) if content is None: - raise NotADownloadableLinkError("Could not read the page source") return content["src"] diff --git a/src/downloaders/redgifs.py b/src/downloaders/redgifs.py index f87631b..b12c17d 100644 --- a/src/downloaders/redgifs.py +++ b/src/downloaders/redgifs.py @@ -1,36 +1,36 @@ import json import os import urllib.request + from bs4 import BeautifulSoup -from src.downloaders.downloaderUtils import getFile, getExtension -from src.errors import (NotADownloadableLinkError) +from src.downloaders.downloaderUtils import getExtension, getFile +from src.errors import NotADownloadableLinkError from src.utils import GLOBAL class Redgifs: - def __init__(self, directory, POST): + def __init__(self, directory, post): try: - POST['MEDIAURL'] = self.getLink(POST['CONTENTURL']) + post['MEDIAURL'] = self.getLink(post['CONTENTURL']) except IndexError: raise NotADownloadableLinkError("Could not read the page source") - POST['EXTENSION'] = getExtension(POST['MEDIAURL']) + post['EXTENSION'] = getExtension(post['MEDIAURL']) if not os.path.exists(directory): os.makedirs(directory) - filename = GLOBAL.config['filename'].format(**POST) + POST["EXTENSION"] - shortFilename = POST['POSTID'] + POST['EXTENSION'] + filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"] + short_filename = post['POSTID'] + post['EXTENSION'] - getFile(filename, shortFilename, directory, POST['MEDIAURL']) + getFile(filename, short_filename, directory, post['MEDIAURL']) @staticmethod def getLink(url): """Extract direct link to the video from page's source and return it """ - if '.webm' in url or '.mp4' in url or '.gif' in url: return url @@ -44,11 +44,10 @@ class Redgifs: 'User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64') - pageSource = (urllib.request.urlopen(url).read().decode()) + page_source = (urllib.request.urlopen(url).read().decode()) - soup = BeautifulSoup(pageSource, "html.parser") - attributes = {"data-react-helmet": "true", - "type": "application/ld+json"} + soup = BeautifulSoup(page_source, "html.parser") + attributes = {"data-react-helmet": "true", "type": "application/ld+json"} content = soup.find("script", attrs=attributes) if content is None: diff --git a/src/downloaders/selfPost.py b/src/downloaders/selfPost.py index f7a8a47..bae8b88 100644 --- a/src/downloaders/selfPost.py +++ b/src/downloaders/selfPost.py @@ -5,13 +5,13 @@ from pathlib import Path from src.errors import 
FileAlreadyExistsError, TypeInSkip from src.utils import GLOBAL +from src.utils import printToFile as print VanillaPrint = print class SelfPost: def __init__(self, directory, post): - if "self" in GLOBAL.arguments.skip: raise TypeInSkip @@ -20,20 +20,20 @@ class SelfPost: filename = GLOBAL.config['filename'].format(**post) - fileDir = directory / (filename + ".md") - print(fileDir) + file_dir = directory / (filename + ".md") + print(file_dir) print(filename + ".md") - if Path.is_file(fileDir): + if Path.is_file(file_dir): raise FileAlreadyExistsError try: - self.writeToFile(fileDir, post) + self.writeToFile(file_dir, post) except FileNotFoundError: - fileDir = post['POSTID'] + ".md" - fileDir = directory / fileDir + file_dir = post['POSTID'] + ".md" + file_dir = directory / file_dir - self.writeToFile(fileDir, post) + self.writeToFile(file_dir, post) @staticmethod def writeToFile(directory, post): @@ -57,5 +57,4 @@ class SelfPost: with io.open(directory, "w", encoding="utf-8") as FILE: VanillaPrint(content, file=FILE) - print("Downloaded") diff --git a/src/downloaders/vreddit.py b/src/downloaders/vreddit.py index 7194042..16f5296 100644 --- a/src/downloaders/vreddit.py +++ b/src/downloaders/vreddit.py @@ -13,45 +13,40 @@ class VReddit: os.makedirs(directory) filename = GLOBAL.config['filename'].format(**post) + extension - shortFilename = post['POSTID'] + extension + short_filename = post['POSTID'] + extension try: - FNULL = open(os.devnull, 'w') - subprocess.call("ffmpeg", stdout=FNULL, stderr=subprocess.STDOUT) - except BaseException: - getFile(filename, shortFilename, directory, post['CONTENTURL']) + fnull = open(os.devnull, 'w') + subprocess.call("ffmpeg", stdout=fnull, stderr=subprocess.STDOUT) + except Exception: + getFile(filename, short_filename, directory, post['CONTENTURL']) print("FFMPEG library not found, skipping merging video and audio") else: - videoName = post['POSTID'] + "_video" - videoURL = post['CONTENTURL'] - audioName = post['POSTID'] + "_audio" - audioURL = videoURL[:videoURL.rfind('/')] + '/DASH_audio.mp4' + video_name = post['POSTID'] + "_video" + video_url = post['CONTENTURL'] + audio_name = post['POSTID'] + "_audio" + audio_url = video_url[:video_url.rfind('/')] + '/DASH_audio.mp4' print(directory, filename, sep="\n") - getFile(videoName, videoName, directory, videoURL, silent=True) - getFile(audioName, audioName, directory, audioURL, silent=True) + getFile(video_name, video_name, directory, video_url, silent=True) + getFile(audio_name, audio_name, directory, audio_url, silent=True) try: - self._mergeAudio(videoName, - audioName, - filename, - shortFilename, - directory) + self._mergeAudio(video_name, audio_name, filename, short_filename, directory) except KeyboardInterrupt: os.remove(directory / filename) - os.remove(directory / audioName) - - os.rename(directory / videoName, directory / filename) + os.remove(directory / audio_name) + os.rename(directory / video_name, directory / filename) @staticmethod - def _mergeAudio(video, audio, filename, shortFilename, directory): + def _mergeAudio(video, audio, filename, short_filename, directory): + input_video = str(directory / video) + input_audio = str(directory / audio) - inputVideo = str(directory / video) - inputAudio = str(directory / audio) - - FNULL = open(os.devnull, 'w') - cmd = f"ffmpeg -i {inputAudio} -i {inputVideo} -c:v copy -c:a aac -strict experimental {str(directory / filename)}" - subprocess.call(cmd.split(), stdout=FNULL, stderr=subprocess.STDOUT) + fnull = open(os.devnull, 'w') + cmd = "ffmpeg -i {} 
-i {} -c:v copy -c:a aac -strict experimental {}".format( + input_audio, input_video, str(directory / filename)) + subprocess.call(cmd.split(), stdout=fnull, stderr=subprocess.STDOUT) os.remove(directory / video) os.remove(directory / audio) diff --git a/src/downloaders/youtube.py b/src/downloaders/youtube.py index a5c7922..0b0f0e2 100644 --- a/src/downloaders/youtube.py +++ b/src/downloaders/youtube.py @@ -1,12 +1,13 @@ import os -import youtube_dl import sys -from src.downloaders.downloaderUtils import createHash +import youtube_dl +from src.downloaders.downloaderUtils import createHash +from src.errors import FileAlreadyExistsError from src.utils import GLOBAL from src.utils import printToFile as print -from src.errors import FileAlreadyExistsError + class Youtube: @@ -35,19 +36,19 @@ class Youtube: if GLOBAL.arguments.no_dupes: try: - fileHash = createHash(location) + file_hash = createHash(location) except FileNotFoundError: return None - if fileHash in GLOBAL.downloadedPosts(): + if file_hash in GLOBAL.downloadedPosts(): os.remove(location) raise FileAlreadyExistsError - GLOBAL.downloadedPosts.add(fileHash) + GLOBAL.downloadedPosts.add(file_hash) @staticmethod def _hook(d): if d['status'] == 'finished': return print("Downloaded") - downloadedMbs = int(d['downloaded_bytes'] * (10**(-6))) - fileSize = int(d['total_bytes'] * (10**(-6))) - sys.stdout.write("{}Mb/{}Mb\r".format(downloadedMbs, fileSize)) + downloaded_mbs = int(d['downloaded_bytes'] * (10**(-6))) + file_size = int(d['total_bytes'] * (10**(-6))) + sys.stdout.write("{}Mb/{}Mb\r".format(downloaded_mbs, file_size)) sys.stdout.flush() diff --git a/src/errors.py b/src/errors.py index 39d90be..7bf47b9 100644 --- a/src/errors.py +++ b/src/errors.py @@ -100,9 +100,6 @@ class InvalidSortingType(Exception): pass -class FileNotFoundError(Exception): - pass - class NoSuitablePost(Exception): pass diff --git a/src/jsonHelper.py b/src/jsonHelper.py index 11508b4..3f990f0 100644 --- a/src/jsonHelper.py +++ b/src/jsonHelper.py @@ -6,47 +6,43 @@ from src.errors import InvalidJSONFile class JsonFile: """ Write and read JSON files - Use add(self,toBeAdded) to add to files - Use delete(self,*deletedKeys) to delete keys """ - FILEDIR = "" + file_dir = "" - def __init__(self, FILEDIR): - self.FILEDIR = FILEDIR - if not path.exists(self.FILEDIR): + def __init__(self, file_dir): + self.file_dir = file_dir + if not path.exists(self.file_dir): self.__writeToFile({}, create=True) def read(self): try: - with open(self.FILEDIR, 'r') as f: + with open(self.file_dir, 'r') as f: return json.load(f) except json.decoder.JSONDecodeError: - raise InvalidJSONFile(f"{self.FILEDIR} cannot be read") + raise InvalidJSONFile(f"{self.file_dir} cannot be read") - def add(self, toBeAdded, sub=None): + def add(self, to_be_added, sub=None): """Takes a dictionary and merges it with json file. It uses new key's value if a key already exists. Returns the new content as a dictionary. """ - data = self.read() if sub: - data[sub] = {**data[sub], **toBeAdded} + data[sub] = {**data[sub], **to_be_added} else: - data = {**data, **toBeAdded} + data = {**data, **to_be_added} self.__writeToFile(data) return self.read() - def delete(self, *deleteKeys): + def delete(self, *delete_keys): """Delete given keys from JSON file. Returns the new content as a dictionary. 
""" - data = self.read() - for deleteKey in deleteKeys: + for deleteKey in delete_keys: if deleteKey in data: del data[deleteKey] found = True @@ -56,6 +52,6 @@ class JsonFile: def __writeToFile(self, content, create=False): if not create: - remove(self.FILEDIR) - with open(self.FILEDIR, 'w') as f: + remove(self.file_dir) + with open(self.file_dir, 'w') as f: json.dump(content, f, indent=4) diff --git a/src/parser.py b/src/parser.py index b48ea6d..1da6436 100644 --- a/src/parser.py +++ b/src/parser.py @@ -6,142 +6,136 @@ except ModuleNotFoundError: from errors import InvalidRedditLink -def QueryParser(PassedQueries, index): - ExtractedQueries = {} +def QueryParser(passed_queries, index): + extracted_queries = {} - QuestionMarkIndex = PassedQueries.index("?") - Header = PassedQueries[:QuestionMarkIndex] - ExtractedQueries["HEADER"] = Header - Queries = PassedQueries[QuestionMarkIndex + 1:] + question_mark_index = passed_queries.index("?") + header = passed_queries[:question_mark_index] + extracted_queries["HEADER"] = header + queries = passed_queries[question_mark_index + 1:] - ParsedQueries = Queries.split("&") + parsed_queries = queries.split("&") - for Query in ParsedQueries: - Query = Query.split("=") - ExtractedQueries[Query[0]] = Query[1] + for query in parsed_queries: + query = query.split("=") + extracted_queries[query[0]] = query[1] - if ExtractedQueries["HEADER"] == "search": - ExtractedQueries["q"] = ExtractedQueries["q"].replace("%20", " ") + if extracted_queries["HEADER"] == "search": + extracted_queries["q"] = extracted_queries["q"].replace("%20", " ") - return ExtractedQueries + return extracted_queries -def LinkParser(LINK): - RESULT = {} - ShortLink = False +def LinkParser(link): + result = {} + short_link = False - if "reddit.com" not in LINK: + if "reddit.com" not in link: raise InvalidRedditLink("Invalid reddit link") - SplittedLink = LINK.split("/") + splitted_link = link.split("/") - if SplittedLink[0] == "https:" or SplittedLink[0] == "http:": - SplittedLink = SplittedLink[2:] + if splitted_link[0] == "https:" or splitted_link[0] == "http:": + splitted_link = splitted_link[2:] try: - if (SplittedLink[-2].endswith("reddit.com") and - SplittedLink[-1] == "") or \ - SplittedLink[-1].endswith("reddit.com"): + if (splitted_link[-2].endswith("reddit.com") and + splitted_link[-1] == "") or splitted_link[-1].endswith("reddit.com"): - RESULT["sort"] = "best" - return RESULT + result["sort"] = "best" + return result except IndexError: - if SplittedLink[0].endswith("reddit.com"): - RESULT["sort"] = "best" - return RESULT + if splitted_link[0].endswith("reddit.com"): + result["sort"] = "best" + return result - if "redd.it" in SplittedLink: - ShortLink = True + if "redd.it" in splitted_link: + short_link = True - if SplittedLink[0].endswith("reddit.com"): - SplittedLink = SplittedLink[1:] + if splitted_link[0].endswith("reddit.com"): + splitted_link = splitted_link[1:] - if "comments" in SplittedLink: - RESULT = {"post": LINK} - return RESULT + if "comments" in splitted_link: + result = {"post": link} + return result - if "me" in SplittedLink or \ - "u" in SplittedLink or \ - "user" in SplittedLink or \ - "r" in SplittedLink or \ - "m" in SplittedLink: + elif "me" in splitted_link or \ + "u" in splitted_link or \ + "user" in splitted_link or \ + "r" in splitted_link or \ + "m" in splitted_link: - if "r" in SplittedLink: - RESULT["subreddit"] = SplittedLink[SplittedLink.index("r") + 1] + if "r" in splitted_link: + result["subreddit"] = splitted_link[splitted_link.index("r") + 1] - 
elif "m" in SplittedLink: - RESULT["multireddit"] = SplittedLink[SplittedLink.index("m") + 1] - RESULT["user"] = SplittedLink[SplittedLink.index("m") - 1] + elif "m" in splitted_link: + result["multireddit"] = splitted_link[splitted_link.index("m") + 1] + result["user"] = splitted_link[splitted_link.index("m") - 1] else: - for index in range(len(SplittedLink)): - if SplittedLink[index] == "u" or \ - SplittedLink[index] == "user": + for index in range(len(splitted_link)): + if splitted_link[index] == "u" or splitted_link[index] == "user": + result["user"] = splitted_link[index + 1] - RESULT["user"] = SplittedLink[index + 1] + elif splitted_link[index] == "me": + result["user"] = "me" - elif SplittedLink[index] == "me": - RESULT["user"] = "me" - - for index in range(len(SplittedLink)): - if SplittedLink[index] in [ + for index in range(len(splitted_link)): + if splitted_link[index] in [ "hot", "top", "new", "controversial", "rising" ]: - RESULT["sort"] = SplittedLink[index] + result["sort"] = splitted_link[index] if index == 0: - RESULT["subreddit"] = "frontpage" + result["subreddit"] = "frontpage" - elif SplittedLink[index] in ["submitted", "saved", "posts", "upvoted"]: - if SplittedLink[index] == "submitted" or \ - SplittedLink[index] == "posts": - RESULT["submitted"] = {} + elif splitted_link[index] in ["submitted", "saved", "posts", "upvoted"]: + if splitted_link[index] == "submitted" or splitted_link[index] == "posts": + result["submitted"] = {} - elif SplittedLink[index] == "saved": - RESULT["saved"] = True + elif splitted_link[index] == "saved": + result["saved"] = True - elif SplittedLink[index] == "upvoted": - RESULT["upvoted"] = True + elif splitted_link[index] == "upvoted": + result["upvoted"] = True - elif "?" in SplittedLink[index]: - ParsedQuery = QueryParser(SplittedLink[index], index) - if ParsedQuery["HEADER"] == "search": - del ParsedQuery["HEADER"] - RESULT["search"] = ParsedQuery + elif "?" 
in splitted_link[index]:
+                parsed_query = QueryParser(splitted_link[index], index)
+                if parsed_query["HEADER"] == "search":
+                    del parsed_query["HEADER"]
+                    result["search"] = parsed_query
 
-                elif ParsedQuery["HEADER"] == "submitted" or \
-                        ParsedQuery["HEADER"] == "posts":
-                    del ParsedQuery["HEADER"]
-                    RESULT["submitted"] = ParsedQuery
+                elif parsed_query["HEADER"] == "submitted" or \
+                        parsed_query["HEADER"] == "posts":
+                    del parsed_query["HEADER"]
+                    result["submitted"] = parsed_query
 
                 else:
-                    del ParsedQuery["HEADER"]
-                    RESULT["queries"] = ParsedQuery
+                    del parsed_query["HEADER"]
+                    result["queries"] = parsed_query
 
-    if not ("upvoted" in RESULT or
-            "saved" in RESULT or
-            "submitted" in RESULT or
-            "multireddit" in RESULT) and \
-       "user" in RESULT:
-        RESULT["submitted"] = {}
+    if not ("upvoted" in result or
+            "saved" in result or
+            "submitted" in result or
+            "multireddit" in result) and "user" in result:
+        result["submitted"] = {}
 
-    return RESULT
+    return result
 
 
-def LinkDesigner(LINK):
-
-    attributes = LinkParser(LINK)
-    MODE = {}
+def LinkDesigner(link):
+    attributes = LinkParser(link)
+    mode = {}
 
     if "post" in attributes:
-        MODE["post"] = attributes["post"]
-        MODE["sort"] = ""
-        MODE["time"] = ""
-        return MODE
+        mode["post"] = attributes["post"]
+        mode["sort"] = ""
+        mode["time"] = ""
+        return mode
 
-    if "search" in attributes:
-        MODE["search"] = attributes["search"]["q"]
+    elif "search" in attributes:
+        mode["search"] = attributes["search"]["q"]
 
         if "restrict_sr" in attributes["search"]:
 
@@ -150,91 +144,90 @@ def LinkDesigner(LINK):
                     attributes["search"]["restrict_sr"] == ""):
 
                 if "subreddit" in attributes:
-                    MODE["subreddit"] = attributes["subreddit"]
+                    mode["subreddit"] = attributes["subreddit"]
 
                 elif "multireddit" in attributes:
-                    MODE["multreddit"] = attributes["multireddit"]
-                    MODE["user"] = attributes["user"]
+                    mode["multireddit"] = attributes["multireddit"]
+                    mode["user"] = attributes["user"]
 
                 else:
-                    MODE["subreddit"] = "all"
+                    mode["subreddit"] = "all"
 
             else:
-                MODE["subreddit"] = "all"
+                mode["subreddit"] = "all"
 
         if "t" in attributes["search"]:
-            MODE["time"] = attributes["search"]["t"]
+            mode["time"] = attributes["search"]["t"]
         else:
-            MODE["time"] = "all"
+            mode["time"] = "all"
 
         if "sort" in attributes["search"]:
-            MODE["sort"] = attributes["search"]["sort"]
+            mode["sort"] = attributes["search"]["sort"]
         else:
-            MODE["sort"] = "relevance"
+            mode["sort"] = "relevance"
 
         if "include_over_18" in attributes["search"]:
-            if attributes["search"]["include_over_18"] == 1 or \
-               attributes["search"]["include_over_18"] == "on":
-                MODE["nsfw"] = True
+            if attributes["search"]["include_over_18"] == 1 or attributes["search"]["include_over_18"] == "on":
+                mode["nsfw"] = True
             else:
-                MODE["nsfw"] = False
+                mode["nsfw"] = False
 
     else:
         if "queries" in attributes:
-            if not ("submitted" in attributes or
-                    "posts" in attributes):
+            if not ("submitted" in attributes or "posts" in attributes):
                 if "t" in attributes["queries"]:
-                    MODE["time"] = attributes["queries"]["t"]
+                    mode["time"] = attributes["queries"]["t"]
                 else:
-                    MODE["time"] = "day"
+                    mode["time"] = "day"
 
             else:
                 if "t" in attributes["queries"]:
-                    MODE["time"] = attributes["queries"]["t"]
+                    mode["time"] = attributes["queries"]["t"]
                 else:
-                    MODE["time"] = "all"
+                    mode["time"] = "all"
 
             if "sort" in attributes["queries"]:
-                MODE["sort"] = attributes["queries"]["sort"]
+                mode["sort"] = attributes["queries"]["sort"]
            else:
-                MODE["sort"] = "new"
+                mode["sort"] = "new"
 
         else:
-            MODE["time"] = "day"
+            mode["time"] = "day"
 
     if "subreddit" in attributes and "search" 
not in attributes: - MODE["subreddit"] = attributes["subreddit"] + mode["subreddit"] = attributes["subreddit"] elif "user" in attributes and "search" not in attributes: - MODE["user"] = attributes["user"] + mode["user"] = attributes["user"] if "submitted" in attributes: - MODE["submitted"] = True + mode["submitted"] = True if "sort" in attributes["submitted"]: - MODE["sort"] = attributes["submitted"]["sort"] - elif "sort" in MODE: + mode["sort"] = attributes["submitted"]["sort"] + elif "sort" in mode: pass else: - MODE["sort"] = "new" + mode["sort"] = "new" if "t" in attributes["submitted"]: - MODE["time"] = attributes["submitted"]["t"] + mode["time"] = attributes["submitted"]["t"] else: - MODE["time"] = "all" + mode["time"] = "all" elif "saved" in attributes: - MODE["saved"] = True + mode["saved"] = True elif "upvoted" in attributes: - MODE["upvoted"] = True + mode["upvoted"] = True elif "multireddit" in attributes: - MODE["multireddit"] = attributes["multireddit"] + mode["multireddit"] = attributes["multireddit"] if "sort" in attributes: - MODE["sort"] = attributes["sort"] - elif "sort" in MODE: + mode["sort"] = attributes["sort"] + elif "sort" in mode: pass else: - MODE["sort"] = "hot" + mode["sort"] = "hot" + + return mode - return MODE if __name__ == "__main__": diff --git a/src/programMode.py b/src/programMode.py index 963e665..5fdd0ba 100644 --- a/src/programMode.py +++ b/src/programMode.py @@ -1,7 +1,9 @@ -from src.errors import SearchModeError, RedditorNameError, ProgramModeError, InvalidSortingType -from src.parser import LinkDesigner -from pathlib import Path import sys +from pathlib import Path + +from src.errors import InvalidSortingType, ProgramModeError, RedditorNameError, SearchModeError +from src.parser import LinkDesigner + class ProgramMode: @@ -10,213 +12,183 @@ class ProgramMode: self.arguments = arguments def generate(self): - try: self._validateProgramMode() except ProgramModeError: self._promptUser() - programMode = {} + program_mode = {} if self.arguments.user is not None: - programMode["user"] = self.arguments.user + program_mode["user"] = self.arguments.user if self.arguments.search is not None: - programMode["search"] = self.arguments.search + program_mode["search"] = self.arguments.search if self.arguments.sort == "hot" or \ self.arguments.sort == "controversial" or \ self.arguments.sort == "rising": self.arguments.sort = "relevance" if self.arguments.sort is not None: - programMode["sort"] = self.arguments.sort + program_mode["sort"] = self.arguments.sort else: if self.arguments.submitted: - programMode["sort"] = "new" + program_mode["sort"] = "new" else: - programMode["sort"] = "hot" + program_mode["sort"] = "hot" if self.arguments.time is not None: - programMode["time"] = self.arguments.time + program_mode["time"] = self.arguments.time else: - programMode["time"] = "all" + program_mode["time"] = "all" if self.arguments.link is not None: - self.arguments.link = self.arguments.link.strip("\"") - programMode = LinkDesigner(self.arguments.link) + program_mode = LinkDesigner(self.arguments.link) if self.arguments.search is not None: - programMode["search"] = self.arguments.search + program_mode["search"] = self.arguments.search if self.arguments.sort is not None: - programMode["sort"] = self.arguments.sort + program_mode["sort"] = self.arguments.sort if self.arguments.time is not None: - programMode["time"] = self.arguments.time + program_mode["time"] = self.arguments.time elif self.arguments.subreddit is not None: if isinstance(self.arguments.subreddit, list): 
self.arguments.subreddit = "+".join(self.arguments.subreddit)
 
-            programMode["subreddit"] = self.arguments.subreddit
+            program_mode["subreddit"] = self.arguments.subreddit
 
         elif self.arguments.multireddit is not None:
-            programMode["multireddit"] = self.arguments.multireddit
+            program_mode["multireddit"] = self.arguments.multireddit
 
         elif self.arguments.saved is True:
-            programMode["saved"] = True
+            program_mode["saved"] = True
 
         elif self.arguments.upvoted is True:
-            programMode["upvoted"] = True
+            program_mode["upvoted"] = True
 
         elif self.arguments.submitted is not None:
-            programMode["submitted"] = True
+            program_mode["submitted"] = True
 
             if self.arguments.sort == "rising":
-                raise InvalidSortingType("Invalid sorting type has given")
+                raise InvalidSortingType("Invalid sorting type was given")
 
-        programMode["limit"] = self.arguments.limit
+        program_mode["limit"] = self.arguments.limit
 
-        return programMode
+        return program_mode
 
     @staticmethod
     def _chooseFrom(choices):
         print()
-        choicesByIndex = [str(x) for x in range(len(choices) + 1)]
+        choices_by_index = list(str(x) for x in range(len(choices) + 1))
         for i in range(len(choices)):
-            print("{indent}[{order}] {mode}".format(
-                indent=" " * 4, order=i + 1, mode=choices[i]
-            ))
+            print("{indent}[{order}] {mode}".format(indent=" " * 4, order=i + 1, mode=choices[i]))
 
         print(" " * 4 + "[0] exit\n")
         choice = input("> ")
-        while not choice.lower() in choices + choicesByIndex + ["exit"]:
+        while choice.lower() not in choices + choices_by_index + ["exit"]:
             print("Invalid input\n")
-            input("> ")
+            choice = input("> ")
 
         if choice == "0" or choice == "exit":
             sys.exit()
-        elif choice in choicesByIndex:
+        elif choice in choices_by_index:
             return choices[int(choice) - 1]
         else:
             return choice
 
     def _promptUser(self):
         print("select program mode:")
-        programModes = [
-            "search", "subreddit", "multireddit",
-            "submitted", "upvoted", "saved", "log"
-        ]
-        programMode = self._chooseFrom(programModes)
+        program_modes = ["search", "subreddit", "multireddit", "submitted", "upvoted", "saved", "log"]
+        program_mode = self._chooseFrom(program_modes)
 
-        if programMode == "search":
+        if program_mode == "search":
             self.arguments.search = input("\nquery: ")
             self.arguments.subreddit = input("\nsubreddit: ")
 
             print("\nselect sort type:")
-            sortTypes = [
-                "relevance", "top", "new"
-            ]
-            sortType = self._chooseFrom(sortTypes)
-            self.arguments.sort = sortType
+            sort_types = ["relevance", "top", "new"]
+            sort_type = self._chooseFrom(sort_types)
+            self.arguments.sort = sort_type
 
             print("\nselect time filter:")
-            timeFilters = [
-                "hour", "day", "week", "month", "year", "all"
-            ]
-            timeFilter = self._chooseFrom(timeFilters)
-            self.arguments.time = timeFilter
+            time_filters = ["hour", "day", "week", "month", "year", "all"]
+            time_filter = self._chooseFrom(time_filters)
+            self.arguments.time = time_filter
 
-        if programMode == "subreddit":
-
-            subredditInput = input(
-                "(type frontpage for all subscribed subreddits,\n"
-                " use plus to seperate multi subreddits:"
-                " pics+funny+me_irl etc.)\n\n"
-                "subreddit: ")
-            self.arguments.subreddit = subredditInput
-
-            # while not (subredditInput == "" or subredditInput.lower() == "frontpage"):
-            #     subredditInput = input("subreddit: ")
-            #     self.arguments.subreddit += "+" + subredditInput
+        if program_mode == "subreddit":
+            subreddit_input = input("(type frontpage for all subscribed subreddits,\n"
+                                    " use plus to separate multi subreddits:"
+                                    " pics+funny+me_irl etc.)\n\n"
+                                    "subreddit: ")
+            self.arguments.subreddit = subreddit_input
 
             if " " in self.arguments.subreddit:
                 self.arguments.subreddit = "+".join(
                     self.arguments.subreddit.split())
 
             # DELETE 
THE PLUS (+) AT THE END - if not subredditInput.lower() == "frontpage" \ - and self.arguments.subreddit[-1] == "+": + if not subreddit_input.lower() == "frontpage" and self.arguments.subreddit[-1] == "+": self.arguments.subreddit = self.arguments.subreddit[:-1] print("\nselect sort type:") - sortTypes = [ - "hot", "top", "new", "rising", "controversial" - ] - sortType = self._chooseFrom(sortTypes) - self.arguments.sort = sortType + sort_types = ["hot", "top", "new", "rising", "controversial"] + sort_type = self._chooseFrom(sort_types) + self.arguments.sort = sort_type - if sortType in ["top", "controversial"]: + if sort_type in ["top", "controversial"]: print("\nselect time filter:") - timeFilters = [ - "hour", "day", "week", "month", "year", "all" - ] - timeFilter = self._chooseFrom(timeFilters) - self.arguments.time = timeFilter + time_filters = ["hour", "day", "week", "month", "year", "all"] + time_filter = self._chooseFrom(time_filters) + self.arguments.time = time_filter else: self.arguments.time = "all" - elif programMode == "multireddit": + elif program_mode == "multireddit": self.arguments.user = input("\nmultireddit owner: ") self.arguments.multireddit = input("\nmultireddit: ") print("\nselect sort type:") - sortTypes = [ - "hot", "top", "new", "rising", "controversial" - ] - sortType = self._chooseFrom(sortTypes) - self.arguments.sort = sortType + sort_types = ["hot", "top", "new", "rising", "controversial"] + sort_type = self._chooseFrom(sort_types) + self.arguments.sort = sort_type - if sortType in ["top", "controversial"]: + if sort_type in ["top", "controversial"]: print("\nselect time filter:") - timeFilters = [ - "hour", "day", "week", "month", "year", "all" - ] - timeFilter = self._chooseFrom(timeFilters) - self.arguments.time = timeFilter + time_filters = ["hour", "day", "week", "month", "year", "all"] + time_filter = self._chooseFrom(time_filters) + self.arguments.time = time_filter else: self.arguments.time = "all" - elif programMode == "submitted": + elif program_mode == "submitted": self.arguments.submitted = True self.arguments.user = input("\nredditor: ") print("\nselect sort type:") - sortTypes = [ - "hot", "top", "new", "controversial" - ] - sortType = self._chooseFrom(sortTypes) - self.arguments.sort = sortType + sort_types = ["hot", "top", "new", "controversial"] + sort_type = self._chooseFrom(sort_types) + self.arguments.sort = sort_type - if sortType == "top": + if sort_type == "top": print("\nselect time filter:") - timeFilters = [ - "hour", "day", "week", "month", "year", "all" - ] - timeFilter = self._chooseFrom(timeFilters) - self.arguments.time = timeFilter + time_filters = ["hour", "day", "week", "month", "year", "all"] + time_filter = self._chooseFrom(time_filters) + self.arguments.time = time_filter else: self.arguments.time = "all" - elif programMode == "upvoted": + elif program_mode == "upvoted": self.arguments.upvoted = True self.arguments.user = input("\nredditor: ") - elif programMode == "saved": + elif program_mode == "saved": self.arguments.saved = True - elif programMode == "log": + elif program_mode == "log": while True: self.arguments.log = input("\nlog file directory:") if Path(self.arguments.log).is_file(): @@ -234,7 +206,6 @@ class ProgramMode: """Check if command-line self.arguments are given correcly, if not, raise errors """ - if self.arguments.user is None: user = 0 else: @@ -242,21 +213,13 @@ class ProgramMode: search = 1 if self.arguments.search else 0 - modes = [ - "saved", - "subreddit", - "submitted", - "log", - "link", - 
"upvoted", - "multireddit"] + modes = ["saved", "subreddit", "submitted", "log", "link", "upvoted", "multireddit"] - values = { - x: 0 if getattr(self.arguments, x) is None or - getattr(self.arguments, x) is False - else 1 - for x in modes - } + values = {x: 0 if getattr(self.arguments, x) is None or + getattr(self.arguments, x) is False + else 1 + for x in modes + } if not sum(values[x] for x in values) == 1: raise ProgramModeError("Invalid program mode") diff --git a/src/reddit.py b/src/reddit.py index 9dc374c..a953c87 100644 --- a/src/reddit.py +++ b/src/reddit.py @@ -1,12 +1,14 @@ -import praw import random import socket import webbrowser + +import praw from prawcore.exceptions import ResponseException -from src.utils import GLOBAL +from src.errors import RedditLoginFailed from src.jsonHelper import JsonFile -from src. errors import RedditLoginFailed +from src.utils import GLOBAL + class Reddit: @@ -23,7 +25,6 @@ class Reddit: } def begin(self): - if self.refresh_token: self.arguments["refresh_token"] = self.refresh_token self.redditInstance = praw.Reddit(**self.arguments) @@ -41,11 +42,8 @@ class Reddit: self.redditInstance = praw.Reddit(**self.arguments) reddit, refresh_token = self.getRefreshToken(*self.SCOPES) - JsonFile(GLOBAL.configDirectory).add({ - "reddit_username": str(reddit.user.me()), - "reddit": refresh_token - }, "credentials") - + JsonFile(GLOBAL.configDirectory).add({"reddit_username": str( + reddit.user.me()), "reddit": refresh_token}, "credentials") return self.redditInstance def recieve_connection(self): @@ -63,33 +61,23 @@ class Reddit: @staticmethod def send_message(client, message): """Send message to client and close the connection.""" - client.send( - 'HTTP/1.1 200 OK\r\n\r\n{}'.format(message).encode('utf-8') - ) + client.send('HTTP/1.1 200 OK\r\n\r\n{}'.format(message).encode('utf-8')) client.close() def getRefreshToken(self, *scopes): state = str(random.randint(0, 65000)) url = self.redditInstance.auth.url(scopes, state, 'permanent') print("---Setting up the Reddit API---\n") - print( - "Go to this URL and login to reddit:\n", - url, - sep="\n", - end="\n\n") + print("Go to this URL and login to reddit:\n", url, sep="\n", end="\n\n") webbrowser.open(url, new=2) client = self.recieve_connection() data = client.recv(1024).decode('utf-8') str(data) param_tokens = data.split(' ', 2)[1].split('?', 1)[1].split('&') - params = dict([token.split('=') - for token in param_tokens]) + params = {key: value for (key, value) in [token.split('=') for token in param_tokens]} if state != params['state']: - self.send_message( - client, 'State mismatch. Expected: {} Received: {}' - .format(state, params['state']) - ) + self.send_message(client, 'State mismatch. 
Expected: {} Received: {}'.format(state, params['state'])) raise RedditLoginFailed if 'error' in params: self.send_message(client, params['error']) @@ -101,4 +89,4 @@ class Reddit: "alert(\"You can go back to terminal window now.\");" "" ) - return (self.redditInstance, refresh_token) + return self.redditInstance, refresh_token diff --git a/src/searcher.py b/src/searcher.py index dbb8835..720ba38 100644 --- a/src/searcher.py +++ b/src/searcher.py @@ -1,116 +1,116 @@ import sys import time import urllib.request -from prawcore.exceptions import NotFound, Forbidden +from urllib.error import HTTPError +from prawcore.exceptions import Forbidden, NotFound + +from src.errors import (InsufficientPermission, InvalidSortingType, MultiredditNotFound, NoMatchingSubmissionFound, + NoPrawSupport) from src.reddit import Reddit from src.utils import GLOBAL, createLogFile, printToFile -from src.errors import (NoMatchingSubmissionFound, NoPrawSupport, - MultiredditNotFound, - InvalidSortingType, InsufficientPermission) print = printToFile -def getPosts(programMode): +def getPosts(program_mode): """Call PRAW regarding to arguments and pass it to extractDetails. Return what extractDetails has returned. """ - reddit = Reddit(GLOBAL.config["credentials"]["reddit"]).begin() - if programMode["sort"] == "best": + if program_mode["sort"] == "best": raise NoPrawSupport("PRAW does not support that") - if "subreddit" in programMode: - if "search" in programMode: - if programMode["subreddit"] == "frontpage": - programMode["subreddit"] = "all" + if "subreddit" in program_mode: + if "search" in program_mode: + if program_mode["subreddit"] == "frontpage": + program_mode["subreddit"] = "all" - if "user" in programMode: - if programMode["user"] == "me": - programMode["user"] = str(reddit.user.me()) + if "user" in program_mode: + if program_mode["user"] == "me": + program_mode["user"] = str(reddit.user.me()) - if "search" not in programMode: - if programMode["sort"] == "top" or programMode["sort"] == "controversial": - keyword_params = { - "time_filter": programMode["time"], - "limit": programMode["limit"] - } + if "search" not in program_mode: + if program_mode["sort"] == "top" or program_mode["sort"] == "controversial": + keyword_params = {"time_filter": program_mode["time"], "limit": program_mode["limit"]} # OTHER SORT TYPES DON'T TAKE TIME_FILTER else: - keyword_params = { - "limit": programMode["limit"] - } + keyword_params = {"limit": program_mode["limit"]} else: - keyword_params = { - "time_filter": programMode["time"], - "limit": programMode["limit"] - } + keyword_params = {"time_filter": program_mode["time"], "limit": program_mode["limit"]} - if "search" in programMode: - if programMode["sort"] in ["hot", "rising", "controversial"]: + if "search" in program_mode: + if program_mode["sort"] in ["hot", "rising", "controversial"]: raise InvalidSortingType("Invalid sorting type has given") - if "subreddit" in programMode: + if "subreddit" in program_mode: print( "search for \"{search}\" in\n" "subreddit: {subreddit}\nsort: {sort}\n" "time: {time}\nlimit: {limit}\n".format( - search=programMode["search"], - limit=programMode["limit"], - sort=programMode["sort"], - subreddit=programMode["subreddit"], - time=programMode["time"] - ).upper(), noPrint=True + search=program_mode["search"], + limit=program_mode["limit"], + sort=program_mode["sort"], + subreddit=program_mode["subreddit"], + time=program_mode["time"] + ).upper(), no_print=True ) return extractDetails( - reddit.subreddit(programMode["subreddit"]).search( - 
programMode["search"], - limit=programMode["limit"], - sort=programMode["sort"], - time_filter=programMode["time"] + reddit.subreddit(program_mode["subreddit"]).search( + program_mode["search"], + limit=program_mode["limit"], + sort=program_mode["sort"], + time_filter=program_mode["time"] ) ) - if "multireddit" in programMode: + elif "multireddit" in program_mode: raise NoPrawSupport("PRAW does not support that") - if "user" in programMode: + elif "user" in program_mode: raise NoPrawSupport("PRAW does not support that") - if "saved" in programMode: + elif "saved" in program_mode: raise ("Reddit does not support that") - if programMode["sort"] == "relevance": + if program_mode["sort"] == "relevance": raise InvalidSortingType("Invalid sorting type has given") - if "saved" in programMode: - print( - "saved posts\nuser:{username}\nlimit={limit}\n".format( - username=reddit.user.me(), - limit=programMode["limit"] - ).upper(), noPrint=True + if "saved" in program_mode: + print("saved posts\nuser:{username}\nlimit={limit}\n".format( + username=reddit.user.me(), + limit=program_mode["limit"]).upper(), + no_print=True ) - return extractDetails( - reddit.user.me().saved( - limit=programMode["limit"])) + return extractDetails(reddit.user.me().saved(limit=program_mode["limit"])) - if "subreddit" in programMode: - - if programMode["subreddit"] == "frontpage": + if "subreddit" in program_mode: + if program_mode["subreddit"] == "frontpage": print( "subreddit: {subreddit}\nsort: {sort}\n" "time: {time}\nlimit: {limit}\n".format( - limit=programMode["limit"], - sort=programMode["sort"], - subreddit=programMode["subreddit"], - time=programMode["time"] - ).upper(), noPrint=True + limit=program_mode["limit"], + sort=program_mode["sort"], + subreddit=program_mode["subreddit"], + time=program_mode["time"]).upper(), + no_print=True + ) + return extractDetails(getattr(reddit.front, program_mode["sort"])(**keyword_params)) + + else: + print( + "subreddit: {subreddit}\nsort: {sort}\n" + "time: {time}\nlimit: {limit}\n".format( + limit=program_mode["limit"], + sort=program_mode["sort"], + subreddit=program_mode["subreddit"], + time=program_mode["time"]).upper(), + no_print=True ) return extractDetails( - getattr(reddit.front, programMode["sort"])(**keyword_params) + getattr(reddit.subreddit(program_mode["subreddit"]), program_mode["sort"])(**keyword_params) ) print( "subreddit: {subreddit}\nsort: {sort}\n" @@ -127,87 +127,75 @@ def getPosts(programMode): )(**keyword_params) ) - if "multireddit" in programMode: + elif "multireddit" in program_mode: print( "user: {user}\n" "multireddit: {multireddit}\nsort: {sort}\n" "time: {time}\nlimit: {limit}\n".format( - user=programMode["user"], - limit=programMode["limit"], - sort=programMode["sort"], - multireddit=programMode["multireddit"], - time=programMode["time"] - ).upper(), noPrint=True + user=program_mode["user"], + limit=program_mode["limit"], + sort=program_mode["sort"], + multireddit=program_mode["multireddit"], + time=program_mode["time"]).upper(), + no_print=True ) try: return extractDetails( - getattr( - reddit.multireddit( - programMode["user"], programMode["multireddit"] - ), programMode["sort"] - )(**keyword_params) + getattr(reddit.multireddit(program_mode["user"], program_mode["multireddit"]), + program_mode["sort"] + )(**keyword_params) ) except NotFound: raise MultiredditNotFound("Multireddit not found") - elif "submitted" in programMode: + elif "submitted" in program_mode: print( "submitted posts of {user}\nsort: {sort}\n" "time: {time}\nlimit: 
{limit}\n".format( - limit=programMode["limit"], - sort=programMode["sort"], - user=programMode["user"], - time=programMode["time"] - ).upper(), noPrint=True + limit=program_mode["limit"], + sort=program_mode["sort"], + user=program_mode["user"], + time=program_mode["time"]).upper(), + no_print=True ) return extractDetails( - getattr( - reddit.redditor(programMode["user"] - ).submissions, programMode["sort"] - )(**keyword_params) + getattr(reddit.redditor(program_mode["user"]).submissions, program_mode["sort"])(**keyword_params) ) - elif "upvoted" in programMode: + elif "upvoted" in program_mode: print( "upvoted posts of {user}\nlimit: {limit}\n".format( - user=programMode["user"], - limit=programMode["limit"] - ).upper(), noPrint=True + user=program_mode["user"], + limit=program_mode["limit"]).upper(), + no_print=True ) try: - return extractDetails( - reddit.redditor(programMode["user"]).upvoted( - limit=programMode["limit"]) - ) + return extractDetails(reddit.redditor(program_mode["user"]).upvoted(limit=program_mode["limit"])) except Forbidden: raise InsufficientPermission( "You do not have permission to do that") - elif "post" in programMode: - print("post: {post}\n".format( - post=programMode["post"]).upper(), noPrint=True) - return extractDetails( - reddit.submission(url=programMode["post"]), SINGLE_POST=True - ) + elif "post" in program_mode: + print("post: {post}\n".format(post=program_mode["post"]).upper(), no_print=True) + return extractDetails(reddit.submission(url=program_mode["post"]), single_post=True) -def extractDetails(posts, SINGLE_POST=False): +def extractDetails(posts, single_post=False): """Check posts and decide if it can be downloaded. If so, create a dictionary with post details and append them to a list. Write all of posts to file. 
Return the list """ + post_list = [] + post_count = 1 - postList = [] - postCount = 1 - - allPosts = {} + all_posts = {} print("\nGETTING POSTS") - postsFile = createLogFile("POSTS") + posts_file = createLogFile("POSTS") - if SINGLE_POST: + if single_post: submission = posts - postCount += 1 + post_count += 1 try: details = {'POSTID': submission.id, 'TITLE': submission.title, @@ -217,12 +205,8 @@ def extractDetails(posts, SINGLE_POST=False): 'SUBREDDIT': submission.subreddit.display_name, 'UPVOTES': submission.score, 'FLAIR': submission.link_flair_text, - 'DATE': str(time.strftime( - "%Y-%m-%d_%H-%M", - time.localtime(submission.created_utc) - ))} - if 'gallery' in submission.url: - details['CONTENTURL'] = genLinksifGallery(submission.media_metadata) + 'DATE': str(time.strftime("%Y-%m-%d_%H-%M", time.localtime(submission.created_utc))) + } except AttributeError: pass @@ -232,18 +216,17 @@ def extractDetails(posts, SINGLE_POST=False): if result is not None: details = {**details, **result} - postList.append(details) - postsFile.add({postCount: details}) + post_list.append(details) + posts_file.add({post_count: details}) else: try: for submission in posts: - - if postCount % 100 == 0: + if post_count % 100 == 0: sys.stdout.write("• ") sys.stdout.flush() - if postCount % 1000 == 0: + if post_count % 1000 == 0: sys.stdout.write("\n" + " " * 14) sys.stdout.flush() @@ -256,12 +239,8 @@ def extractDetails(posts, SINGLE_POST=False): 'SUBREDDIT': submission.subreddit.display_name, 'UPVOTES': submission.score, 'FLAIR': submission.link_flair_text, - 'DATE': str(time.strftime( - "%Y-%m-%d_%H-%M", - time.localtime(submission.created_utc) - ))} - if 'gallery' in submission.url: - details['CONTENTURL'] = genLinksifGallery(submission.media_metadata) + 'DATE': str(time.strftime("%Y-%m-%d_%H-%M", time.localtime(submission.created_utc))) + } except AttributeError: continue @@ -274,52 +253,46 @@ def extractDetails(posts, SINGLE_POST=False): if result is not None: details = {**details, **result} - postList.append(details) + post_list.append(details) - allPosts[postCount] = details - postCount += 1 + all_posts[post_count] = details + post_count += 1 except KeyboardInterrupt: - print("\nKeyboardInterrupt", noPrint=True) + print("\nKeyboardInterrupt", no_print=True) - postsFile.add(allPosts) + posts_file.add(all_posts) - if len(postList) != 0: + if not len(post_list) == 0: print() - return postList - raise NoMatchingSubmissionFound("No matching submission was found") + return post_list + else: + raise NoMatchingSubmissionFound("No matching submission was found") def matchWithDownloader(submission): - - if 'gallery' in submission.url: - return{'TYPE':'gallery'} - - directLink = extractDirectLink(submission.url) - if directLink: - return {'TYPE': 'direct', - 'CONTENTURL': directLink} + direct_link = extractDirectLink(submission.url) + if direct_link: + return {'TYPE': 'direct', 'CONTENTURL': direct_link} if 'v.redd.it' in submission.domain: - bitrates = ["DASH_1080", "DASH_720", "DASH_600", - "DASH_480", "DASH_360", "DASH_240"] + bitrates = ["DASH_1080", "DASH_720", "DASH_600", "DASH_480", "DASH_360", "DASH_240"] for bitrate in bitrates: - videoURL = submission.url + "/" + bitrate + ".mp4" + video_url = submission.url + "/" + bitrate + ".mp4" try: - responseCode = urllib.request.urlopen(videoURL).getcode() + response_code = urllib.request.urlopen(video_url).getcode() except urllib.error.HTTPError: - responseCode = 0 + response_code = 0 - if responseCode == 200: - return {'TYPE': 'v.redd.it', 'CONTENTURL': videoURL} 
+ if response_code == 200: + return {'TYPE': 'v.redd.it', 'CONTENTURL': video_url} if 'gfycat' in submission.domain: return {'TYPE': 'gfycat'} - if 'youtube' in submission.domain \ - and 'watch' in submission.url: + if 'youtube' in submission.domain and 'watch' in submission.url: return {'TYPE': 'youtube'} if 'youtu.be' in submission.domain: @@ -342,33 +315,25 @@ def matchWithDownloader(submission): if 'reddit.com/gallery' in submission.url: return {'TYPE': 'gallery'} - if submission.is_self and 'self' not in GLOBAL.arguments.skip: + elif submission.is_self and 'self' not in GLOBAL.arguments.skip: return {'TYPE': 'self', 'CONTENT': submission.selftext} -def extractDirectLink(URL): +def extractDirectLink(url): """Check if link is a direct image link. If so, return URL, if not, return False """ + image_types = ['jpg', 'jpeg', 'png', 'mp4', 'webm', 'gif'] + if url[-1] == "/": + url = url[:-1] - imageTypes = ['jpg', 'jpeg', 'png', 'mp4', 'webm', 'gif'] - if URL[-1] == "/": - URL = URL[:-1] + if "i.reddituploads.com" in url: + return url - if "i.reddituploads.com" in URL: - return URL - - for extension in imageTypes: - if extension == URL.split(".")[-1]: - return URL - - return None - -def genLinksifGallery(metadata): - galleryImgUrls = list() - if metadata is not None: - for key in metadata: - galleryImgUrls.append(metadata[key]['s']['u'].split('?')[0].replace('preview','i')) - return galleryImgUrls + for extension in image_types: + if extension == url.split(".")[-1]: + return url + else: + return None diff --git a/src/utils.py b/src/utils.py index 402db90..5debfe9 100644 --- a/src/utils.py +++ b/src/utils.py @@ -8,7 +8,6 @@ from src.jsonHelper import JsonFile class GLOBAL: """Declare global variables""" - RUN_TIME = "" config = {'imgur_client_id': None, 'imgur_client_secret': None} arguments = None @@ -17,54 +16,47 @@ class GLOBAL: configDirectory = "" reddit_client_id = "U-6gk4ZCh3IeNQ" reddit_client_secret = "7CZHY6AmKweZME5s50SfDGylaPg" - @staticmethod - def downloadedPosts(): return [] printVanilla = print - log_stream = None + @staticmethod + def downloadedPosts(): + return [] -def createLogFile(TITLE): + +def createLogFile(title): """Create a log file with given name inside a folder time stampt in its name and put given arguments inside \"HEADER\" key """ + folder_directory = GLOBAL.directory / "LOG_FILES" / GLOBAL.RUN_TIME - folderDirectory = GLOBAL.directory / "LOG_FILES" / GLOBAL.RUN_TIME + log_filename = title.upper() + '.json' - logFilename = TITLE.upper() + '.json' + if not path.exists(folder_directory): + makedirs(folder_directory) - if not path.exists(folderDirectory): - makedirs(folderDirectory) + file = JsonFile(folder_directory / Path(log_filename)) + header = " ".join(sys.argv) + file.add({"HEADER": header}) - FILE = JsonFile(folderDirectory / Path(logFilename)) - HEADER = " ".join(sys.argv) - FILE.add({"HEADER": HEADER}) - - return FILE + return file -def printToFile(*args, noPrint=False, **kwargs): +def printToFile(*args, no_print=False, **kwargs): """Print to both CONSOLE and CONSOLE LOG file in a folder time stampt in the name """ + folder_directory = GLOBAL.directory / Path("LOG_FILES") / Path(GLOBAL.RUN_TIME) - folderDirectory = GLOBAL.directory / \ - Path("LOG_FILES") / Path(GLOBAL.RUN_TIME) - - if not noPrint or \ - GLOBAL.arguments.verbose or \ - "file" in kwargs: - + if not no_print or GLOBAL.arguments.verbose or "file" in kwargs: print(*args, **kwargs) - if not path.exists(folderDirectory): - makedirs(folderDirectory) + if not path.exists(folder_directory): + 
makedirs(folder_directory)
 
     if "file" not in kwargs:
-        with io.open(
-            folderDirectory / "CONSOLE_LOG.txt", "a", encoding="utf-8"
-        ) as FILE:
+        with io.open(folder_directory / "CONSOLE_LOG.txt", "a", encoding="utf-8") as FILE:
             print(*args, file=FILE, **kwargs)
 
@@ -73,19 +65,18 @@ def nameCorrector(string, reference=None):
     with underscore (_) and shorten it.
     Return the string
     """
-
-    LIMIT = 247
-
-    stringLength = len(string)
+    limit = 247
+    string_length = len(string)
 
     if reference:
-        referenceLenght = len(reference)
-        totalLenght = referenceLenght
+        reference_length = len(reference)
+        total_length = string_length + reference_length
     else:
-        totalLenght = stringLength
+        reference_length = 0
+        total_length = string_length
 
-    if totalLenght > LIMIT:
-        limit = LIMIT - referenceLenght
+    if total_length > limit:
+        limit -= reference_length
 
     string = string[:limit - 1]
     string = string.replace(" ", "_")
@@ -93,8 +84,7 @@ def nameCorrector(string, reference=None):
     if len(string.split('\n')) > 1:
         string = "".join(string.split('\n'))
 
-    BAD_CHARS = ['\\', '/', ':', '*', '?', '"', '<',
-                 '>', '|', '#', '.', '@', '“', '’', '\'', '!']
-    string = "".join([i if i not in BAD_CHARS else "_" for i in string])
+    bad_chars = ['\\', '/', ':', '*', '?', '"', '<', '>', '|', '#', '.', '@', '“', '’', '\'', '!']
+    string = "".join([i if i not in bad_chars else "_" for i in string])
 
     return string
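Note for reviewers: the renames in this patch are meant to be purely mechanical, so behaviour can be spot-checked in isolation. A minimal sketch of how the refactored parser should behave once the patch is applied; the URL and the expected values are illustrative assumptions, not something this diff adds:

    # parser_smoke_test.py - assumes the repository root is on sys.path
    from src.parser import LinkDesigner

    mode = LinkDesigner("https://www.reddit.com/r/pics/top/?t=week")
    assert mode["subreddit"] == "pics"  # taken from the /r/ path segment
    assert mode["sort"] == "top"        # recognised sort keyword in the path
    assert mode["time"] == "week"       # parsed from the ?t= query string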
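A similar sketch for the renamed JsonFile helper, since its docstring no longer spells out the add/delete usage it used to mention; the file name here is hypothetical:

    from src.jsonHelper import JsonFile

    log = JsonFile("example.json")        # file is created with {} if missing
    log.add({"1": {"POSTID": "abc123"}})  # merge new keys into the file
    log.delete("1")                       # remove the key again
    print(log.read())                     # -> {}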
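Finally, the noPrint keyword of printToFile becomes no_print, and every call site in searcher.py is switched over in this patch. A hedged usage sketch, assuming GLOBAL.directory and GLOBAL.RUN_TIME have already been initialised by script.py; the message text is made up:

    from src.utils import printToFile

    # Written to CONSOLE_LOG.txt but not echoed to the terminal,
    # unless --verbose is set:
    printToFile("fetching posts", no_print=True)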