diff --git a/Dockerfile b/Dockerfile
index d9940d5..366473f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,9 +1,31 @@
-FROM python:latest
+# Bulk Downloader for Reddit
+#
+# VERSION 0.0.1
+
+FROM python:3.8-slim-buster
+LABEL Description="This image enables running Bulk Downloader for Reddit within a container environment" Version="0.0.1"
+
+ENV PYTHONUNBUFFERED 1
+ENV PYTHONDONTWRITEBYTECODE 1

-WORKDIR "/root/Bulk Downloader for Reddit"
-COPY ./requirements.txt ./
-RUN ["pip", "install", "-r", "requirements.txt"]
 EXPOSE 8080
 EXPOSE 7634

-CMD ["python", "script.py", "-d", "downloads"]
\ No newline at end of file
+# Install dependencies for building Python packages
+RUN apt-get update \
+    && apt-get install -y build-essential \
+    && apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false \
+    && rm -rf /var/lib/apt/lists/*
+
+# Requirements are installed here to ensure they will be cached.
+COPY requirements.txt /requirements.txt
+RUN pip install --no-cache-dir -r /requirements.txt \
+    && rm -rf /requirements.txt
+
+# Copy project files into container
+COPY . /bdfr
+WORKDIR /bdfr
+
+# This is useful because the image name can double as a reference to the binary
+ENTRYPOINT ["python", "script.py"]
+CMD ["--help"]
diff --git a/docker-compose.yml b/docker-compose.yml
index e9c069a..da3afb4 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,12 +1,16 @@
 version: "3"
+
 services:
+  bdfr:
+    build:
+      context: .
+      dockerfile: ./Dockerfile
     image: bdfr
-    build: .
+    container_name: bdfr
     ports:
       - "8080:8080"
       - "7634:7634"
     volumes:
-      - "./:/root/Bulk Downloader for Reddit"
+      - .:/bdfr:z
-    container_name: bdfr_container
-    network_mode: bridge
\ No newline at end of file
+    network_mode: bridge
""" - -import argparse import logging import os import sys import time -import webbrowser from io import StringIO -from pathlib import Path, PurePath +from pathlib import Path from prawcore.exceptions import InsufficientScope from src.downloaders.Direct import Direct @@ -26,8 +23,7 @@ from src.downloaders.vreddit import VReddit from src.downloaders.youtube import Youtube from src.downloaders.gifDeliveryNetwork import GifDeliveryNetwork from src.downloaders.gallery import gallery -from src.errors import ImgurLimitError, NoSuitablePost, FileAlreadyExistsError, ImgurLoginError, NotADownloadableLinkError, NoSuitablePost, InvalidJSONFile, FailedToDownload, TypeInSkip, DomainInSkip, AlbumNotDownloadedCompletely, full_exc_info -from src.parser import LinkDesigner +from src.errors import ImgurLimitError, FileAlreadyExistsError, ImgurLoginError, NotADownloadableLinkError, NoSuitablePost, InvalidJSONFile, FailedToDownload, TypeInSkip, DomainInSkip, AlbumNotDownloadedCompletely, full_exc_info from src.searcher import getPosts from src.utils import (GLOBAL, createLogFile, nameCorrector, printToFile) @@ -44,6 +40,7 @@ __version__ = "1.9.4" __maintainer__ = "Ali Parlakci" __email__ = "parlakciali@gmail.com" + def postFromLog(fileName): """Analyze a log file and return a list of dictionaries containing submissions @@ -62,17 +59,19 @@ def postFromLog(fileName): posts = [] for post in content: - if not content[post][-1]['TYPE'] == None: + if content[post][-1]['TYPE'] is not None: posts.append(content[post][-1]) return posts -def isPostExists(POST,directory): + +def isPostExists(POST, directory): """Figure out a file's name and checks if the file already exists""" filename = GLOBAL.config['filename'].format(**POST) - possibleExtensions = [".jpg",".png",".mp4",".gif",".webm",".md",".mkv",".flv"] + possibleExtensions = [".jpg", ".png", ".mp4", + ".gif", ".webm", ".md", ".mkv", ".flv"] for extension in possibleExtensions: @@ -81,10 +80,10 @@ def isPostExists(POST,directory): if path.exists(): return True - else: - return False + return False -def downloadPost(SUBMISSION,directory): + +def downloadPost(SUBMISSION, directory): downloaders = { "imgur":Imgur,"gfycat":Gfycat,"erome":Erome,"direct":Direct,"self":SelfPost, @@ -94,11 +93,10 @@ def downloadPost(SUBMISSION,directory): print() if SUBMISSION['TYPE'] in downloaders: - downloaders[SUBMISSION['TYPE']] (directory,SUBMISSION) + downloaders[SUBMISSION['TYPE']](directory, SUBMISSION) else: raise NoSuitablePost - return None def download(submissions): """Analyze list of submissions and call the right function @@ -114,32 +112,33 @@ def download(submissions): reddit = Reddit(GLOBAL.config['credentials']['reddit']).begin() subsLenght = len(submissions) - + for i in range(len(submissions)): - print(f"\n({i+1}/{subsLenght})",end=" — ") + print(f"\n({i+1}/{subsLenght})", end=" — ") print(submissions[i]['POSTID'], f"r/{submissions[i]['SUBREDDIT']}", f"u/{submissions[i]['REDDITOR']}", submissions[i]['FLAIR'] if submissions[i]['FLAIR'] else "", sep=" — ", end="") - print(f" – {submissions[i]['TYPE'].upper()}",end="",noPrint=True) + print(f" – {submissions[i]['TYPE'].upper()}", end="", noPrint=True) - directory = GLOBAL.directory / GLOBAL.config["folderpath"].format(**submissions[i]) + directory = GLOBAL.directory / \ + GLOBAL.config["folderpath"].format(**submissions[i]) details = { - **submissions[i], + **submissions[i], **{ "TITLE": nameCorrector( submissions[i]['TITLE'], - reference = str(directory) - + GLOBAL.config['filename'].format(**submissions[i]) - + ".ext" + 
reference=str(directory) + + GLOBAL.config['filename'].format(**submissions[i]) + + ".ext" ) } } filename = GLOBAL.config['filename'].format(**details) - if isPostExists(details,directory): + if isPostExists(details, directory): print() print(directory) print(filename) @@ -154,7 +153,7 @@ def download(submissions): continue try: - downloadPost(details,directory) + downloadPost(details, directory) GLOBAL.downloadedPosts.add(details['POSTID']) try: if GLOBAL.arguments.unsave: @@ -162,9 +161,9 @@ def download(submissions): except InsufficientScope: reddit = Reddit().begin() reddit.submission(id=details['POSTID']).unsave() - + downloadedCount += 1 - + except FileAlreadyExistsError: print("It already exists") GLOBAL.downloadedPosts.add(details['POSTID']) @@ -172,15 +171,16 @@ def download(submissions): except ImgurLoginError: print( - "Imgur login failed. \nQuitting the program "\ + "Imgur login failed. \nQuitting the program " "as unexpected errors might occur." ) sys.exit() except ImgurLimitError as exception: - FAILED_FILE.add({int(i+1):[ + FAILED_FILE.add({int(i+1): [ "{class_name}: {info}".format( - class_name=exception.__class__.__name__,info=str(exception) + class_name=exception.__class__.__name__, info=str( + exception) ), details ]}) @@ -188,12 +188,14 @@ def download(submissions): except NotADownloadableLinkError as exception: print( "{class_name}: {info}".format( - class_name=exception.__class__.__name__,info=str(exception) + class_name=exception.__class__.__name__, info=str( + exception) ) ) - FAILED_FILE.add({int(i+1):[ + FAILED_FILE.add({int(i+1): [ "{class_name}: {info}".format( - class_name=exception.__class__.__name__,info=str(exception) + class_name=exception.__class__.__name__, info=str( + exception) ), submissions[i] ]}) @@ -215,61 +217,64 @@ def download(submissions): print("Failed to download the posts, skipping...") except AlbumNotDownloadedCompletely: print("Album did not downloaded completely.") - FAILED_FILE.add({int(i+1):[ + FAILED_FILE.add({int(i+1): [ "{class_name}: {info}".format( - class_name=exc.__class__.__name__,info=str(exc) + class_name=exc.__class__.__name__, info=str(exc) ), submissions[i] ]}) - + except Exception as exc: print( "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format( - class_name=exc.__class__.__name__,info=str(exc) + class_name=exc.__class__.__name__, info=str(exc) ) ) logging.error(sys.exc_info()[0].__name__, exc_info=full_exc_info(sys.exc_info())) - print(GLOBAL.log_stream.getvalue(),noPrint=True) + print(GLOBAL.log_stream.getvalue(), noPrint=True) - FAILED_FILE.add({int(i+1):[ + FAILED_FILE.add({int(i+1): [ "{class_name}: {info}".format( - class_name=exc.__class__.__name__,info=str(exc) + class_name=exc.__class__.__name__, info=str(exc) ), submissions[i] ]}) if duplicates: - print(f"\nThere {'were' if duplicates > 1 else 'was'} " \ + print(f"\nThere {'were' if duplicates > 1 else 'was'} " f"{duplicates} duplicate{'s' if duplicates > 1 else ''}") - if downloadedCount == 0: - print("Nothing is downloaded :(") - - else: - print(f"Total of {downloadedCount} " \ + if downloadedCount: + print(f"Total of {downloadedCount} " f"link{'s' if downloadedCount > 1 else ''} downloaded!") + else: + print("Nothing is downloaded :(") + + def printLogo(): + VanillaPrint( - f"\nBulk Downloader for Reddit v{__version__}\n" \ - f"Written by Ali PARLAKCI – parlakciali@gmail.com\n\n" \ + f"\nBulk Downloader for Reddit v{__version__}\n" + f"Written by Ali PARLAKCI – parlakciali@gmail.com\n\n" 
f"https://github.com/aliparlakci/bulk-downloader-for-reddit/\n" ) + def main(): - + if Path("config.json").exists(): GLOBAL.configDirectory = Path("config.json") else: if not Path(GLOBAL.defaultConfigDirectory).is_dir(): os.makedirs(GLOBAL.defaultConfigDirectory) - GLOBAL.configDirectory = GLOBAL.defaultConfigDirectory / "config.json" + GLOBAL.configDirectory = GLOBAL.defaultConfigDirectory / "config.json" try: GLOBAL.config = Config(GLOBAL.configDirectory).generate() except InvalidJSONFile as exception: - VanillaPrint(str(exception.__class__.__name__),">>",str(exception)) + VanillaPrint(str(exception.__class__.__name__), ">>", str(exception)) VanillaPrint("Resolve it or remove it to proceed") input("\nPress enter to quit") sys.exit() @@ -298,11 +303,12 @@ def main(): if arguments.use_local_config: JsonFile("config.json").add(GLOBAL.config) sys.exit() - + if arguments.directory: GLOBAL.directory = Path(arguments.directory.strip()) elif "default_directory" in GLOBAL.config and GLOBAL.config["default_directory"] != "": - GLOBAL.directory = Path(GLOBAL.config["default_directory"].format(time=GLOBAL.RUN_TIME)) + GLOBAL.directory = Path( + GLOBAL.config["default_directory"].format(time=GLOBAL.RUN_TIME)) else: GLOBAL.directory = Path(input("\ndownload directory: ").strip()) @@ -312,14 +318,13 @@ def main(): GLOBAL.downloadedPosts = Store() printLogo() - print("\n"," ".join(sys.argv),"\n",noPrint=True) + print("\n", " ".join(sys.argv), "\n", noPrint=True) if arguments.log is not None: logDir = Path(arguments.log) download(postFromLog(logDir)) sys.exit() - programMode = ProgramMode(arguments).generate() try: @@ -327,7 +332,7 @@ def main(): except Exception as exc: logging.error(sys.exc_info()[0].__name__, exc_info=full_exc_info(sys.exc_info())) - print(GLOBAL.log_stream.getvalue(),noPrint=True) + print(GLOBAL.log_stream.getvalue(), noPrint=True) print(exc) sys.exit() @@ -335,27 +340,30 @@ def main(): print("I could not find any posts in that URL") sys.exit() - if GLOBAL.arguments.no_download: pass - else: download(posts) + if GLOBAL.arguments.no_download: + pass + else: + download(posts) + if __name__ == "__main__": - GLOBAL.log_stream = StringIO() + GLOBAL.log_stream = StringIO() logging.basicConfig(stream=GLOBAL.log_stream, level=logging.INFO) try: VanillaPrint = print print = printToFile GLOBAL.RUN_TIME = str(time.strftime( - "%d-%m-%Y_%H-%M-%S", - time.localtime(time.time()) - )) + "%d-%m-%Y_%H-%M-%S", + time.localtime(time.time()) + )) main() except KeyboardInterrupt: if GLOBAL.directory is None: GLOBAL.directory = Path("..\\") - + except Exception as exception: if GLOBAL.directory is None: GLOBAL.directory = Path("..\\") @@ -363,5 +371,5 @@ if __name__ == "__main__": exc_info=full_exc_info(sys.exc_info())) print(GLOBAL.log_stream.getvalue()) - if not GLOBAL.arguments.quit: input("\nPress enter to quit\n") - + if not GLOBAL.arguments.quit: + input("\nPress enter to quit\n") diff --git a/src/arguments.py b/src/arguments.py index dd7c0e9..e2838bf 100644 --- a/src/arguments.py +++ b/src/arguments.py @@ -1,31 +1,34 @@ import argparse import sys + class Arguments: @staticmethod - def parse(arguments=[]): + def parse(arguments=None): """Initialize argparse and add arguments""" + if arguments is None: + arguments = [] parser = argparse.ArgumentParser(allow_abbrev=False, - description="This program downloads " \ - "media from reddit " \ - "posts") - parser.add_argument("--directory","-d", - help="Specifies the directory where posts will be " \ + description="This program downloads " + "media from 
reddit " + "posts") + parser.add_argument("--directory", "-d", + help="Specifies the directory where posts will be " "downloaded to", metavar="DIRECTORY") - - parser.add_argument("--verbose","-v", + + parser.add_argument("--verbose", "-v", help="Verbose Mode", action="store_true", default=False) - - parser.add_argument("--quit","-q", + + parser.add_argument("--quit", "-q", help="Auto quit afer the process finishes", action="store_true", default=False) - parser.add_argument("--link","-l", + parser.add_argument("--link", "-l", help="Get posts from link", metavar="link") @@ -47,43 +50,45 @@ class Arguments: help="Gets upvoted posts of --user") parser.add_argument("--log", - help="Takes a log file which created by itself " \ - "(json files), reads posts and tries downloadin" \ - "g them again.", + help="Takes a log file which created by itself " + "(json files), reads posts and tries downloadin" + "g them again.", # type=argparse.FileType('r'), metavar="LOG FILE") - parser.add_argument("--subreddit", - nargs="+", - help="Triggers subreddit mode and takes subreddit's " \ - "name without r/. use \"frontpage\" for frontpage", - metavar="SUBREDDIT", - type=str) - + parser.add_argument( + "--subreddit", + nargs="+", + help="Triggers subreddit mode and takes subreddit's " + "name without r/. use \"frontpage\" for frontpage", + metavar="SUBREDDIT", + type=str) + parser.add_argument("--multireddit", - help="Triggers multireddit mode and takes "\ - "multireddit's name without m/", + help="Triggers multireddit mode and takes " + "multireddit's name without m/", metavar="MULTIREDDIT", type=str) parser.add_argument("--user", - help="reddit username if needed. use \"me\" for " \ - "current user", - required="--multireddit" in sys.argv or \ - "--submitted" in sys.argv, + help="reddit username if needed. use \"me\" for " + "current user", + required="--multireddit" in sys.argv or + "--submitted" in sys.argv, metavar="redditor", type=str) - parser.add_argument("--search", - help="Searches for given query in given subreddits", - metavar="query", - type=str) + parser.add_argument( + "--search", + help="Searches for given query in given subreddits", + metavar="query", + type=str) parser.add_argument("--sort", - help="Either hot, top, new, controversial, rising " \ - "or relevance default: hot", + help="Either hot, top, new, controversial, rising " + "or relevance default: hot", choices=[ - "hot","top","new","controversial","rising", + "hot", "top", "new", "controversial", "rising", "relevance" ], metavar="SORT TYPE", @@ -95,9 +100,10 @@ class Arguments: type=int) parser.add_argument("--time", - help="Either hour, day, week, month, year or all." \ - " default: all", - choices=["all","hour","day","week","month","year"], + help="Either hour, day, week, month, year or all." 
+ " default: all", + choices=["all", "hour", "day", + "week", "month", "year"], metavar="TIME_LIMIT", type=str) @@ -105,57 +111,59 @@ class Arguments: nargs="+", help="Skip posts with given type", type=str, - choices=["images","videos","gifs","self"], - default=[]) + choices=["images", "videos", "gifs", "self"], + default=[]) parser.add_argument("--skip-domain", nargs="+", help="Skip posts with given domain", type=str, - default=[]) + default=[]) parser.add_argument("--set-folderpath", action="store_true", help="Set custom folderpath" - ) + ) parser.add_argument("--set-filename", action="store_true", help="Set custom filename", - ) - - parser.add_argument("--set-default-directory", - action="store_true", - help="Set a default directory to be used in case no directory is given", ) - parser.add_argument("--set-default-options", - action="store_true", - help="Set default options to use everytime program runs", - ) + parser.add_argument( + "--set-default-directory", + action="store_true", + help="Set a default directory to be used in case no directory is given", + ) - parser.add_argument("--use-local-config", - action="store_true", - help="Creates a config file in the program's directory and uses it. Useful for having multiple configs", - ) + parser.add_argument( + "--set-default-options", + action="store_true", + help="Set default options to use everytime program runs", + ) - parser.add_argument("--no-dupes", - action="store_true", - help="Do not download duplicate posts on different subreddits", - ) + parser.add_argument( + "--use-local-config", + action="store_true", + help="Creates a config file in the program's directory and uses it. Useful for having multiple configs", + ) - parser.add_argument("--downloaded-posts", - help="Use a hash file to keep track of downloaded files", - type=str - ) + parser.add_argument( + "--no-dupes", + action="store_true", + help="Do not download duplicate posts on different subreddits", + ) - parser.add_argument("--no-download", - action="store_true", - help="Just saved posts into a the POSTS.json file without downloading" - ) - + parser.add_argument( + "--downloaded-posts", + help="Use a hash file to keep track of downloaded files", + type=str) + + parser.add_argument( + "--no-download", + action="store_true", + help="Just saved posts into a the POSTS.json file without downloading") if arguments == []: return parser.parse_args() - else: - return parser.parse_args(arguments) \ No newline at end of file + return parser.parse_args(arguments) diff --git a/src/config.py b/src/config.py index 6c4c843..662f91a 100644 --- a/src/config.py +++ b/src/config.py @@ -1,15 +1,12 @@ -import os -import socket -import webbrowser -import random from src.reddit import Reddit from src.jsonHelper import JsonFile from src.utils import nameCorrector + class Config(): - def __init__(self,filename): + def __init__(self, filename): self.filename = filename self.file = JsonFile(self.filename) @@ -45,7 +42,7 @@ Existing filename template:""", None if "filename" not in self.file.read() else def _readCustomFileName(self): content = self.file.read() - if not "filename" in content: + if "filename" not in content: self.file.add({ "filename": "{REDDITOR}_{TITLE}_{POSTID}" }) @@ -75,9 +72,9 @@ Existing folder structure""", None if "folderpath" not in self.file.read() else "folderpath": folderpath }) - def _readCustomFolderPath(self,path=None): + def _readCustomFolderPath(self, path=None): content = self.file.read() - if not "folderpath" in content: + if "folderpath" not in content: 
diff --git a/src/config.py b/src/config.py
index 6c4c843..662f91a 100644
--- a/src/config.py
+++ b/src/config.py
@@ -1,15 +1,12 @@
-import os
-import socket
-import webbrowser
-import random

 from src.reddit import Reddit
 from src.jsonHelper import JsonFile
 from src.utils import nameCorrector

+
 class Config():

-    def __init__(self,filename):
+    def __init__(self, filename):
         self.filename = filename
         self.file = JsonFile(self.filename)
@@ -45,7 +42,7 @@ Existing filename template:""", None if "filename" not in self.file.read() else

     def _readCustomFileName(self):
         content = self.file.read()
-        if not "filename" in content:
+        if "filename" not in content:
             self.file.add({
                 "filename": "{REDDITOR}_{TITLE}_{POSTID}"
             })
@@ -75,9 +72,9 @@ Existing folder structure""", None if "folderpath" not in self.file.read() else
             "folderpath": folderpath
         })

-    def _readCustomFolderPath(self,path=None):
+    def _readCustomFolderPath(self, path=None):
         content = self.file.read()
-        if not "folderpath" in content:
+        if "folderpath" not in content:
             self.file.add({
                 "folderpath": "{SUBREDDIT}"
             })
@@ -96,9 +93,9 @@ Existing default options:""", None if "options" not in self.file.read() else sel
             "options": options
         })

-    def _readDefaultOptions(self,path=None):
+    def _readDefaultOptions(self, path=None):
         content = self.file.read()
-        if not "options" in content:
+        if "options" not in content:
             self.file.add({
                 "options": ""
             })
@@ -108,17 +105,17 @@ Existing default options:""", None if "options" not in self.file.read() else sel

         try:
             content = self.file.read()["credentials"]
-        except:
+        except BaseException:
             self.file.add({
-                "credentials":{}
+                "credentials": {}
             })
             content = self.file.read()["credentials"]
-        
+
         if "reddit" in content and len(content["reddit"]) != 0:
             pass
         else:
             Reddit().begin()
-        
+
         print()

     def setDefaultDirectory(self):
@@ -126,7 +123,8 @@ Leave blank to reset it
 You can use {time} in foler names to use to timestamp it
 For example: D:/archive/BDFR_{time}
 """)
-        print("Current default directory:", self.file.read()["default_directory"] if "default_directory" in self.file.read() else "")
+        print("Current default directory:", self.file.read()[
+              "default_directory"] if "default_directory" in self.file.read() else "")
         self.file.add({
             "default_directory": input(">> ")
-        })
\ No newline at end of file
+        })
diff --git a/src/downloaders/Direct.py b/src/downloaders/Direct.py
index 59003fc..e22c8a3 100644
--- a/src/downloaders/Direct.py
+++ b/src/downloaders/Direct.py
@@ -1,18 +1,16 @@
 import os

 from src.downloaders.downloaderUtils import getFile, getExtension
-
-from src.errors import FileNameTooLong
 from src.utils import GLOBAL
-from src.utils import printToFile as print
+

 class Direct:
-    def __init__(self,directory,POST):
+    def __init__(self, directory, POST):
         POST['EXTENSION'] = getExtension(POST['CONTENTURL'])

-        if not os.path.exists(directory): os.makedirs(directory)
+        if not os.path.exists(directory):
+            os.makedirs(directory)

-        filename = GLOBAL.config['filename'].format(**POST)+POST["EXTENSION"]
-        shortFilename = POST['POSTID']+POST['EXTENSION']
+        filename = GLOBAL.config['filename'].format(**POST) + POST["EXTENSION"]
+        shortFilename = POST['POSTID'] + POST['EXTENSION']

-        getFile(filename,shortFilename,directory,POST['CONTENTURL'])
-        
\ No newline at end of file
+        getFile(filename, shortFilename, directory, POST['CONTENTURL'])
diff --git a/src/downloaders/Erome.py b/src/downloaders/Erome.py
index 7310311..4c7ef5c 100644
--- a/src/downloaders/Erome.py
+++ b/src/downloaders/Erome.py
@@ -1,19 +1,18 @@
 import os
-import logging
-import sys
 import urllib.request
 from html.parser import HTMLParser

 from src.downloaders.downloaderUtils import getFile
 from src.downloaders.downloaderUtils import getExtension
-from src.errors import (FileNameTooLong, AlbumNotDownloadedCompletely,
-                        NotADownloadableLinkError, FileAlreadyExistsError, full_exc_info)
+from src.errors import (AlbumNotDownloadedCompletely,
+                        NotADownloadableLinkError, FileAlreadyExistsError)
 from src.utils import GLOBAL
 from src.utils import printToFile as print

+
 class Erome:
-    def __init__(self,directory,post):
+    def __init__(self, directory, post):
         try:
             IMAGES = self.getLinks(post['CONTENTURL'])
         except urllib.error.HTTPError:
@@ -24,19 +23,20 @@ class Erome:
         duplicates = 0

         if imagesLenght == 1:
-            
+
             extension = getExtension(IMAGES[0])

             """Filenames are declared here"""
-            filename = GLOBAL.config['filename'].format(**post)+post["EXTENSION"]
+            filename = GLOBAL.config['filename'].format(
+                **post) + post["EXTENSION"]
             shortFilename = post['POSTID'] + extension

             imageURL = IMAGES[0]
             if 'https://' not in imageURL or 'http://' not in imageURL:
                 imageURL = "https://" + imageURL

-            getFile(filename,shortFilename,directory,imageURL)
+            getFile(filename, shortFilename, directory, imageURL)

         else:
             filename = GLOBAL.config['filename'].format(**post)
@@ -53,22 +53,22 @@ class Erome:
                 os.makedirs(folderDir)

             for i in range(imagesLenght):
-                
+
                 extension = getExtension(IMAGES[i])

-                filename = str(i+1)+extension
+                filename = str(i + 1) + extension
                 imageURL = IMAGES[i]
                 if 'https://' not in imageURL and 'http://' not in imageURL:
                     imageURL = "https://" + imageURL

-                print("  ({}/{})".format(i+1,imagesLenght))
+                print("  ({}/{})".format(i + 1, imagesLenght))
                 print("  {}".format(filename))

                 try:
-                    getFile(filename,filename,folderDir,imageURL,indent=2)
+                    getFile(filename, filename, folderDir, imageURL, indent=2)
                     print()
                 except FileAlreadyExistsError:
-                    print("  The file already exists" + " "*10,end="\n\n")
+                    print("  The file already exists" + " " * 10, end="\n\n")
                     duplicates += 1
                     howManyDownloaded -= 1
@@ -87,20 +87,21 @@ class Erome:

             if duplicates == imagesLenght:
                 raise FileAlreadyExistsError
-            elif howManyDownloaded + duplicates < imagesLenght:
+            if howManyDownloaded + duplicates < imagesLenght:
                 raise AlbumNotDownloadedCompletely(
                     "Album Not Downloaded Completely"
                 )

-    def getLinks(self,url,lineNumber=129):
-        
+    def getLinks(self, url, lineNumber=129):
+
         content = []
         lineNumber = None

         class EromeParser(HTMLParser):
             tag = None
+
             def handle_starttag(self, tag, attrs):
-                self.tag = {tag:{attr[0]: attr[1] for attr in attrs}}
+                self.tag = {tag: {attr[0]: attr[1] for attr in attrs}}

         pageSource = (urllib.request.urlopen(url).read().decode().split('\n'))
@@ -109,7 +110,7 @@ class Erome:
             obj = EromeParser()
             obj.feed(pageSource[i])
             tag = obj.tag
-            
+
             if tag is not None:
                 if "div" in tag:
                     if "id" in tag["div"]:
@@ -124,12 +125,12 @@ class Erome:
                 if tag is not None:
                     if "img" in tag:
                         if "class" in tag["img"]:
-                            if tag["img"]["class"]=="img-front":
+                            if tag["img"]["class"] == "img-front":
                                 content.append(tag["img"]["src"])
                 elif "source" in tag:
                     content.append(tag["source"]["src"])
-        
+
         return [
-            link for link in content \
+            link for link in content
             if link.endswith("_480p.mp4") or not link.endswith(".mp4")
-        ]
\ No newline at end of file
+        ]
diff --git a/src/downloaders/Gfycat.py b/src/downloaders/Gfycat.py
index 7265425..6366329 100644
--- a/src/downloaders/Gfycat.py
+++ b/src/downloaders/Gfycat.py
@@ -4,27 +4,27 @@ import urllib.request
 from bs4 import BeautifulSoup

 from src.downloaders.downloaderUtils import getFile, getExtension
-from src.errors import (FileNameTooLong, AlbumNotDownloadedCompletely,
-                        NotADownloadableLinkError, FileAlreadyExistsError)
+from src.errors import (NotADownloadableLinkError)
 from src.utils import GLOBAL
-from src.utils import printToFile as print
 from src.downloaders.gifDeliveryNetwork import GifDeliveryNetwork

+
 class Gfycat:
-    def __init__(self,directory,POST):
+    def __init__(self, directory, POST):
         try:
             POST['MEDIAURL'] = self.getLink(POST['CONTENTURL'])
         except IndexError:
             raise NotADownloadableLinkError("Could not read the page source")

         POST['EXTENSION'] = getExtension(POST['MEDIAURL'])
-        
-        if not os.path.exists(directory): os.makedirs(directory)

-        filename = GLOBAL.config['filename'].format(**POST)+POST["EXTENSION"]
-        shortFilename = POST['POSTID']+POST['EXTENSION']
-        
-        getFile(filename,shortFilename,directory,POST['MEDIAURL'])
+        if not os.path.exists(directory):
+            os.makedirs(directory)
+
+        filename = GLOBAL.config['filename'].format(**POST) + POST["EXTENSION"]
+        shortFilename = POST['POSTID'] + POST['EXTENSION']
+
+        getFile(filename, shortFilename, directory, POST['MEDIAURL'])

     @staticmethod
     def getLink(url):
@@ -43,10 +43,11 @@ class Gfycat:
         pageSource = (urllib.request.urlopen(url).read().decode())

         soup = BeautifulSoup(pageSource, "html.parser")
-        attributes = {"data-react-helmet":"true","type":"application/ld+json"}
-        content = soup.find("script",attrs=attributes)
+        attributes = {"data-react-helmet": "true",
+                      "type": "application/ld+json"}
+        content = soup.find("script", attrs=attributes)

         if content is None:
             return GifDeliveryNetwork.getLink(url)

-        return json.loads(content.contents[0])["video"]["contentUrl"]
\ No newline at end of file
+        return json.loads(content.contents[0])["video"]["contentUrl"]
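`Gfycat.getLink` locates the page's JSON-LD `<script>` tag and reads the video URL out of the embedded JSON. A sketch of the same technique run against a canned page instead of the live site (assumes `bs4` is installed; the HTML string here is invented for illustration):

```python
# JSON-LD scraping sketch: find the ld+json script tag, parse its contents,
# and pull out the nested video URL, as Gfycat.getLink does.
import json
from bs4 import BeautifulSoup

page = """<html><head>
<script data-react-helmet="true" type="application/ld+json">
{"video": {"contentUrl": "https://example.com/clip.mp4"}}
</script>
</head><body></body></html>"""

soup = BeautifulSoup(page, "html.parser")
tag = soup.find("script", attrs={"data-react-helmet": "true",
                                 "type": "application/ld+json"})
data = json.loads(tag.contents[0])
print(data["video"]["contentUrl"])  # https://example.com/clip.mp4
```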
diff --git a/src/downloaders/Imgur.py b/src/downloaders/Imgur.py
index d361b31..5a38cde 100644
--- a/src/downloaders/Imgur.py
+++ b/src/downloaders/Imgur.py
@@ -1,7 +1,5 @@
-import urllib
 import json
 import os
-import time

 import requests

@@ -10,16 +8,17 @@ from src.utils import GLOBAL, nameCorrector
 from src.downloaders.Direct import Direct
 from src.downloaders.downloaderUtils import getFile
 from src.errors import FileNotFoundError, FileAlreadyExistsError, AlbumNotDownloadedCompletely, ImageNotFound, ExtensionError, NotADownloadableLinkError, TypeInSkip

+
 class Imgur:

     IMGUR_IMAGE_DOMAIN = "https://i.imgur.com/"

-    def __init__(self,directory, post):
+    def __init__(self, directory, post):

         link = post['CONTENTURL']

         if link.endswith(".gifv"):
-            link = link.replace(".gifv",".mp4")
+            link = link.replace(".gifv", ".mp4")
             Direct(directory, {**post, 'CONTENTURL': link})
             return None
@@ -57,22 +56,23 @@ class Imgur:

                 extension = self.validateExtension(images["images"][i]["ext"])

-                imageURL = self.IMGUR_IMAGE_DOMAIN + images["images"][i]["hash"] + extension
+                imageURL = self.IMGUR_IMAGE_DOMAIN + \
+                    images["images"][i]["hash"] + extension

-                filename = "_".join([
-                    str(i+1), nameCorrector(images["images"][i]['title']), images["images"][i]['hash']
-                ]) + extension
-                shortFilename = str(i+1) + "_" + images["images"][i]['hash']
+                filename = "_".join([str(i + 1),
+                                     nameCorrector(images["images"][i]['title']),
+                                     images["images"][i]['hash']]) + extension
+                shortFilename = str(i + 1) + "_" + images["images"][i]['hash']

-                print("\n  ({}/{})".format(i+1,imagesLenght))
+                print("\n  ({}/{})".format(i + 1, imagesLenght))

                 try:
-                    getFile(filename,shortFilename,folderDir,imageURL,indent=2)
+                    getFile(filename, shortFilename, folderDir, imageURL, indent=2)
                     howManyDownloaded += 1
                     print()

                 except FileAlreadyExistsError:
-                    print("  The file already exists" + " "*10,end="\n\n")
+                    print("  The file already exists" + " " * 10, end="\n\n")
                     duplicates += 1

                 except TypeInSkip:
@@ -82,41 +82,41 @@ class Imgur:
                 except Exception as exception:
                     print("\n  Could not get the file")
                     print(
-                        "  "
-                        + "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
+                        "  " +
+                        "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
                             class_name=exception.__class__.__name__,
-                            info=str(exception)
-                        )
-                        + "\n"
-                    )
-                    print(GLOBAL.log_stream.getvalue(),noPrint=True)
+                            info=str(exception)) +
+                        "\n")
+                    print(GLOBAL.log_stream.getvalue(), noPrint=True)

             if duplicates == imagesLenght:
                 raise FileAlreadyExistsError
-            elif howManyDownloaded + duplicates < imagesLenght:
+            if howManyDownloaded + duplicates < imagesLenght:
                 raise AlbumNotDownloadedCompletely(
                     "Album Not Downloaded Completely"
-                )
+                )

-    def download(self, image):
+    def download(self, image):
         extension = self.validateExtension(image["ext"])
         imageURL = self.IMGUR_IMAGE_DOMAIN + image["hash"] + extension

         filename = GLOBAL.config['filename'].format(**self.post) + extension
-        shortFilename = self.post['POSTID']+extension
-        
-        getFile(filename,shortFilename,self.directory,imageURL)
+        shortFilename = self.post['POSTID'] + extension
+
+        getFile(filename, shortFilename, self.directory, imageURL)

     @property
     def isAlbum(self):
         return "album_images" in self.rawData

-    @staticmethod
+    @staticmethod
     def getData(link):
-        
+
         cookies = {"over18": "1", "postpagebeta": "0"}
         res = requests.get(link, cookies=cookies)
-        if res.status_code != 200: raise ImageNotFound(f"Server responded with {res.status_code} to {link}")
+        if res.status_code != 200:
+            raise ImageNotFound(
+                f"Server responded with {res.status_code} to {link}")
         pageSource = requests.get(link, cookies=cookies).text

         STARTING_STRING = "image : "
@@ -124,18 +126,20 @@ class Imgur:
         STARTING_STRING_LENGHT = len(STARTING_STRING)
         try:
-            startIndex = pageSource.index(STARTING_STRING) + STARTING_STRING_LENGHT
+            startIndex = pageSource.index(
+                STARTING_STRING) + STARTING_STRING_LENGHT
             endIndex = pageSource.index(ENDING_STRING, startIndex)
         except ValueError:
-            raise NotADownloadableLinkError(f"Could not read the page source on {link}")
+            raise NotADownloadableLinkError(
+                f"Could not read the page source on {link}")

         while pageSource[endIndex] != "}":
-            endIndex=endIndex-1
+            endIndex = endIndex - 1

         try:
-            data = pageSource[startIndex:endIndex+2].strip()[:-1]
-        except:
-            pageSource[endIndex+1]='}'
-            data = pageSource[startIndex:endIndex+3].strip()[:-1]
+            data = pageSource[startIndex:endIndex + 2].strip()[:-1]
+        except BaseException:
+            pageSource[endIndex + 1] = '}'
+            data = pageSource[startIndex:endIndex + 3].strip()[:-1]

         return json.loads(data)
@@ -144,5 +146,8 @@ class Imgur:
         POSSIBLE_EXTENSIONS = [".jpg", ".png", ".mp4", ".gif"]

         for extension in POSSIBLE_EXTENSIONS:
-            if extension in string: return extension
-        else: raise ExtensionError(f"\"{string}\" is not recognized as a valid extension.")
+            if extension in string:
+                return extension
+
+        raise ExtensionError(
+            f"\"{string}\" is not recognized as a valid extension.")
diff --git a/src/downloaders/downloaderUtils.py b/src/downloaders/downloaderUtils.py
index d8ba74a..3bd4605 100644
--- a/src/downloaders/downloaderUtils.py
+++ b/src/downloaders/downloaderUtils.py
@@ -1,45 +1,51 @@
 import sys
 import os
-import time
-from urllib.error import HTTPError
 import urllib.request
 from pathlib import Path
 import hashlib

-from src.utils import nameCorrector, GLOBAL
+from src.utils import GLOBAL
 from src.utils import printToFile as print
-from src.errors import FileAlreadyExistsError, FileNameTooLong, FailedToDownload, TypeInSkip, DomainInSkip
+from src.errors import FileAlreadyExistsError, FailedToDownload, TypeInSkip, DomainInSkip
+

 def dlProgress(count, blockSize, totalSize):
     """Function for writing download progress to console
     """

-    downloadedMbs = int(count*blockSize*(10**(-6)))
-    fileSize = int(totalSize*(10**(-6)))
-    sys.stdout.write("{}Mb/{}Mb\r".format(downloadedMbs,fileSize))
+    downloadedMbs = int(count * blockSize * (10**(-6)))
+    fileSize = int(totalSize * (10**(-6)))
+    sys.stdout.write("{}Mb/{}Mb\r".format(downloadedMbs, fileSize))
     sys.stdout.flush()

+
 def getExtension(link):
     """Extract file extension from image link.
    If didn't find any, return '.jpg'
     """

-    imageTypes = ['jpg','png','mp4','webm','gif']
+    imageTypes = ['jpg', 'png', 'mp4', 'webm', 'gif']
     parsed = link.split('.')
     for fileType in imageTypes:
         if fileType in parsed:
-            return "."+parsed[-1]
-    else:
-        if not "v.redd.it" in link:
-            return '.jpg'
-        else:
-            return '.mp4'
+            return "." + parsed[-1]

-def getFile(filename,shortFilename,folderDir,imageURL,indent=0, silent=False):
+    if "v.redd.it" not in link:
+        return '.jpg'
+    return '.mp4'
+
+
+def getFile(
+        filename,
+        shortFilename,
+        folderDir,
+        imageURL,
+        indent=0,
+        silent=False):

     FORMATS = {
         "videos": [".mp4", ".webm"],
-        "images": [".jpg",".jpeg",".png",".bmp"],
+        "images": [".jpg", ".jpeg", ".png", ".bmp"],
         "gifs": [".gif"],
         "self": []
     }
@@ -53,10 +59,10 @@
             raise DomainInSkip

     headers = [
-        ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " \
-         "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "\
+        ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+         "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "
          "Safari/537.36 OPR/54.0.2952.64"),
-        ("Accept", "text/html,application/xhtml+xml,application/xml;" \
+        ("Accept", "text/html,application/xhtml+xml,application/xml;"
          "q=0.9,image/webp,image/apng,*/*;q=0.8"),
         ("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"),
         ("Accept-Encoding", "none"),
@@ -64,21 +70,22 @@
         ("Connection", "keep-alive")
     ]

-    if not os.path.exists(folderDir): os.makedirs(folderDir)
+    if not os.path.exists(folderDir):
+        os.makedirs(folderDir)

     opener = urllib.request.build_opener()
-    if not "imgur" in imageURL:
+    if "imgur" not in imageURL:
         opener.addheaders = headers
     urllib.request.install_opener(opener)

-    if not silent: print(" "*indent + str(folderDir),
-                         " "*indent + str(filename),
-                         sep="\n")
-
+    if not silent:
+        print(" " * indent + str(folderDir),
+              " " * indent + str(filename),
+              sep="\n")

     for i in range(3):
         fileDir = Path(folderDir) / filename
-        tempDir = Path(folderDir) / (filename+".tmp")
+        tempDir = Path(folderDir) / (filename + ".tmp")

         if not (os.path.isfile(fileDir)):
             try:
@@ -93,8 +100,9 @@
                     raise FileAlreadyExistsError
                 GLOBAL.downloadedPosts.add(fileHash)

-                os.rename(tempDir,fileDir)
-                if not silent: print(" "*indent+"Downloaded"+" "*10)
+                os.rename(tempDir, fileDir)
+                if not silent:
+                    print(" " * indent + "Downloaded" + " " * 10)
                 return None
             except ConnectionResetError:
                 raise FailedToDownload
@@ -104,6 +112,7 @@
             raise FileAlreadyExistsError
     raise FailedToDownload

+
 def createHash(filename):
     hash_md5 = hashlib.md5()
     with open(filename, "rb") as f:
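`getFile` hashes every finished download with `createHash` and, when `--no-dupes` is active, skips files whose hash has already been seen. A sketch of that idea, assuming the standard chunked-read MD5 loop that the truncated `createHash` hunk implies; the helper names below are hypothetical:

```python
# Chunked MD5 hashing plus the hash-set dedupe idea behind --no-dupes.
# Reading in 4096-byte blocks keeps memory flat even for large video files.
import hashlib

def create_hash(filename):
    hash_md5 = hashlib.md5()
    with open(filename, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()

seen = set()

def is_duplicate(filename):
    digest = create_hash(filename)
    if digest in seen:
        return True   # same bytes already downloaded under another name
    seen.add(digest)
    return False
```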
diff --git a/src/downloaders/gallery.py b/src/downloaders/gallery.py
index 78caff2..ae32646 100644
--- a/src/downloaders/gallery.py
+++ b/src/downloaders/gallery.py
@@ -1,62 +1,65 @@
-import io
 import os
 import json
 import urllib
 import requests
-from pathlib import Path

-from src.utils import GLOBAL, nameCorrector
+from src.utils import GLOBAL
 from src.utils import printToFile as print
-from src.downloaders.Direct import Direct
 from src.downloaders.downloaderUtils import getFile
-from src.errors import FileNotFoundError, FileAlreadyExistsError, AlbumNotDownloadedCompletely, ImageNotFound, ExtensionError, NotADownloadableLinkError, TypeInSkip
+from src.errors import FileNotFoundError, FileAlreadyExistsError, AlbumNotDownloadedCompletely, ImageNotFound, NotADownloadableLinkError, TypeInSkip
+

 class gallery:
-    def __init__(self,directory,post):
+    def __init__(self, directory, post):
         link = post['CONTENTURL']
         self.rawData = self.getData(link)

         self.directory = directory
         self.post = post
-        
-        images={}
-        count=0
+
+        images = {}
+        count = 0
         for model in self.rawData['posts']['models']:
             try:
                 for item in self.rawData['posts']['models'][model]['media']['gallery']['items']:
                     try:
-                        images[count]={'id':item['mediaId'], 'url':self.rawData['posts']['models'][model]['media']['mediaMetadata'][item['mediaId']]['s']['u']}
-                        count=count+1
-                    except:
+                        images[count] = {'id': item['mediaId'], 'url': self.rawData['posts'][
+                            'models'][model]['media']['mediaMetadata'][item['mediaId']]['s']['u']}
+                        count = count + 1
+                    except BaseException:
                         continue
-            except:
+            except BaseException:
                 continue

-        self.downloadAlbum(images,count)
+        self.downloadAlbum(images, count)

-    @staticmethod
+    @staticmethod
     def getData(link):

         headers = {
-            "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64",
-            "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64",
+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
         }
         res = requests.get(link, headers=headers)
-        if res.status_code != 200: raise ImageNotFound(f"Server responded with {res.status_code} to {link}")
+        if res.status_code != 200:
+            raise ImageNotFound(
+                f"Server responded with {res.status_code} to {link}")
         pageSource = res.text
-        
+
         STARTING_STRING = "_r = {"
         ENDING_STRING = "</script>"
         STARTING_STRING_LENGHT = len(STARTING_STRING)
         try:
-            startIndex = pageSource.index(STARTING_STRING) + STARTING_STRING_LENGHT
+            startIndex = pageSource.index(
+                STARTING_STRING) + STARTING_STRING_LENGHT
             endIndex = pageSource.index(ENDING_STRING, startIndex)
         except ValueError:
-            raise NotADownloadableLinkError(f"Could not read the page source on {link}")
+            raise NotADownloadableLinkError(
+                f"Could not read the page source on {link}")

-        data = json.loads(pageSource[startIndex-1:endIndex+1].strip()[:-1])
+        data = json.loads(pageSource[startIndex - 1:endIndex + 1].strip()[:-1])
         return data

     def downloadAlbum(self, images, count):
@@ -80,19 +83,20 @@ class gallery:
             extension = os.path.splitext(path)[1]

             filename = "_".join([
-                str(i+1), images[i]['id']
+                str(i + 1), images[i]['id']
             ]) + extension
-            shortFilename = str(i+1) + "_" + images[i]['id']
+            shortFilename = str(i + 1) + "_" + images[i]['id']

-            print("\n  ({}/{})".format(i+1,count))
+            print("\n  ({}/{})".format(i + 1, count))

             try:
-                getFile(filename,shortFilename,folderDir,images[i]['url'],indent=2)
+                getFile(filename, shortFilename, folderDir,
+                        images[i]['url'], indent=2)
                 howManyDownloaded += 1
                 print()

             except FileAlreadyExistsError:
-                print("  The file already exists" + " "*10,end="\n\n")
+                print("  The file already exists" + " " * 10, end="\n\n")
                 duplicates += 1

             except TypeInSkip:
@@ -102,19 +106,16 @@ class gallery:
             except Exception as exception:
                 print("\n  Could not get the file")
                 print(
-                    "  "
-                    + "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
+                    "  " +
+                    "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
                         class_name=exception.__class__.__name__,
-                        info=str(exception)
-                    )
-                    + "\n"
-                )
-                print(GLOBAL.log_stream.getvalue(),noPrint=True)
+                        info=str(exception)) +
+                    "\n")
+                print(GLOBAL.log_stream.getvalue(), noPrint=True)

         if duplicates == count:
             raise FileAlreadyExistsError
-        elif howManyDownloaded + duplicates < count:
+        if howManyDownloaded + duplicates < count:
             raise AlbumNotDownloadedCompletely(
                 "Album Not Downloaded Completely"
-            )
-        
+            )
diff --git a/src/downloaders/gifDeliveryNetwork.py b/src/downloaders/gifDeliveryNetwork.py
index bc41e48..bd6002b 100644
--- a/src/downloaders/gifDeliveryNetwork.py
+++ b/src/downloaders/gifDeliveryNetwork.py
@@ -1,37 +1,37 @@
-import json
 import os
 import urllib.request

 from bs4 import BeautifulSoup

 from src.downloaders.downloaderUtils import getFile, getExtension
-from src.errors import (FileNameTooLong, AlbumNotDownloadedCompletely,
-                        NotADownloadableLinkError, FileAlreadyExistsError)
+from src.errors import (NotADownloadableLinkError)
 from src.utils import GLOBAL
-from src.utils import printToFile as print
+

 class GifDeliveryNetwork:
-    def __init__(self,directory,POST):
+    def __init__(self, directory, POST):
         try:
             POST['MEDIAURL'] = self.getLink(POST['CONTENTURL'])
         except IndexError:
             raise NotADownloadableLinkError("Could not read the page source")

         POST['EXTENSION'] = getExtension(POST['MEDIAURL'])
-        
-        if not os.path.exists(directory): os.makedirs(directory)

-        filename = GLOBAL.config['filename'].format(**POST)+POST["EXTENSION"]
-        shortFilename = POST['POSTID']+POST['EXTENSION']
-        
-        getFile(filename,shortFilename,directory,POST['MEDIAURL'])
-        
+        if not os.path.exists(directory):
+            os.makedirs(directory)
+
+        filename = GLOBAL.config['filename'].format(**POST) + POST["EXTENSION"]
+        shortFilename = POST['POSTID'] + POST['EXTENSION']
+
+        getFile(filename, shortFilename, directory, POST['MEDIAURL'])
+
     @staticmethod
     def getLink(url):
         """Extract direct link to the video from page's source
         and return it
         """

-        if '.webm' in url.split('/')[-1] or '.mp4' in url.split('/')[-1] or '.gif' in url.split('/')[-1]:
+        if '.webm' in url.split(
+                '/')[-1] or '.mp4' in url.split('/')[-1] or '.gif' in url.split('/')[-1]:
             return url

         if url[-1:] == '/':
@@ -42,11 +42,11 @@ class GifDeliveryNetwork:
         pageSource = (urllib.request.urlopen(url).read().decode())

         soup = BeautifulSoup(pageSource, "html.parser")
-        attributes = {"id":"mp4Source","type":"video/mp4"}
-        content = soup.find("source",attrs=attributes)
+        attributes = {"id": "mp4Source", "type": "video/mp4"}
+        content = soup.find("source", attrs=attributes)

         if content is None:
-            
+
             raise NotADownloadableLinkError("Could not read the page source")

-        return content["src"]
\ No newline at end of file
+        return content["src"]
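`gallery.getData` (like `Imgur.getData`) never parses the HTML: it slices the raw page source between a start marker and the closing `</script>` and feeds the remainder to `json.loads`. A toy reproduction of that slicing, using an invented page string:

```python
# Inline-JSON extraction sketch: locate a JavaScript assignment embedded in
# the page source and slice out the JSON object that follows it. The markers
# mirror the ones in gallery.getData ("_r = {" ... "</script>").
import json

page_source = '<script>window.___r = {"posts": {"models": {}}};</script>'

STARTING_STRING = "_r = {"
ENDING_STRING = "</script>"

start = page_source.index(STARTING_STRING) + len(STARTING_STRING)
end = page_source.index(ENDING_STRING, start)

# start - 1 backs up to include the "{" itself; strip()[:-1] drops the
# trailing ";" left over from the JavaScript statement.
data = json.loads(page_source[start - 1:end].strip()[:-1])
print(data["posts"])  # {'models': {}}
```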
diff --git a/src/downloaders/redgifs.py b/src/downloaders/redgifs.py
index 95502de..f87631b 100644
--- a/src/downloaders/redgifs.py
+++ b/src/downloaders/redgifs.py
@@ -4,28 +4,29 @@ import urllib.request
 from bs4 import BeautifulSoup

 from src.downloaders.downloaderUtils import getFile, getExtension
-from src.errors import (FileNameTooLong, AlbumNotDownloadedCompletely,
-                        NotADownloadableLinkError, FileAlreadyExistsError)
+from src.errors import (NotADownloadableLinkError)
 from src.utils import GLOBAL
-from src.utils import printToFile as print
+

 class Redgifs:
-    def __init__(self,directory,POST):
+    def __init__(self, directory, POST):
         try:
             POST['MEDIAURL'] = self.getLink(POST['CONTENTURL'])
         except IndexError:
             raise NotADownloadableLinkError("Could not read the page source")

         POST['EXTENSION'] = getExtension(POST['MEDIAURL'])
-        
-        if not os.path.exists(directory): os.makedirs(directory)

-        filename = GLOBAL.config['filename'].format(**POST)+POST["EXTENSION"]
-        shortFilename = POST['POSTID']+POST['EXTENSION']
-        
-        getFile(filename,shortFilename,directory,POST['MEDIAURL'])
+        if not os.path.exists(directory):
+            os.makedirs(directory)

-    def getLink(self, url):
+        filename = GLOBAL.config['filename'].format(**POST) + POST["EXTENSION"]
+        shortFilename = POST['POSTID'] + POST['EXTENSION']
+
+        getFile(filename, shortFilename, directory, POST['MEDIAURL'])
+
+    @staticmethod
+    def getLink(url):
         """Extract direct link to the video from page's source
         and return it
         """
@@ -36,15 +37,19 @@ class Redgifs:
         if url[-1:] == '/':
             url = url[:-1]

-        url = urllib.request.Request("https://redgifs.com/watch/" + url.split('/')[-1])
+        url = urllib.request.Request(
+            "https://redgifs.com/watch/" + url.split('/')[-1])

-        url.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64')
+        url.add_header(
+            'User-Agent',
+            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64')

         pageSource = (urllib.request.urlopen(url).read().decode())

         soup = BeautifulSoup(pageSource, "html.parser")
-        attributes = {"data-react-helmet":"true","type":"application/ld+json"}
-        content = soup.find("script",attrs=attributes)
+        attributes = {"data-react-helmet": "true",
+                      "type": "application/ld+json"}
+        content = soup.find("script", attrs=attributes)

         if content is None:
             raise NotADownloadableLinkError("Could not read the page source")
diff --git a/src/downloaders/selfPost.py b/src/downloaders/selfPost.py
index 9fee2f0..f7a8a47 100644
--- a/src/downloaders/selfPost.py
+++ b/src/downloaders/selfPost.py
@@ -1,3 +1,4 @@
+from src.utils import printToFile as print
 import io
 import os
 from pathlib import Path
@@ -6,36 +7,36 @@ from src.errors import FileAlreadyExistsError, TypeInSkip
 from src.utils import GLOBAL

 VanillaPrint = print
-from src.utils import printToFile as print
+

 class SelfPost:
-    def __init__(self,directory,post):
+    def __init__(self, directory, post):

-        if "self" in GLOBAL.arguments.skip: raise TypeInSkip
+        if "self" in GLOBAL.arguments.skip:
+            raise TypeInSkip

-        if not os.path.exists(directory): os.makedirs(directory)
+        if not os.path.exists(directory):
+            os.makedirs(directory)

         filename = GLOBAL.config['filename'].format(**post)

-        fileDir = directory / (filename+".md")
+        fileDir = directory / (filename + ".md")
         print(fileDir)
-        print(filename+".md")
-        
+        print(filename + ".md")

         if Path.is_file(fileDir):
             raise FileAlreadyExistsError
-            
+
         try:
-            self.writeToFile(fileDir,post)
+            self.writeToFile(fileDir, post)
         except FileNotFoundError:
-            fileDir = post['POSTID']+".md"
+            fileDir = post['POSTID'] + ".md"
             fileDir = directory / fileDir

-            self.writeToFile(fileDir,post)
-    
+            self.writeToFile(fileDir, post)
+
     @staticmethod
-    def writeToFile(directory,post):
-        
+    def writeToFile(directory, post):
         """Self posts are formatted here"""
         content = ("## ["
                    + post["TITLE"]
@@ -54,7 +55,7 @@ class SelfPost:
                    + post["REDDITOR"]
                    + ")")

-        with io.open(directory,"w",encoding="utf-8") as FILE:
-            VanillaPrint(content,file=FILE)
-        
+        with io.open(directory, "w", encoding="utf-8") as FILE:
+            VanillaPrint(content, file=FILE)
+
         print("Downloaded")
diff --git a/src/downloaders/vreddit.py b/src/downloaders/vreddit.py
index a9ad478..7194042 100644
--- a/src/downloaders/vreddit.py
+++ b/src/downloaders/vreddit.py
@@ -1,42 +1,42 @@
 import os
 import subprocess

-from src.downloaders.downloaderUtils import getFile, getExtension
-
-from src.errors import FileNameTooLong
+from src.downloaders.downloaderUtils import getFile
 from src.utils import GLOBAL
 from src.utils import printToFile as print

-class VReddit:
-    def __init__(self,directory,post):
-        extension = ".mp4"
-        if not os.path.exists(directory): os.makedirs(directory)

-        filename = GLOBAL.config['filename'].format(**post)+extension
-        shortFilename = post['POSTID']+extension
+class VReddit:
+    def __init__(self, directory, post):
+        extension = ".mp4"
+        if not os.path.exists(directory):
+            os.makedirs(directory)
+
+        filename = GLOBAL.config['filename'].format(**post) + extension
+        shortFilename = post['POSTID'] + extension

         try:
             FNULL = open(os.devnull, 'w')
             subprocess.call("ffmpeg", stdout=FNULL, stderr=subprocess.STDOUT)
-        except:
-            getFile(filename,shortFilename,directory,post['CONTENTURL'])
+        except BaseException:
+            getFile(filename, shortFilename, directory, post['CONTENTURL'])
             print("FFMPEG library not found, skipping merging video and audio")
         else:
             videoName = post['POSTID'] + "_video"
             videoURL = post['CONTENTURL']
             audioName = post['POSTID'] + "_audio"
             audioURL = videoURL[:videoURL.rfind('/')] + '/DASH_audio.mp4'
-            
-            print(directory,filename,sep="\n")
-            getFile(videoName,videoName,directory,videoURL,silent=True)
-            getFile(audioName,audioName,directory,audioURL,silent=True)
+
+            print(directory, filename, sep="\n")
+
+            getFile(videoName, videoName, directory, videoURL, silent=True)
+            getFile(audioName, audioName, directory, audioURL, silent=True)

             try:
                 self._mergeAudio(videoName,
-                                audioName,
-                                filename,
-                                shortFilename,
-                                directory)
+                                 audioName,
+                                 filename,
+                                 shortFilename,
+                                 directory)
             except KeyboardInterrupt:
                 os.remove(directory / filename)
                 os.remove(directory / audioName)
@@ -44,8 +44,8 @@ class VReddit:
                 os.rename(directory / videoName, directory / filename)

     @staticmethod
-    def _mergeAudio(video,audio,filename,shortFilename,directory):
-        
+    def _mergeAudio(video, audio, filename, shortFilename, directory):
+
         inputVideo = str(directory / video)
         inputAudio = str(directory / audio)

@@ -54,4 +54,4 @@ class VReddit:
         subprocess.call(cmd.split(), stdout=FNULL, stderr=subprocess.STDOUT)

         os.remove(directory / video)
-        os.remove(directory / audio)
\ No newline at end of file
+        os.remove(directory / audio)
diff --git a/src/downloaders/youtube.py b/src/downloaders/youtube.py
index 46cb37c..a5c7922 100644
--- a/src/downloaders/youtube.py
+++ b/src/downloaders/youtube.py
@@ -2,22 +2,24 @@ import os
 import youtube_dl
 import sys

-from src.downloaders.downloaderUtils import getExtension, dlProgress, createHash
+from src.downloaders.downloaderUtils import createHash
 from src.utils import GLOBAL
 from src.utils import printToFile as print
 from src.errors import FileAlreadyExistsError

+
 class Youtube:
-    def __init__(self,directory,post):
-        if not os.path.exists(directory): os.makedirs(directory)
+    def __init__(self, directory, post):
+        if not os.path.exists(directory):
+            os.makedirs(directory)

         filename = GLOBAL.config['filename'].format(**post)
         print(filename)

-        self.download(filename,directory,post['CONTENTURL'])
-    
-    def download(self,filename,directory,url):
+        self.download(filename, directory, post['CONTENTURL'])
+
+    def download(self, filename, directory, url):
         ydl_opts = {
             "format": "best",
             "outtmpl": str(directory / (filename + ".%(ext)s")),
@@ -29,7 +31,7 @@ class Youtube:
         with youtube_dl.YoutubeDL(ydl_opts) as ydl:
             ydl.download([url])

-        location = directory/(filename+".mp4")
+        location = directory / (filename + ".mp4")

         if GLOBAL.arguments.no_dupes:
             try:
@@ -40,12 +42,12 @@ class Youtube:
                 os.remove(location)
                 raise FileAlreadyExistsError
             GLOBAL.downloadedPosts.add(fileHash)
-        
+
     @staticmethod
     def _hook(d):
-        if d['status'] == 'finished': return print("Downloaded")
+        if d['status'] == 'finished':
+            return print("Downloaded")
         downloadedMbs = int(d['downloaded_bytes'] * (10**(-6)))
-        fileSize = int(d['total_bytes']*(10**(-6)))
-        sys.stdout.write("{}Mb/{}Mb\r".format(downloadedMbs,fileSize))
+        fileSize = int(d['total_bytes'] * (10**(-6)))
+        sys.stdout.write("{}Mb/{}Mb\r".format(downloadedMbs, fileSize))
         sys.stdout.flush()
-        
\ No newline at end of file
diff --git a/src/errors.py b/src/errors.py
index 0877e62..39d90be 100644
--- a/src/errors.py
+++ b/src/errors.py
@@ -1,9 +1,11 @@
 import sys

+
 def full_exc_info(exc_info):

     def current_stack(skip=0):
-        try: 1/0
+        try:
+            1 / 0
         except ZeroDivisionError:
             f = sys.exc_info()[2].tb_frame
         for i in range(skip + 2):
@@ -16,7 +18,7 @@ def full_exc_info(exc_info):

     def extend_traceback(tb, stack):

-        class FauxTb(object):
+        class FauxTb():
             def __init__(self, tb_frame, tb_lineno, tb_next):
                 self.tb_frame = tb_frame
                 self.tb_lineno = tb_lineno
@@ -33,80 +35,106 @@ def full_exc_info(exc_info):
     full_tb = extend_traceback(tb, current_stack(1))
     return t, v, full_tb

+
 class RedditLoginFailed(Exception):
     pass

+
 class ImgurLoginError(Exception):
     pass

+
 class FileAlreadyExistsError(Exception):
     pass

+
 class NotADownloadableLinkError(Exception):
     pass

+
 class AlbumNotDownloadedCompletely(Exception):
     pass

+
 class FileNameTooLong(Exception):
     pass

+
 class InvalidRedditLink(Exception):
     pass

+
 class ProgramModeError(Exception):
     pass

+
 class SearchModeError(Exception):
     pass

+
 class RedditorNameError(Exception):
     pass

+
 class NoMatchingSubmissionFound(Exception):
     pass

+
 class NoPrawSupport(Exception):
     pass

+
 class NoRedditSupport(Exception):
     pass

+
 class MultiredditNotFound(Exception):
     pass

+
 class InsufficientPermission(Exception):
     pass

+
 class InvalidSortingType(Exception):
     pass

+
 class FileNotFoundError(Exception):
     pass

+
 class NoSuitablePost(Exception):
     pass

+
 class ImgurLimitError(Exception):
     pass

+
 class DirectLinkNotFound(Exception):
     pass

+
 class InvalidJSONFile(Exception):
     pass

+
 class FailedToDownload(Exception):
     pass

+
 class TypeInSkip(Exception):
     pass

+
 class DomainInSkip(Exception):
     pass

+
 class ImageNotFound(Exception):
     pass

+
 class ExtensionError(Exception):
-    pass
\ No newline at end of file
+    pass
diff --git a/src/jsonHelper.py b/src/jsonHelper.py
index 79c9b0a..11508b4 100644
--- a/src/jsonHelper.py
+++ b/src/jsonHelper.py
@@ -3,6 +3,7 @@
 from os import path, remove

 from src.errors import InvalidJSONFile

+
 class JsonFile:
     """ Write and read JSON files
@@ -10,13 +11,13 @@ class JsonFile:

     Use delete(self,*deletedKeys) to delete keys
     """
-    
+
     FILEDIR = ""

-    def __init__(self,FILEDIR):
+    def __init__(self, FILEDIR):
         self.FILEDIR = FILEDIR
         if not path.exists(self.FILEDIR):
-            self.__writeToFile({},create=True)
+            self.__writeToFile({}, create=True)

     def read(self):
         try:
@@ -25,19 +26,21 @@ class JsonFile:
         except json.decoder.JSONDecodeError:
             raise InvalidJSONFile(f"{self.FILEDIR} cannot be read")

-    def add(self,toBeAdded,sub=None):
+    def add(self, toBeAdded, sub=None):
         """Takes a dictionary and merges it with json file.
         It uses new key's value if a key already exists.
         Returns the new content as a dictionary.
         """

         data = self.read()
-        if sub: data[sub] = {**data[sub], **toBeAdded}
-        else: data = {**data, **toBeAdded}
+        if sub:
+            data[sub] = {**data[sub], **toBeAdded}
+        else:
+            data = {**data, **toBeAdded}

         self.__writeToFile(data)
         return self.read()

-    def delete(self,*deleteKeys):
+    def delete(self, *deleteKeys):
         """Delete given keys from JSON file.
         Returns the new content as a dictionary.
         """
@@ -51,8 +54,8 @@ class JsonFile:
                 return False
         self.__writeToFile(data)

-    def __writeToFile(self,content,create=False):
+    def __writeToFile(self, content, create=False):
         if not create:
             remove(self.FILEDIR)
         with open(self.FILEDIR, 'w') as f:
-            json.dump(content, f, indent=4)
\ No newline at end of file
+            json.dump(content, f, indent=4)
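`JsonFile.add` merges the incoming dictionary into the stored one with `{**old, **new}`, where the right-hand dict wins on key conflicts. For example:

```python
# Dict-unpacking merge as used by JsonFile.add: the right-hand operand
# overrides any key that exists in both dictionaries.
stored = {"filename": "{POSTID}", "options": ""}
update = {"filename": "{REDDITOR}_{TITLE}_{POSTID}"}

merged = {**stored, **update}
print(merged)
# {'filename': '{REDDITOR}_{TITLE}_{POSTID}', 'options': ''}
```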
""" data = self.read() - if sub: data[sub] = {**data[sub], **toBeAdded} - else: data = {**data, **toBeAdded} + if sub: + data[sub] = {**data[sub], **toBeAdded} + else: + data = {**data, **toBeAdded} self.__writeToFile(data) return self.read() - def delete(self,*deleteKeys): + def delete(self, *deleteKeys): """Delete given keys from JSON file. Returns the new content as a dictionary. """ @@ -51,8 +54,8 @@ class JsonFile: return False self.__writeToFile(data) - def __writeToFile(self,content,create=False): + def __writeToFile(self, content, create=False): if not create: remove(self.FILEDIR) with open(self.FILEDIR, 'w') as f: - json.dump(content, f, indent=4) \ No newline at end of file + json.dump(content, f, indent=4) diff --git a/src/parser.py b/src/parser.py index c8389aa..b48ea6d 100644 --- a/src/parser.py +++ b/src/parser.py @@ -5,13 +5,14 @@ try: except ModuleNotFoundError: from errors import InvalidRedditLink -def QueryParser(PassedQueries,index): + +def QueryParser(PassedQueries, index): ExtractedQueries = {} QuestionMarkIndex = PassedQueries.index("?") Header = PassedQueries[:QuestionMarkIndex] ExtractedQueries["HEADER"] = Header - Queries = PassedQueries[QuestionMarkIndex+1:] + Queries = PassedQueries[QuestionMarkIndex + 1:] ParsedQueries = Queries.split("&") @@ -20,15 +21,16 @@ def QueryParser(PassedQueries,index): ExtractedQueries[Query[0]] = Query[1] if ExtractedQueries["HEADER"] == "search": - ExtractedQueries["q"] = ExtractedQueries["q"].replace("%20"," ") + ExtractedQueries["q"] = ExtractedQueries["q"].replace("%20", " ") return ExtractedQueries + def LinkParser(LINK): RESULT = {} ShortLink = False - if not "reddit.com" in LINK: + if "reddit.com" not in LINK: raise InvalidRedditLink("Invalid reddit link") SplittedLink = LINK.split("/") @@ -37,7 +39,7 @@ def LinkParser(LINK): SplittedLink = SplittedLink[2:] try: - if (SplittedLink[-2].endswith("reddit.com") and \ + if (SplittedLink[-2].endswith("reddit.com") and SplittedLink[-1] == "") or \ SplittedLink[-1].endswith("reddit.com"): @@ -53,16 +55,16 @@ def LinkParser(LINK): if SplittedLink[0].endswith("reddit.com"): SplittedLink = SplittedLink[1:] - + if "comments" in SplittedLink: - RESULT = {"post":LINK} + RESULT = {"post": LINK} return RESULT - - elif "me" in SplittedLink or \ - "u" in SplittedLink or \ - "user" in SplittedLink or \ - "r" in SplittedLink or \ - "m" in SplittedLink: + + if "me" in SplittedLink or \ + "u" in SplittedLink or \ + "user" in SplittedLink or \ + "r" in SplittedLink or \ + "m" in SplittedLink: if "r" in SplittedLink: RESULT["subreddit"] = SplittedLink[SplittedLink.index("r") + 1] @@ -70,47 +72,46 @@ def LinkParser(LINK): elif "m" in SplittedLink: RESULT["multireddit"] = SplittedLink[SplittedLink.index("m") + 1] RESULT["user"] = SplittedLink[SplittedLink.index("m") - 1] - + else: for index in range(len(SplittedLink)): if SplittedLink[index] == "u" or \ - SplittedLink[index] == "user": + SplittedLink[index] == "user": - RESULT["user"] = SplittedLink[index+1] + RESULT["user"] = SplittedLink[index + 1] elif SplittedLink[index] == "me": RESULT["user"] = "me" - for index in range(len(SplittedLink)): if SplittedLink[index] in [ - "hot","top","new","controversial","rising" - ]: + "hot", "top", "new", "controversial", "rising" + ]: RESULT["sort"] = SplittedLink[index] if index == 0: RESULT["subreddit"] = "frontpage" - - elif SplittedLink[index] in ["submitted","saved","posts","upvoted"]: + + elif SplittedLink[index] in ["submitted", "saved", "posts", "upvoted"]: if SplittedLink[index] == "submitted" or \ 
diff --git a/src/parser.py b/src/parser.py
index c8389aa..b48ea6d 100644
--- a/src/parser.py
+++ b/src/parser.py
@@ -5,13 +5,14 @@ try:
 except ModuleNotFoundError:
     from errors import InvalidRedditLink
 
-def QueryParser(PassedQueries,index):
+
+def QueryParser(PassedQueries, index):
     ExtractedQueries = {}
 
     QuestionMarkIndex = PassedQueries.index("?")
     Header = PassedQueries[:QuestionMarkIndex]
     ExtractedQueries["HEADER"] = Header
 
-    Queries = PassedQueries[QuestionMarkIndex+1:]
+    Queries = PassedQueries[QuestionMarkIndex + 1:]
 
     ParsedQueries = Queries.split("&")
 
@@ -20,15 +21,16 @@ def QueryParser(PassedQueries,index):
         ExtractedQueries[Query[0]] = Query[1]
 
     if ExtractedQueries["HEADER"] == "search":
-        ExtractedQueries["q"] = ExtractedQueries["q"].replace("%20"," ")
+        ExtractedQueries["q"] = ExtractedQueries["q"].replace("%20", " ")
 
     return ExtractedQueries
 
+
 def LinkParser(LINK):
     RESULT = {}
     ShortLink = False
 
-    if not "reddit.com" in LINK:
+    if "reddit.com" not in LINK:
         raise InvalidRedditLink("Invalid reddit link")
 
     SplittedLink = LINK.split("/")
@@ -37,7 +39,7 @@ def LinkParser(LINK):
         SplittedLink = SplittedLink[2:]
 
     try:
-        if (SplittedLink[-2].endswith("reddit.com") and \
+        if (SplittedLink[-2].endswith("reddit.com") and
             SplittedLink[-1] == "") or \
            SplittedLink[-1].endswith("reddit.com"):
 
@@ -53,16 +55,16 @@ def LinkParser(LINK):
 
     if SplittedLink[0].endswith("reddit.com"):
         SplittedLink = SplittedLink[1:]
-    
+
     if "comments" in SplittedLink:
-        RESULT = {"post":LINK}
+        RESULT = {"post": LINK}
         return RESULT
-        
-    elif "me" in SplittedLink or \
-        "u" in SplittedLink or \
-        "user" in SplittedLink or \
-        "r" in SplittedLink or \
-        "m" in SplittedLink:
+
+    if "me" in SplittedLink or \
+            "u" in SplittedLink or \
+            "user" in SplittedLink or \
+            "r" in SplittedLink or \
+            "m" in SplittedLink:
 
         if "r" in SplittedLink:
             RESULT["subreddit"] = SplittedLink[SplittedLink.index("r") + 1]
@@ -70,47 +72,46 @@ def LinkParser(LINK):
         elif "m" in SplittedLink:
             RESULT["multireddit"] = SplittedLink[SplittedLink.index("m") + 1]
             RESULT["user"] = SplittedLink[SplittedLink.index("m") - 1]
-        
+
         else:
             for index in range(len(SplittedLink)):
                 if SplittedLink[index] == "u" or \
-                   SplittedLink[index] == "user":
+                        SplittedLink[index] == "user":
 
-                    RESULT["user"] = SplittedLink[index+1]
+                    RESULT["user"] = SplittedLink[index + 1]
 
                 elif SplittedLink[index] == "me":
                     RESULT["user"] = "me"
 
-
     for index in range(len(SplittedLink)):
         if SplittedLink[index] in [
-            "hot","top","new","controversial","rising"
-            ]:
+            "hot", "top", "new", "controversial", "rising"
+        ]:
 
             RESULT["sort"] = SplittedLink[index]
             if index == 0:
                 RESULT["subreddit"] = "frontpage"
-        
-        elif SplittedLink[index] in ["submitted","saved","posts","upvoted"]:
+
+        elif SplittedLink[index] in ["submitted", "saved", "posts", "upvoted"]:
 
             if SplittedLink[index] == "submitted" or \
                SplittedLink[index] == "posts":
                 RESULT["submitted"] = {}
 
             elif SplittedLink[index] == "saved":
                 RESULT["saved"] = True
-            
+
             elif SplittedLink[index] == "upvoted":
                 RESULT["upvoted"] = True
 
             elif "?" in SplittedLink[index]:
-                ParsedQuery = QueryParser(SplittedLink[index],index)
+                ParsedQuery = QueryParser(SplittedLink[index], index)
 
                 if ParsedQuery["HEADER"] == "search":
                     del ParsedQuery["HEADER"]
                     RESULT["search"] = ParsedQuery
 
                 elif ParsedQuery["HEADER"] == "submitted" or \
-                     ParsedQuery["HEADER"] == "posts":
+                        ParsedQuery["HEADER"] == "posts":
 
                     del ParsedQuery["HEADER"]
                     RESULT["submitted"] = ParsedQuery
@@ -118,15 +119,16 @@ def LinkParser(LINK):
                     del ParsedQuery["HEADER"]
                     RESULT["queries"] = ParsedQuery
 
-    if not ("upvoted" in RESULT or \
-            "saved" in RESULT or \
-            "submitted" in RESULT or \
+    if not ("upvoted" in RESULT or
+            "saved" in RESULT or
+            "submitted" in RESULT or
             "multireddit" in RESULT) and \
        "user" in RESULT:
 
         RESULT["submitted"] = {}
 
     return RESULT
 
+
 def LinkDesigner(LINK):
 
     attributes = LinkParser(LINK)
@@ -138,13 +140,13 @@ def LinkDesigner(LINK):
         MODE["time"] = ""
         return MODE
 
-    elif "search" in attributes:
+    if "search" in attributes:
         MODE["search"] = attributes["search"]["q"]
 
         if "restrict_sr" in attributes["search"]:
-            
-            if not (attributes["search"]["restrict_sr"] == 0 or \
-                    attributes["search"]["restrict_sr"] == "off" or \
+
+            if not (attributes["search"]["restrict_sr"] == 0 or
+                    attributes["search"]["restrict_sr"] == "off" or
                     attributes["search"]["restrict_sr"] == ""):
 
                 if "subreddit" in attributes:
@@ -166,17 +168,17 @@ def LinkDesigner(LINK):
                 MODE["sort"] = attributes["search"]["sort"]
             else:
                 MODE["sort"] = "relevance"
-        
+
         if "include_over_18" in attributes["search"]:
             if attributes["search"]["include_over_18"] == 1 or \
-               attributes["search"]["include_over_18"] == "on":
+                    attributes["search"]["include_over_18"] == "on":
                 MODE["nsfw"] = True
             else:
                 MODE["nsfw"] = False
 
     else:
         if "queries" in attributes:
-            if not ("submitted" in attributes or \
+            if not ("submitted" in attributes or
                     "posts" in attributes):
 
                 if "t" in attributes["queries"]:
@@ -195,11 +197,11 @@ def LinkDesigner(LINK):
                 MODE["sort"] = "new"
             else:
                 MODE["time"] = "day"
-    
-    if "subreddit" in attributes and not "search" in attributes:
+
+    if "subreddit" in attributes and "search" not in attributes:
         MODE["subreddit"] = attributes["subreddit"]
 
-    elif "user" in attributes and not "search" in attributes:
+    elif "user" in attributes and "search" not in attributes:
         MODE["user"] = attributes["user"]
 
     if "submitted" in attributes:
@@ -221,7 +223,7 @@ def LinkDesigner(LINK):
 
     elif "upvoted" in attributes:
         MODE["upvoted"] = True
-    
+
     elif "multireddit" in attributes:
         MODE["multireddit"] = attributes["multireddit"]
 
@@ -231,10 +233,11 @@ def LinkDesigner(LINK):
         pass
     else:
         MODE["sort"] = "hot"
-    
+
     return MODE
 
+
 if __name__ == "__main__":
     while True:
         link = input("> ")
-        pprint(LinkDesigner(link))
\ No newline at end of file
+        pprint(LinkDesigner(link))
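The parser above is pure string slicing, so its behavior is easy to pin down in isolation: QueryParser() splits a trailing query string on "&" and "=" (unescaping "%20" only for searches), and LinkDesigner() folds the parsed pieces into the mode dictionary the rest of the program consumes. A sketch (not part of the patch):

    from src.parser import QueryParser

    print(QueryParser("search?q=cute%20cats&restrict_sr=on", 0))
    # {'HEADER': 'search', 'q': 'cute cats', 'restrict_sr': 'on'}
    # (the second argument is accepted but unused by the code above)
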
diff --git a/src/programMode.py b/src/programMode.py
index 85722ac..963e665 100644
--- a/src/programMode.py
+++ b/src/programMode.py
@@ -1,12 +1,12 @@
 from src.errors import SearchModeError, RedditorNameError, ProgramModeError, InvalidSortingType
-from src.utils import GLOBAL
 from src.parser import LinkDesigner
 
 from pathlib import Path
 import sys
 
+
 class ProgramMode:
-    def __init__(self,arguments):
+    def __init__(self, arguments):
         self.arguments = arguments
 
     def generate(self):
@@ -24,8 +24,8 @@ class ProgramMode:
         if self.arguments.search is not None:
             programMode["search"] = self.arguments.search
 
             if self.arguments.sort == "hot" or \
-               self.arguments.sort == "controversial" or \
-               self.arguments.sort == "rising":
+                    self.arguments.sort == "controversial" or \
+                    self.arguments.sort == "rising":
                 self.arguments.sort = "relevance"
 
         if self.arguments.sort is not None:
@@ -57,7 +57,7 @@ class ProgramMode:
             programMode["time"] = self.arguments.time
 
         elif self.arguments.subreddit is not None:
-            if type(self.arguments.subreddit) == list:
+            if isinstance(self.arguments.subreddit, list):
                 self.arguments.subreddit = "+".join(self.arguments.subreddit)
 
             programMode["subreddit"] = self.arguments.subreddit
@@ -76,7 +76,7 @@ class ProgramMode:
 
             if self.arguments.sort == "rising":
                 raise InvalidSortingType("Invalid sorting type has given")
-        
+
         programMode["limit"] = self.arguments.limit
 
         return programMode
 
@@ -84,29 +84,29 @@ class ProgramMode:
     @staticmethod
     def _chooseFrom(choices):
         print()
-        choicesByIndex = list(str(x) for x in range(len(choices)+1))
+        choicesByIndex = [str(x) for x in range(len(choices) + 1)]
         for i in range(len(choices)):
             print("{indent}[{order}] {mode}".format(
-                indent=" "*4,order=i+1,mode=choices[i]
+                indent=" " * 4, order=i + 1, mode=choices[i]
             ))
-        print(" "*4+"[0] exit\n")
+        print(" " * 4 + "[0] exit\n")
 
         choice = input("> ")
-        while not choice.lower() in choices+choicesByIndex+["exit"]:
+        while not choice.lower() in choices + choicesByIndex + ["exit"]:
             print("Invalid input\n")
             input("> ")
 
         if choice == "0" or choice == "exit":
             sys.exit()
         elif choice in choicesByIndex:
-            return choices[int(choice)-1]
+            return choices[int(choice) - 1]
         else:
             return choice
 
     def _promptUser(self):
         print("select program mode:")
         programModes = [
-            "search","subreddit","multireddit",
-            "submitted","upvoted","saved","log"
+            "search", "subreddit", "multireddit",
+            "submitted", "upvoted", "saved", "log"
        ]
         programMode = self._chooseFrom(programModes)
 
@@ -116,24 +116,25 @@ class ProgramMode:
 
             print("\nselect sort type:")
             sortTypes = [
-                "relevance","top","new"
+                "relevance", "top", "new"
             ]
             sortType = self._chooseFrom(sortTypes)
             self.arguments.sort = sortType
 
             print("\nselect time filter:")
             timeFilters = [
-                "hour","day","week","month","year","all"
+                "hour", "day", "week", "month", "year", "all"
             ]
             timeFilter = self._chooseFrom(timeFilters)
             self.arguments.time = timeFilter
 
         if programMode == "subreddit":
 
-            subredditInput = input("(type frontpage for all subscribed subreddits,\n" \
-                                   " use plus to seperate multi subreddits:" \
-                                   " pics+funny+me_irl etc.)\n\n" \
-                                   "subreddit: ")
+            subredditInput = input(
+                "(type frontpage for all subscribed subreddits,\n"
+                " use plus to separate multi subreddits:"
+                " pics+funny+me_irl etc.)\n\n"
+                "subreddit: ")
             self.arguments.subreddit = subredditInput
 
             # while not (subredditInput == "" or subredditInput.lower() == "frontpage"):
@@ -141,24 +142,25 @@ class ProgramMode:
             #     self.arguments.subreddit += "+" + subredditInput
 
             if " " in self.arguments.subreddit:
-                self.arguments.subreddit = "+".join(self.arguments.subreddit.split())
+                self.arguments.subreddit = "+".join(
+                    self.arguments.subreddit.split())
 
             # DELETE THE PLUS (+) AT THE END
             if not subredditInput.lower() == "frontpage" \
-               and self.arguments.subreddit[-1] == "+":
+                    and self.arguments.subreddit[-1] == "+":
                 self.arguments.subreddit = self.arguments.subreddit[:-1]
 
             print("\nselect sort type:")
             sortTypes = [
-                "hot","top","new","rising","controversial"
+                "hot", "top", "new", "rising", "controversial"
             ]
             sortType = self._chooseFrom(sortTypes)
             self.arguments.sort = sortType
 
-            if sortType in ["top","controversial"]:
+            if sortType in ["top", "controversial"]:
                 print("\nselect time filter:")
                 timeFilters = [
-                    "hour","day","week","month","year","all"
+                    "hour", "day", "week", "month", "year", "all"
                 ]
                 timeFilter = self._chooseFrom(timeFilters)
                 self.arguments.time = timeFilter
@@ -168,31 +170,31 @@ class ProgramMode:
         elif programMode == "multireddit":
             self.arguments.user = input("\nmultireddit owner: ")
             self.arguments.multireddit = input("\nmultireddit: ")
-            
+
             print("\nselect sort type:")
             sortTypes = [
-                "hot","top","new","rising","controversial"
+                "hot", "top", "new", "rising", "controversial"
             ]
             sortType = self._chooseFrom(sortTypes)
             self.arguments.sort = sortType
 
-            if sortType in ["top","controversial"]:
+            if sortType in ["top", "controversial"]:
                 print("\nselect time filter:")
                 timeFilters = [
-                    "hour","day","week","month","year","all"
+                    "hour", "day", "week", "month", "year", "all"
                 ]
                 timeFilter = self._chooseFrom(timeFilters)
                 self.arguments.time = timeFilter
             else:
                 self.arguments.time = "all"
-        
+
         elif programMode == "submitted":
             self.arguments.submitted = True
             self.arguments.user = input("\nredditor: ")
 
             print("\nselect sort type:")
             sortTypes = [
-                "hot","top","new","controversial"
+                "hot", "top", "new", "controversial"
             ]
             sortType = self._chooseFrom(sortTypes)
             self.arguments.sort = sortType
@@ -200,25 +202,25 @@ class ProgramMode:
             if sortType == "top":
                 print("\nselect time filter:")
                 timeFilters = [
-                    "hour","day","week","month","year","all"
+                    "hour", "day", "week", "month", "year", "all"
                 ]
                 timeFilter = self._chooseFrom(timeFilters)
                 self.arguments.time = timeFilter
             else:
                 self.arguments.time = "all"
-        
+
         elif programMode == "upvoted":
             self.arguments.upvoted = True
             self.arguments.user = input("\nredditor: ")
-        
+
         elif programMode == "saved":
             self.arguments.saved = True
-        
+
         elif programMode == "log":
             while True:
                 self.arguments.log = input("\nlog file directory:")
                 if Path(self.arguments.log).is_file():
-                        break
+                    break
 
         while True:
             try:
                 self.arguments.limit = int(input("\nlimit (0 for none): "))
@@ -241,30 +243,35 @@ class ProgramMode:
 
         search = 1 if self.arguments.search else 0
 
         modes = [
-            "saved","subreddit","submitted","log","link","upvoted","multireddit"
-        ]
+            "saved",
+            "subreddit",
+            "submitted",
+            "log",
+            "link",
+            "upvoted",
+            "multireddit"]
 
         values = {
-            x: 0 if getattr(self.arguments,x) is None or \
-               getattr(self.arguments,x) is False \
-               else 1 \
-               for x in modes
+            x: 0 if getattr(self.arguments, x) is None or
+            getattr(self.arguments, x) is False
+            else 1
+            for x in modes
         }
 
         if not sum(values[x] for x in values) == 1:
             raise ProgramModeError("Invalid program mode")
-        
-        if search+values["saved"] == 2:
+
+        if search + values["saved"] == 2:
             raise SearchModeError("You cannot search in your saved posts")
 
-        if search+values["submitted"] == 2:
+        if search + values["submitted"] == 2:
             raise SearchModeError("You cannot search in submitted posts")
 
-        if search+values["upvoted"] == 2:
+        if search + values["upvoted"] == 2:
             raise SearchModeError("You cannot search in upvoted posts")
 
-        if search+values["log"] == 2:
+        if search + values["log"] == 2:
             raise SearchModeError("You cannot search in log files")
 
-        if values["upvoted"]+values["submitted"] == 1 and user == 0:
+        if values["upvoted"] + values["submitted"] == 1 and user == 0:
             raise RedditorNameError("No redditor name given")
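The mode check at the end of the class reduces every CLI flag to 0 or 1 and insists the sum is exactly 1, a compact way of enforcing mutually exclusive options. The same pattern in isolation (a sketch, not part of the patch; the Namespace fields mirror the flags above):

    from argparse import Namespace

    args = Namespace(saved=False, subreddit="pics", submitted=None, log=None,
                     link=None, upvoted=False, multireddit=None)
    modes = ["saved", "subreddit", "submitted", "log",
             "link", "upvoted", "multireddit"]
    values = {x: 0 if getattr(args, x) is None or getattr(args, x) is False
              else 1 for x in modes}
    assert sum(values[x] for x in values) == 1  # exactly one mode is active
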
diff --git a/src/reddit.py b/src/reddit.py
index c5cb864..9dc374c 100644
--- a/src/reddit.py
+++ b/src/reddit.py
@@ -2,23 +2,24 @@ import praw
 import random
 import socket
 import webbrowser
-from prawcore.exceptions import NotFound, ResponseException, Forbidden
+from prawcore.exceptions import ResponseException
 
 from src.utils import GLOBAL
 from src.jsonHelper import JsonFile
 from src.errors import RedditLoginFailed
 
+
 class Reddit:
-    
-    def __init__(self,refresh_token=None):
-        self.SCOPES = ['identity','history','read','save']
+
+    def __init__(self, refresh_token=None):
+        self.SCOPES = ['identity', 'history', 'read', 'save']
         self.PORT = 7634
         self.refresh_token = refresh_token
         self.redditInstance = None
         self.arguments = {
-            "client_id":GLOBAL.reddit_client_id,
-            "client_secret":GLOBAL.reddit_client_secret,
-            "user_agent":str(socket.gethostname())
+            "client_id": GLOBAL.reddit_client_id,
+            "client_secret": GLOBAL.reddit_client_secret,
+            "user_agent": str(socket.gethostname())
         }
 
     def begin(self):
@@ -30,18 +31,20 @@ class Reddit:
                 self.redditInstance.auth.scopes()
                 return self.redditInstance
             except ResponseException:
-                self.arguments["redirect_uri"] = "http://localhost:" + str(self.PORT)
+                self.arguments["redirect_uri"] = "http://localhost:" + \
+                    str(self.PORT)
                 self.redditInstance = praw.Reddit(**self.arguments)
                 reddit, refresh_token = self.getRefreshToken(*self.SCOPES)
         else:
-            self.arguments["redirect_uri"] = "http://localhost:" + str(self.PORT)
+            self.arguments["redirect_uri"] = "http://localhost:" + \
+                str(self.PORT)
             self.redditInstance = praw.Reddit(**self.arguments)
             reddit, refresh_token = self.getRefreshToken(*self.SCOPES)
 
         JsonFile(GLOBAL.configDirectory).add({
             "reddit_username": str(reddit.user.me()),
             "reddit": refresh_token
-        },"credentials")
+        }, "credentials")
 
         return self.redditInstance
 
@@ -57,42 +60,45 @@ class Reddit:
             server.close()
 
         return client
 
-    def send_message(self, client, message):
+    @staticmethod
+    def send_message(client, message):
         """Send message to client and close the connection."""
         client.send(
             'HTTP/1.1 200 OK\r\n\r\n{}'.format(message).encode('utf-8')
         )
         client.close()
 
-    def getRefreshToken(self,*scopes):
+    def getRefreshToken(self, *scopes):
         state = str(random.randint(0, 65000))
         url = self.redditInstance.auth.url(scopes, state, 'permanent')
 
         print("---Setting up the Reddit API---\n")
-        print("Go to this URL and login to reddit:\n",url,sep="\n",end="\n\n")
-        webbrowser.open(url,new=2)
+        print(
+            "Go to this URL and login to reddit:\n",
+            url,
+            sep="\n",
+            end="\n\n")
+        webbrowser.open(url, new=2)
 
         client = self.recieve_connection()
         data = client.recv(1024).decode('utf-8')
         str(data)
 
         param_tokens = data.split(' ', 2)[1].split('?', 1)[1].split('&')
-        params = {
-            key: value for (key, value) in [token.split('=') \
-            for token in param_tokens]
-        }
+        params = dict([token.split('=')
+                       for token in param_tokens])
 
         if state != params['state']:
             self.send_message(
                 client,
                'State mismatch. Expected: {} Received: {}'
                 .format(state, params['state'])
             )
             raise RedditLoginFailed
-        elif 'error' in params:
+        if 'error' in params:
             self.send_message(client, params['error'])
             raise RedditLoginFailed
-        
+
         refresh_token = self.redditInstance.auth.authorize(params['code'])
         self.send_message(client,
-            ""
-        )
-        return (self.redditInstance,refresh_token)
+                          ""
+                          )
+        return (self.redditInstance, refresh_token)
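getRefreshToken() above drives the OAuth flow by hand: it opens the authorization URL in a browser, accepts a single HTTP request on localhost:7634, and scrapes state and code out of the request line. The parsing step on a sample request (a sketch, not part of the patch; the request line is illustrative of what the browser redirect delivers):

    data = "GET /?state=12345&code=abcdef HTTP/1.1\r\nHost: localhost:7634"
    param_tokens = data.split(' ', 2)[1].split('?', 1)[1].split('&')
    params = dict([token.split('=') for token in param_tokens])
    print(params)  # {'state': '12345', 'code': 'abcdef'}
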
diff --git a/src/searcher.py b/src/searcher.py
index a2bd04d..23033bf 100644
--- a/src/searcher.py
+++ b/src/searcher.py
@@ -1,25 +1,17 @@
-import os
 import sys
-import random
-import socket
 import time
-import webbrowser
 import urllib.request
-from urllib.error import HTTPError
-
-import praw
-from prawcore.exceptions import NotFound, ResponseException, Forbidden
+from prawcore.exceptions import NotFound, Forbidden
 
 from src.reddit import Reddit
 from src.utils import GLOBAL, createLogFile, printToFile
-from src.jsonHelper import JsonFile
 from src.errors import (NoMatchingSubmissionFound, NoPrawSupport,
-                        NoRedditSupport, MultiredditNotFound,
-                        InvalidSortingType, RedditLoginFailed,
-                        InsufficientPermission, DirectLinkNotFound)
+                        MultiredditNotFound,
+                        InvalidSortingType, InsufficientPermission)
 
 print = printToFile
 
+
 def getPosts(programMode):
     """Call PRAW regarding to arguments and pass it to extractDetails.
     Return what extractDetails has returned.
@@ -39,39 +31,39 @@ def getPosts(programMode):
     if programMode["user"] == "me":
         programMode["user"] = str(reddit.user.me())
 
-    if not "search" in programMode:
+    if "search" not in programMode:
         if programMode["sort"] == "top" or programMode["sort"] == "controversial":
             keyword_params = {
-                "time_filter":programMode["time"],
-                "limit":programMode["limit"]
+                "time_filter": programMode["time"],
+                "limit": programMode["limit"]
             }
 
         # OTHER SORT TYPES DON'T TAKE TIME_FILTER
         else:
             keyword_params = {
-                "limit":programMode["limit"]
+                "limit": programMode["limit"]
             }
     else:
         keyword_params = {
-            "time_filter":programMode["time"],
-            "limit":programMode["limit"]
-            }
+            "time_filter": programMode["time"],
+            "limit": programMode["limit"]
+        }
 
     if "search" in programMode:
-        if programMode["sort"] in ["hot","rising","controversial"]:
+        if programMode["sort"] in ["hot", "rising", "controversial"]:
             raise InvalidSortingType("Invalid sorting type has given")
 
         if "subreddit" in programMode:
-            print (
-                "search for \"{search}\" in\n" \
-                "subreddit: {subreddit}\nsort: {sort}\n" \
+            print(
+                "search for \"{search}\" in\n"
+                "subreddit: {subreddit}\nsort: {sort}\n"
                 "time: {time}\nlimit: {limit}\n".format(
                     search=programMode["search"],
                     limit=programMode["limit"],
                     sort=programMode["sort"],
                     subreddit=programMode["subreddit"],
                     time=programMode["time"]
-                ).upper(),noPrint=True
-            )
+                ).upper(), noPrint=True
+            )
             return extractDetails(
                 reddit.subreddit(programMode["subreddit"]).search(
                     programMode["search"],
@@ -81,15 +73,15 @@ def getPosts(programMode):
                 )
             )
 
-        elif "multireddit" in programMode:
-            raise NoPrawSupport("PRAW does not support that")
-
-        elif "user" in programMode:
+        if "multireddit" in programMode:
             raise NoPrawSupport("PRAW does not support that")
 
-        elif "saved" in programMode:
+        if "user" in programMode:
+            raise NoPrawSupport("PRAW does not support that")
+
+        if "saved" in programMode:
             raise ("Reddit does not support that")
-    
+
     if programMode["sort"] == "relevance":
         raise InvalidSortingType("Invalid sorting type has given")
 
     if "saved" in programMode:
         print(
             "saved posts\nuser:{username}\nlimit={limit}\n".format(
                 username=reddit.user.me(),
                 limit=programMode["limit"]
-            ).upper(),noPrint=True
+            ).upper(), noPrint=True
         )
-        return extractDetails(reddit.user.me().saved(limit=programMode["limit"]))
+        return extractDetails(
+            reddit.user.me().saved(
+                limit=programMode["limit"]))
 
     if "subreddit" in programMode:
 
         if programMode["subreddit"] == "frontpage":
 
-            print (
-                "subreddit: {subreddit}\nsort: {sort}\n" \
+            print(
+                "subreddit: {subreddit}\nsort: {sort}\n"
                 "time: {time}\nlimit: {limit}\n".format(
                     limit=programMode["limit"],
                     sort=programMode["sort"],
                     subreddit=programMode["subreddit"],
                     time=programMode["time"]
-                ).upper(),noPrint=True
+                ).upper(), noPrint=True
             )
             return extractDetails(
-                getattr(reddit.front,programMode["sort"]) (**keyword_params)
+                getattr(reddit.front, programMode["sort"])(**keyword_params)
             )
+        print(
+            "subreddit: {subreddit}\nsort: {sort}\n"
+            "time: {time}\nlimit: {limit}\n".format(
+                limit=programMode["limit"],
+                sort=programMode["sort"],
+                subreddit=programMode["subreddit"],
+                time=programMode["time"]
+            ).upper(), noPrint=True
+        )
+        return extractDetails(
+            getattr(
+                reddit.subreddit(programMode["subreddit"]), programMode["sort"]
+            )(**keyword_params)
+        )
 
-        else:
-            print (
-                "subreddit: {subreddit}\nsort: {sort}\n" \
-                "time: {time}\nlimit: {limit}\n".format(
-                    limit=programMode["limit"],
-                    sort=programMode["sort"],
-                    subreddit=programMode["subreddit"],
-                    time=programMode["time"]
-                ).upper(),noPrint=True
-            )
-            return extractDetails(
-                getattr(
-                    reddit.subreddit(programMode["subreddit"]),programMode["sort"]
-                ) (**keyword_params)
-            )
-
-    elif "multireddit" in programMode:
-        print (
-            "user: {user}\n" \
-            "multireddit: {multireddit}\nsort: {sort}\n" \
+    if "multireddit" in programMode:
+        print(
+            "user: {user}\n"
+            "multireddit: {multireddit}\nsort: {sort}\n"
             "time: {time}\nlimit: {limit}\n".format(
                 user=programMode["user"],
                 limit=programMode["limit"],
                 sort=programMode["sort"],
                 multireddit=programMode["multireddit"],
                 time=programMode["time"]
-            ).upper(),noPrint=True
+            ).upper(), noPrint=True
         )
         try:
             return extractDetails(
                 getattr(
                     reddit.multireddit(
                         programMode["user"], programMode["multireddit"]
-                    ),programMode["sort"]
-                ) (**keyword_params)
+                    ), programMode["sort"]
+                )(**keyword_params)
             )
         except NotFound:
             raise MultiredditNotFound("Multireddit not found")
 
     elif "submitted" in programMode:
-        print (
-            "submitted posts of {user}\nsort: {sort}\n" \
+        print(
+            "submitted posts of {user}\nsort: {sort}\n"
             "time: {time}\nlimit: {limit}\n".format(
                 limit=programMode["limit"],
                 sort=programMode["sort"],
                 user=programMode["user"],
                 time=programMode["time"]
-            ).upper(),noPrint=True
+            ).upper(), noPrint=True
         )
         return extractDetails(
             getattr(
-                reddit.redditor(programMode["user"]).submissions,programMode["sort"]
-            ) (**keyword_params)
+                reddit.redditor(programMode["user"]
+                                ).submissions, programMode["sort"]
+            )(**keyword_params)
         )
 
     elif "upvoted" in programMode:
-        print (
+        print(
             "upvoted posts of {user}\nlimit: {limit}\n".format(
                 user=programMode["user"],
                 limit=programMode["limit"]
-            ).upper(),noPrint=True
+            ).upper(), noPrint=True
         )
         try:
             return extractDetails(
-                reddit.redditor(programMode["user"]).upvoted(limit=programMode["limit"])
+                reddit.redditor(programMode["user"]).upvoted(
+                    limit=programMode["limit"])
             )
         except Forbidden:
-            raise InsufficientPermission("You do not have permission to do that")
+            raise InsufficientPermission(
+                "You do not have permission to do that")
 
     elif "post" in programMode:
-        print("post: {post}\n".format(post=programMode["post"]).upper(),noPrint=True)
+        print("post: {post}\n".format(
+            post=programMode["post"]).upper(), noPrint=True)
         return extractDetails(
-            reddit.submission(url=programMode["post"]),SINGLE_POST=True
+            reddit.submission(url=programMode["post"]), SINGLE_POST=True
         )
 
-def extractDetails(posts,SINGLE_POST=False):
+
+def extractDetails(posts, SINGLE_POST=False):
     """Check posts and decide if it can be downloaded.
     If so, create a dictionary with post details and append them to a list.
     Write all of posts to file. Return the list
@@ -210,32 +207,33 @@ def extractDetails(posts,SINGLE_POST=False):
 
     if SINGLE_POST:
         submission = posts
-        postCount += 1 
+        postCount += 1
         try:
-            details = {'POSTID':submission.id,
-                       'TITLE':submission.title,
-                       'REDDITOR':str(submission.author),
-                       'TYPE':None,
-                       'CONTENTURL':submission.url,
-                       'SUBREDDIT':submission.subreddit.display_name,
+            details = {'POSTID': submission.id,
+                       'TITLE': submission.title,
+                       'REDDITOR': str(submission.author),
+                       'TYPE': None,
+                       'CONTENTURL': submission.url,
+                       'SUBREDDIT': submission.subreddit.display_name,
                        'UPVOTES': submission.score,
-                       'FLAIR':submission.link_flair_text,
-                       'DATE':str(time.strftime(
-                            "%Y-%m-%d_%H-%M",
-                            time.localtime(submission.created_utc)
-                        ))}
+                       'FLAIR': submission.link_flair_text,
+                       'DATE': str(time.strftime(
+                           "%Y-%m-%d_%H-%M",
+                           time.localtime(submission.created_utc)
+                       ))}
             if 'gallery' in submission.url:
                 details['CONTENTURL'] = genLinksifGallery(submission.media_metadata)
         except AttributeError:
             pass
 
-        if not any(domain in submission.domain for domain in GLOBAL.arguments.skip_domain):
+        if not any(
+                domain in submission.domain for domain in GLOBAL.arguments.skip_domain):
             result = matchWithDownloader(submission)
 
             if result is not None:
                 details = {**details, **result}
                 postList.append(details)
-            postsFile.add({postCount:details})
+            postsFile.add({postCount: details})
 
     else:
         try:
@@ -246,49 +244,51 @@ def extractDetails(posts,SINGLE_POST=False):
                 sys.stdout.flush()
 
                 if postCount % 1000 == 0:
-                    sys.stdout.write("\n"+" "*14)
+                    sys.stdout.write("\n" + " " * 14)
                     sys.stdout.flush()
 
                 try:
-                    details = {'POSTID':submission.id,
-                               'TITLE':submission.title,
-                               'REDDITOR':str(submission.author),
-                               'TYPE':None,
-                               'CONTENTURL':submission.url,
-                               'SUBREDDIT':submission.subreddit.display_name,
-                               'UPVOTES': submission.score,
-                               'FLAIR':submission.link_flair_text,
-                               'DATE':str(time.strftime(
-                                    "%Y-%m-%d_%H-%M",
-                                    time.localtime(submission.created_utc)
-                                ))}
+                    details = {'POSTID': submission.id,
+                               'TITLE': submission.title,
+                               'REDDITOR': str(submission.author),
+                               'TYPE': None,
+                               'CONTENTURL': submission.url,
+                               'SUBREDDIT': submission.subreddit.display_name,
+                               'UPVOTES': submission.score,
+                               'FLAIR': submission.link_flair_text,
+                               'DATE': str(time.strftime(
+                                   "%Y-%m-%d_%H-%M",
+                                   time.localtime(submission.created_utc)
+                               ))}
                     if 'gallery' in submission.url:
                         details['CONTENTURL'] = genLinksifGallery(submission.media_metadata)
                 except AttributeError:
                     continue
 
-                if details['POSTID'] in GLOBAL.downloadedPosts(): continue
+                if details['POSTID'] in GLOBAL.downloadedPosts():
+                    continue
 
-                if not any(domain in submission.domain for domain in GLOBAL.arguments.skip_domain):
+                if not any(
+                        domain in submission.domain for domain in GLOBAL.arguments.skip_domain):
                     result = matchWithDownloader(submission)
 
                     if result is not None:
                         details = {**details, **result}
                         postList.append(details)
-                
+
                 allPosts[postCount] = details
                 postCount += 1
-        
+
         except KeyboardInterrupt:
-            print("\nKeyboardInterrupt",noPrint=True)
-        
+            print("\nKeyboardInterrupt", noPrint=True)
+
         postsFile.add(allPosts)
 
-    if not len(postList) == 0:
+    if len(postList) != 0:
         print()
         return postList
-    else:
-        raise NoMatchingSubmissionFound("No matching submission was found")
+    raise NoMatchingSubmissionFound("No matching submission was found")
+
 
 def matchWithDownloader(submission):
 
@@ -297,29 +297,29 @@ def matchWithDownloader(submission):
 
     directLink = extractDirectLink(submission.url)
 
     if directLink:
-        return {'TYPE': 'direct',
-                'CONTENTURL': directLink}
+        return {'TYPE': 'direct',
+                'CONTENTURL': directLink}
 
     if 'v.redd.it' in submission.domain:
-        bitrates = ["DASH_1080","DASH_720","DASH_600", \
-                    "DASH_480","DASH_360","DASH_240"]
-        
-        for bitrate in bitrates:
-            videoURL = submission.url+"/"+bitrate+".mp4"
+        bitrates = ["DASH_1080", "DASH_720", "DASH_600",
+                    "DASH_480", "DASH_360", "DASH_240"]
 
-            try:
+        for bitrate in bitrates:
+            videoURL = submission.url + "/" + bitrate + ".mp4"
+
+            try:
                 responseCode = urllib.request.urlopen(videoURL).getcode()
             except urllib.error.HTTPError:
                 responseCode = 0
 
             if responseCode == 200:
-                return {'TYPE': 'v.redd.it', 'CONTENTURL': videoURL}
+                return {'TYPE': 'v.redd.it', 'CONTENTURL': videoURL}
 
     if 'gfycat' in submission.domain:
         return {'TYPE': 'gfycat'}
 
     if 'youtube' in submission.domain \
-       and 'watch' in submission.url:
+            and 'watch' in submission.url:
         return {'TYPE': 'youtube'}
 
     if 'youtu.be' in submission.domain:
@@ -341,18 +341,19 @@ def matchWithDownloader(submission):
 
     if 'reddit.com/gallery' in submission.url:
         return {'TYPE': 'gallery'}
-    
-    elif submission.is_self and 'self' not in GLOBAL.arguments.skip:
+
+    if submission.is_self and 'self' not in GLOBAL.arguments.skip:
         return {'TYPE': 'self',
                 'CONTENT': submission.selftext}
 
+
 def extractDirectLink(URL):
     """Check if link is a direct image link.
     If so, return URL,
     if not, return False
     """
 
-    imageTypes = ['jpg','jpeg','png','mp4','webm','gif']
+    imageTypes = ['jpg', 'jpeg', 'png', 'mp4', 'webm', 'gif']
     if URL[-1] == "/":
         URL = URL[:-1]
 
@@ -362,11 +363,11 @@ def extractDirectLink(URL):
     for extension in imageTypes:
         if extension == URL.split(".")[-1]:
             return URL
-    else:
-        return None
+
+    return None
 
 
 def genLinksifGallery(metadata):
     galleryImgUrls = list()
     for key in metadata:
         galleryImgUrls.append(metadata[key]['s']['u'].split('?')[0].replace('preview','i'))
-    return galleryImgUrls
\ No newline at end of file
+    return galleryImgUrls
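extractDirectLink() above decides "direct" downloads purely by the URL's trailing extension, after stripping a trailing slash. Given the matching loop, and assuming the lines elided by the hunk gap do not short-circuit first, something like (a sketch, not part of the patch; URLs are made up):

    from src.searcher import extractDirectLink

    print(extractDirectLink("https://example.com/funny.gif"))  # the URL itself
    print(extractDirectLink("https://example.com/thread/42/"))  # None
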
diff --git a/src/store.py b/src/store.py
index 9da77ce..2f24afa 100644
--- a/src/store.py
+++ b/src/store.py
@@ -1,7 +1,8 @@
 from os import path
 
+
 class Store:
-    def __init__(self,directory=None):
+    def __init__(self, directory=None):
         self.directory = directory
         if self.directory:
             if path.exists(directory):
diff --git a/src/utils.py b/src/utils.py
index 95d44d5..402db90 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -1,27 +1,28 @@
 import io
-import json
 import sys
-from os import makedirs, path, remove
+from os import makedirs, path
 from pathlib import Path
 
 from src.jsonHelper import JsonFile
-from src.errors import FileNotFoundError
 
+
 class GLOBAL:
     """Declare global variables"""
 
     RUN_TIME = ""
-    config = {'imgur_client_id':None, 'imgur_client_secret': None}
+    config = {'imgur_client_id': None, 'imgur_client_secret': None}
     arguments = None
     directory = None
     defaultConfigDirectory = Path.home() / "Bulk Downloader for Reddit"
     configDirectory = ""
 
     reddit_client_id = "U-6gk4ZCh3IeNQ"
     reddit_client_secret = "7CZHY6AmKweZME5s50SfDGylaPg"
 
-    downloadedPosts = lambda: []
+    @staticmethod
+    def downloadedPosts(): return []
     printVanilla = print
-    log_stream= None
+    log_stream = None
+
 
 def createLogFile(TITLE):
     """Create a log file with given name
@@ -29,43 +30,46 @@ def createLogFile(TITLE):
     put given arguments inside \"HEADER\" key
     """
 
-    folderDirectory = GLOBAL.directory / "LOG_FILES" / GLOBAL.RUN_TIME 
+    folderDirectory = GLOBAL.directory / "LOG_FILES" / GLOBAL.RUN_TIME
 
-    logFilename = TITLE.upper()+'.json'
+    logFilename = TITLE.upper() + '.json'
 
     if not path.exists(folderDirectory):
         makedirs(folderDirectory)
 
     FILE = JsonFile(folderDirectory / Path(logFilename))
     HEADER = " ".join(sys.argv)
-    FILE.add({"HEADER":HEADER})
+    FILE.add({"HEADER": HEADER})
 
     return FILE
 
-def printToFile(*args, noPrint=False,**kwargs):
-    """Print to both CONSOLE and 
+
+def printToFile(*args, noPrint=False, **kwargs):
+    """Print to both CONSOLE and
     CONSOLE LOG file in a folder time stampt in the name
     """
-    
-    folderDirectory = GLOBAL.directory / Path("LOG_FILES") / Path(GLOBAL.RUN_TIME)
+
+    folderDirectory = GLOBAL.directory / \
+        Path("LOG_FILES") / Path(GLOBAL.RUN_TIME)
 
     if not noPrint or \
        GLOBAL.arguments.verbose or \
        "file" in kwargs:
-        
-        print(*args,**kwargs)
+
+        print(*args, **kwargs)
 
     if not path.exists(folderDirectory):
         makedirs(folderDirectory)
-    
-    if not "file" in kwargs:
-        with io.open(
-            folderDirectory / "CONSOLE_LOG.txt","a",encoding="utf-8"
-        ) as FILE:
-            print(*args, file=FILE, **kwargs)
 
-def nameCorrector(string,reference=None):
-    """Swap strange characters from given string
+    if "file" not in kwargs:
+        with io.open(
+            folderDirectory / "CONSOLE_LOG.txt", "a", encoding="utf-8"
+        ) as FILE:
+            print(*args, file=FILE, **kwargs)
+
+
+def nameCorrector(string, reference=None):
+    """Swap strange characters from given string
     with underscore (_) and shorten it.
     Return the string
     """
@@ -82,14 +86,15 @@ def nameCorrector(string,reference=None):
     if totalLenght > LIMIT:
         limit = LIMIT - referenceLenght
-        string = string[:limit-1]
+        string = string[:limit - 1]
 
     string = string.replace(" ", "_")
-    
+
     if len(string.split('\n')) > 1:
         string = "".join(string.split('\n'))
-    
-    BAD_CHARS = ['\\','/',':','*','?','"','<','>','|','#', '.', '@' ,'“', '’', '\'', '!']
+
+    BAD_CHARS = ['\\', '/', ':', '*', '?', '"', '<',
+                 '>', '|', '#', '.', '@', '“', '’', '\'', '!']
 
     string = "".join([i if i not in BAD_CHARS else "_" for i in string])
 
-    return string
\ No newline at end of file
+    return string
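nameCorrector() above maps spaces to underscores, joins any newlines, and then replaces filesystem-hostile characters with underscores; the truncation branch only fires when the name plus its reference path exceeds the length limit computed in the lines elided by the hunk gap. On a short title (a sketch, not part of the patch):

    from src.utils import nameCorrector

    print(nameCorrector("What's up? #1"))  # What_s_up___1
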