Initial commit
script.py (new file, 533 lines added)
@@ -0,0 +1,533 @@
#!/usr/bin/env python

"""
This program downloads imgur, gfycat and direct image and video links of
saved posts from a reddit account. It is written in Python 3.
"""

import argparse
import os
import sys
import time
from pathlib import Path, PurePath

from src.downloader import Direct, Gfycat, Imgur
from src.parser import LinkDesigner
from src.searcher import getPosts
from src.tools import (GLOBAL, createLogFile, jsonFile, nameCorrector,
                       printToFile)
from src.errors import *

__author__ = "Ali Parlakci"
__license__ = "GPL"
__version__ = "1.0.0"
__maintainer__ = "Ali Parlakci"
__email__ = "parlakciali@gmail.com"

def debug(*post):
    GLOBAL.config = getConfig('config.json')
    GLOBAL.directory = Path(".\\debug\\")
    download([*post])
    quit()

def getConfig(configFileName):
    """Read credentials from config.json file"""

    keys = ['imgur_client_id',
            'imgur_client_secret']

    if os.path.exists(configFileName):
        FILE = jsonFile(configFileName)
        content = FILE.read()
        if "reddit_refresh_token" in content:
            if content["reddit_refresh_token"] == "":
                FILE.delete("reddit_refresh_token")
        for key in keys:
            try:
                if content[key] == "":
                    raise KeyError
            except KeyError:
                print(key,": ")
                FILE.add({key:input()})
        return jsonFile(configFileName).read()

    else:
        FILE = jsonFile(configFileName)
        configDictionary = {}
        for key in keys:
            configDictionary[key] = input(key + ": ")
        FILE.add(configDictionary)
        return FILE.read()

def parseArguments(arguments=[]):
    """Initialize argparse and add arguments"""

    parser = argparse.ArgumentParser(allow_abbrev=False,
                                     description="This program downloads " \
                                                 "media from reddit " \
                                                 "posts")
    parser.add_argument("directory",
                        help="Specifies the directory where posts will be " \
                             "downloaded to",
                        metavar="DIRECTORY")

    parser.add_argument("--link","-l",
                        help="Get posts from link",
                        metavar="link")

    parser.add_argument("--saved",
                        action="store_true",
                        help="Triggers saved mode")

    parser.add_argument("--submitted",
                        action="store_true",
                        help="Gets posts of --user")

    parser.add_argument("--upvoted",
                        action="store_true",
                        help="Gets upvoted posts of --user")

    parser.add_argument("--log",
                        help="Triggers log read mode and takes a log file",
                        # type=argparse.FileType('r'),
                        metavar="LOG FILE")

    parser.add_argument("--subreddit",
                        nargs="+",
                        help="Triggers subreddit mode and takes subreddit's " \
                             "name without r/. use \"frontpage\" for frontpage",
                        metavar="SUBREDDIT",
                        type=str)

    parser.add_argument("--multireddit",
                        help="Triggers multireddit mode and takes "\
                             "multireddit's name without m/",
                        metavar="MULTIREDDIT",
                        type=str)

    parser.add_argument("--user",
                        help="reddit username if needed. use \"me\" for " \
                             "current user",
                        required="--multireddit" in sys.argv or \
                                 "--submitted" in sys.argv,
                        metavar="redditor",
                        type=str)

    parser.add_argument("--search",
                        help="Searches for given query in given subreddits",
                        metavar="query",
                        type=str)

    parser.add_argument("--sort",
                        help="Either hot, top, new, controversial, rising " \
                             "or relevance. default: hot",
                        choices=[
                            "hot","top","new","controversial","rising",
                            "relevance"
                        ],
                        metavar="SORT TYPE",
                        type=str)

    parser.add_argument("--limit",
                        help="default: unlimited",
                        metavar="Limit",
                        default=None,
                        type=int)

    parser.add_argument("--time",
                        help="Either hour, day, week, month, year or all." \
                             " default: all",
                        choices=["all","hour","day","week","month","year"],
                        metavar="TIME_LIMIT",
                        type=str)

    parser.add_argument("--NoDownload",
                        help="Just gets the posts and stores them in a file" \
                             " for downloading later",
                        action="store_true",
                        default=False)

    if arguments == []:
        return parser.parse_args()
    else:
        return parser.parse_args(arguments)

def checkConflicts():
    """Check if command-line arguments are given correctly;
    if not, print an error message and quit
    """

    if GLOBAL.arguments.saved is False:
        saved = 0
    else:
        saved = 1

    if GLOBAL.arguments.subreddit is None:
        subreddit = 0
    else:
        subreddit = 1

    if GLOBAL.arguments.submitted is False:
        submitted = 0
    else:
        submitted = 1

    if GLOBAL.arguments.search is None:
        search = 0
    else:
        search = 1

    if GLOBAL.arguments.log is None:
        log = 0
    else:
        log = 1

    if GLOBAL.arguments.link is None:
        link = 0
    else:
        link = 1

    if GLOBAL.arguments.user is None:
        user = 0
    else:
        user = 1

    if GLOBAL.arguments.upvoted is False:
        upvoted = 0
    else:
        upvoted = 1

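    # Exactly one download mode (saved, subreddit, log, link, submitted or
    # upvoted) may be active at a time; the flags above are summed to check this.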
    if not saved+subreddit+log+link+submitted+upvoted == 1:
        print("Program mode is invalid")
        quit()

    if search+saved == 2:
        print("You cannot search in your saved posts")
        quit()

    if search+submitted == 2:
        print("You cannot search in submitted posts")
        quit()

    if search+upvoted == 2:
        print("You cannot search in upvoted posts")
        quit()

    if upvoted+submitted == 1 and user == 0:
        print("No redditor name given")
        quit()

def postFromLog(fileName):
    """Analyze a log file and return a list of dictionaries containing
    submissions
    """
    if Path.is_file(Path(fileName)):
        content = jsonFile(fileName).read()
    else:
        print("File not found")
        quit()

    try:
        del content["HEADER"]
    except KeyError:
        pass

    posts = []

    for post in content:
        if content[post][-1]['postType'] is not None:
            posts.append(content[post][-1])

    return posts

def prepareAttributes():
    ATTRIBUTES = {}

    if GLOBAL.arguments.user is not None:
        ATTRIBUTES["user"] = GLOBAL.arguments.user

    if GLOBAL.arguments.search is not None:
        ATTRIBUTES["search"] = GLOBAL.arguments.search
        if GLOBAL.arguments.sort == "hot" or \
           GLOBAL.arguments.sort == "controversial" or \
           GLOBAL.arguments.sort == "rising":
            GLOBAL.arguments.sort = "relevance"

    if GLOBAL.arguments.sort is not None:
        ATTRIBUTES["sort"] = GLOBAL.arguments.sort
    else:
        if GLOBAL.arguments.submitted:
            ATTRIBUTES["sort"] = "new"
        else:
            ATTRIBUTES["sort"] = "hot"

    if GLOBAL.arguments.time is not None:
        ATTRIBUTES["time"] = GLOBAL.arguments.time
    else:
        ATTRIBUTES["time"] = "all"

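    # A direct reddit link takes precedence over the other modes: LinkDesigner
    # derives the attributes from the URL itself, and any explicit --search,
    # --sort or --time options are then layered on top of the result.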
    if GLOBAL.arguments.link is not None:

        GLOBAL.arguments.link = GLOBAL.arguments.link.strip("\"")

        try:
            ATTRIBUTES = LinkDesigner(GLOBAL.arguments.link)
        except InvalidRedditLink:
            raise

        if GLOBAL.arguments.search is not None:
            ATTRIBUTES["search"] = GLOBAL.arguments.search

        if GLOBAL.arguments.sort is not None:
            ATTRIBUTES["sort"] = GLOBAL.arguments.sort

        if GLOBAL.arguments.time is not None:
            ATTRIBUTES["time"] = GLOBAL.arguments.time

    elif GLOBAL.arguments.subreddit is not None:
        GLOBAL.arguments.subreddit = "+".join(GLOBAL.arguments.subreddit)

        ATTRIBUTES["subreddit"] = GLOBAL.arguments.subreddit

    elif GLOBAL.arguments.saved is True:
        ATTRIBUTES["saved"] = True

    elif GLOBAL.arguments.upvoted is True:
        ATTRIBUTES["upvoted"] = True

    elif GLOBAL.arguments.submitted is True:
        ATTRIBUTES["submitted"] = True

        if GLOBAL.arguments.sort == "rising":
            raise InvalidSortingType

    ATTRIBUTES["limit"] = GLOBAL.arguments.limit

    return ATTRIBUTES

def postExists(POST):
    """Figure out a file's name and check if the file already exists"""

    title = nameCorrector(POST['postTitle'])
    FILENAME = title + "_" + POST['postId']
    PATH = GLOBAL.directory / POST["postSubreddit"]
    possibleExtensions = [".jpg",".png",".mp4",".gif",".webm"]

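    # Two passes over the known extensions: first look for "<title>_<id>.<ext>",
    # then fall back to "<id>.<ext>" in case the file was saved under the post
    # id alone.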
    for i in range(2):
        for extension in possibleExtensions:
            FILE_PATH = PATH / (FILENAME+extension)
            if FILE_PATH.exists():
                return True
        else:
            FILENAME = POST['postId']
    else:
        return False

def download(submissions):
    """Analyze list of submissions and call the right function
    to download each one, catch errors, update the log files
    """

    subsLength = len(submissions)
    lastRequestTime = 0
    downloadedCount = subsLength
    duplicates = 0
    BACKUP = {}

    FAILED_FILE = createLogFile("FAILED")

    for i in range(subsLength):
        print("\n({}/{})".format(i+1,subsLength))
        print(
            "https://reddit.com/r/{subreddit}/comments/{id}".format(
                subreddit=submissions[i]['postSubreddit'],
                id=submissions[i]['postId']
            )
        )

        if postExists(submissions[i]):
            print("It already exists")
            duplicates += 1
            downloadedCount -= 1
            continue

        directory = GLOBAL.directory / submissions[i]['postSubreddit']

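        # Imgur posts go through the imgur API, so the remaining client and
        # user credits are checked (and reported) before attempting the
        # download.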
        if submissions[i]['postType'] == 'imgur':
            print("IMGUR",end="")

            while int(time.time() - lastRequestTime) <= 2:
                pass
            credit = Imgur.get_credits()

            IMGUR_RESET_TIME = credit['UserReset']-time.time()
            USER_RESET = ("after " \
                          + str(int(IMGUR_RESET_TIME/60)) \
                          + " Minutes " \
                          + str(int(IMGUR_RESET_TIME%60)) \
                          + " Seconds")
            print(
                " => Client: {} - User: {} - Reset {}".format(
                    credit['ClientRemaining'],
                    credit['UserRemaining'],
                    USER_RESET
                )
            )

            if not (credit['UserRemaining'] == 0 or \
                    credit['ClientRemaining'] == 0):

                # Throttle: leave at least two seconds between successive
                # imgur requests before downloading.
                while int(time.time() - lastRequestTime) <= 2:
                    pass
                lastRequestTime = time.time()

                try:
                    Imgur(directory,submissions[i])

                except FileAlreadyExistsError:
                    print("It already exists")
                    duplicates += 1
                    downloadedCount -= 1

                except ImgurLoginError:
                    print(
                        "Imgur login failed. Quitting the program "\
                        "as unexpected errors might occur."
                    )
                    quit()

                except Exception as exception:
                    print(exception)
                    FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
                    downloadedCount -= 1

            else:
                if credit['UserRemaining'] == 0:
                    KEYWORD = "user"
                elif credit['ClientRemaining'] == 0:
                    KEYWORD = "client"

                print('{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()))
                FAILED_FILE.add(
                    {int(i+1):['{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()),
                               submissions[i]]}
                )
                downloadedCount -= 1

        elif submissions[i]['postType'] == 'gfycat':
            print("GFYCAT")
            try:
                Gfycat(directory,submissions[i])

            except FileAlreadyExistsError:
                print("It already exists")
                duplicates += 1
                downloadedCount -= 1

            except NotADownloadableLinkError as exception:
                print("Could not read the page source")
                FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
                downloadedCount -= 1

            except Exception as exception:
                print(exception)
                FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
                downloadedCount -= 1

        elif submissions[i]['postType'] == 'direct':
            print("DIRECT")
            try:
                Direct(directory,submissions[i])

            except FileAlreadyExistsError:
                print("It already exists")
                downloadedCount -= 1
                duplicates += 1

            except Exception as exception:
                print(exception)
                FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
                downloadedCount -= 1

        else:
            print("No match found, skipping...")
            downloadedCount -= 1

    if duplicates:
        print("\n There were {} duplicates".format(duplicates))

    if downloadedCount == 0:
        print(" Nothing downloaded :(")

    else:
        print(" Total of {} links downloaded!".format(downloadedCount))

def main():
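    # If the last command-line token is the script itself (i.e. no arguments
    # were passed), prompt for the arguments interactively; otherwise parse
    # sys.argv as usual.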
    if sys.argv[-1].endswith(__file__):
        GLOBAL.arguments = parseArguments(input("> ").split())
    else:
        GLOBAL.arguments = parseArguments()
    if GLOBAL.arguments.directory is not None:
        GLOBAL.directory = Path(GLOBAL.arguments.directory)
    else:
        print("Invalid directory")
        quit()
    GLOBAL.config = getConfig(Path(PurePath(__file__).parent / 'config.json'))

    checkConflicts()

    mode = prepareAttributes()

    print(sys.argv)

    if GLOBAL.arguments.log is not None:
        logDir = Path(GLOBAL.arguments.log)
        download(postFromLog(logDir))
        quit()

    try:
        # Reuse the attributes prepared above; calling prepareAttributes()
        # a second time would re-join the already joined subreddit string.
        POSTS = getPosts(mode)
    except InsufficientPermission:
        print("You do not have permission to do that")
        quit()
    except NoMatchingSubmissionFound:
        print("No matching submission was found")
        quit()
    except NoRedditSupoort:  # (sic) name as exported by src.errors
        print("Reddit does not support that")
        quit()
    except NoPrawSupport:
        print("PRAW does not support that")
        quit()
    except MultiredditNotFound:
        print("Multireddit not found")
        quit()
    except InvalidSortingType:
        print("Invalid sorting type was given")
        quit()
    except InvalidRedditLink:
        print("Invalid reddit link")
        quit()

    if POSTS is None:
        print("I could not find any posts in that URL")
        quit()

    if GLOBAL.arguments.NoDownload:
        quit()

    else:
        download(POSTS)

if __name__ == "__main__":
    try:
        # Keep the built-in print available as VanillaPrint and rebind the
        # name to printToFile from src.tools (which presumably also records
        # the output in a log file).
        VanillaPrint = print
        print = printToFile
        GLOBAL.RUN_TIME = time.time()
        main()
    except KeyboardInterrupt:
        print("\nQUITTING...")
        quit()