From ccedac4bdcaa07fcf00670d2e7d9410c49ab70e0 Mon Sep 17 00:00:00 2001 From: Ali Parlakci Date: Tue, 24 Jul 2018 12:44:53 +0300 Subject: [PATCH 1/4] Add submitter to file name --- README.md | 3 +-- script.py | 29 +++++++++++++++------- src/downloader.py | 61 +++++++++++++++++++++++++++++++---------------- 3 files changed, 63 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index 3d057ca..992dfd4 100644 --- a/README.md +++ b/README.md @@ -6,13 +6,12 @@ This program downloads imgur, gfycat and direct image and video links of saved p ## What it can do - Can get posts from: frontpage, subreddits, multireddits, redditor's submissions, upvoted and saved posts; search results or just plain reddit links - Sorts posts by hot, top, new and so on -- Downloads imgur albums, gfycat links, [self posts](#how-do-i-open-self-post-files) and any link to a direct image +- Downloads **REDDIT** images and videos, **IMGUR** images and albums, **GFYCAT** links, **EROME** images and albums, **SELF POSTS** and any link to a direct image - Skips the existing ones - Puts post titles to file's name - Puts every post to its subreddit's folder - Saves a reusable copy of posts' details that are found so that they can be re-downloaded again - Logs failed ones in a file to so that you can try to download them later -- Can run with double-clicking on Windows ## [Download the latest release](https://github.com/aliparlakci/bulk-downloader-for-reddit/releases/latest) diff --git a/script.py b/script.py index fd97735..9749a26 100644 --- a/script.py +++ b/script.py @@ -427,17 +427,30 @@ def postExists(POST): """Figure out a file's name and checks if the file already exists""" title = nameCorrector(POST['postTitle']) - FILENAME = title + "_" + POST['postId'] + FILENAME = title + "_" + POST['postId'] + "_" + POST["postSubmitter"] PATH = GLOBAL.directory / POST["postSubreddit"] + possibleExtensions = [".jpg",".png",".mp4",".gif",".webm",".md"] - for i in range(2): - for extension in possibleExtensions: - FILE_PATH = PATH / (FILENAME+extension) - if FILE_PATH.exists(): - return True - else: - FILENAME = POST['postId'] + for extension in possibleExtensions: + + OLD_FILE_PATH = PATH / ( + title + + "_" + POST['postId'] + + extension + ) + FILE_PATH = PATH / ( + title + + "_" + POST['postId'] + + "_" + POST["postSubmitter"] + + extension + ) + + SHORT_FILE_PATH = PATH / (POST['postId']+extension) + + if OLD_FILE_PATH.exists() FILE_PATH.exists() or SHORT_FILE_PATH.exists(): + return True + else: return False diff --git a/src/downloader.py b/src/downloader.py index 705e259..da8d5b3 100644 --- a/src/downloader.py +++ b/src/downloader.py @@ -89,11 +89,12 @@ class Erome: title = nameCorrector(post['postTitle']) print(title+"_" +post['postId']+extension) - fileDir = title + "_" + post['postId'] + extension - fileDir = directory / fileDir - - tempDir = title + "_" + post['postId'] + '.tmp' - tempDir = directory / tempDir + fileDir = directory / ( + title+"_"+POST["postSubmitter"]+"_"+POST['postId']+extension + ) + tempDir = directory / ( + title+"_"+POST["postSubmitter"]+"_"+POST['postId']+".tmp" + ) imageURL = "https:" + IMAGES[0] @@ -108,7 +109,9 @@ class Erome: title = nameCorrector(post['postTitle']) print(title+"_"+post['postId'],end="\n\n") - folderDir = directory / (title+"_"+post['postId']) + folderDir = directory / ( + title + "_" + post["postSubmitter"] + "_" + post['postId'] + ) try: if not os.path.exists(folderDir): @@ -215,11 +218,20 @@ class Imgur: title = nameCorrector(post['postTitle']) print(title+"_" +post['postId']+post['postExt']) - fileDir = title + "_" + post['postId'] + post['postExt'] - fileDir = directory / fileDir + fileDir = directory / ( + title + + "_" + post["postSubmitter"] + + "_" + post['postId'] + + post['postExt'] + ) + + tempDir = directory / ( + title + + "_" + post["postSubmitter"] + + "_" + post['postId'] + + ".tmp" + ) - tempDir = title + "_" + post['postId'] + '.tmp' - tempDir = directory / tempDir try: getFile(fileDir,tempDir,post['mediaURL']) except FileNameTooLong: @@ -237,7 +249,9 @@ class Imgur: title = nameCorrector(post['postTitle']) print(title+"_"+post['postId'],end="\n\n") - folderDir = directory / (title+"_"+post['postId']) + folderDir = directory / ( + title + "_" + post["postSubmitter"] + "_" + post['postId'] + ) try: if not os.path.exists(folderDir): @@ -357,8 +371,13 @@ class Gfycat: title = nameCorrector(POST['postTitle']) print(title+"_"+POST['postId']+POST['postExt']) - fileDir = directory / (title+"_"+POST['postId']+POST['postExt']) - tempDir = directory / (title+"_"+POST['postId']+".tmp") + fileDir = directory / ( + title+"_"+POST["postSubmitter"]+"_"+POST['postId']+POST['postExt'] + ) + tempDir = directory / ( + title+"_"+POST["postSubmitter"]+"_"+POST['postId']+".tmp" + ) + try: getFile(fileDir,tempDir,POST['mediaURL']) except FileNameTooLong: @@ -406,11 +425,12 @@ class Direct: title = nameCorrector(POST['postTitle']) print(title+"_"+POST['postId']+POST['postExt']) - fileDir = title+"_"+POST['postId']+POST['postExt'] - fileDir = directory / fileDir - - tempDir = title+"_"+POST['postId']+".tmp" - tempDir = directory / tempDir + fileDir = directory / ( + title+"_"+POST["postSubmitter"]+"_"+POST['postId']+POST['postExt'] + ) + tempDir = directory / ( + title+"_"+POST["postSubmitter"]+"_"+POST['postId']+".tmp" + ) try: getFile(fileDir,tempDir,POST['postURL']) @@ -427,8 +447,9 @@ class Self: title = nameCorrector(post['postTitle']) print(title+"_"+post['postId']+".md") - fileDir = title+"_"+post['postId']+".md" - fileDir = directory / fileDir + fileDir = directory / ( + title+"_"+POST["postSubmitter"]+"_"+POST['postId']+".md" + ) if Path.is_file(fileDir): raise FileAlreadyExistsError From 400ce019186788baee2ffa5e1794f98307d45e53 Mon Sep 17 00:00:00 2001 From: Ali Parlakci Date: Tue, 24 Jul 2018 13:17:14 +0300 Subject: [PATCH 2/4] Added older version support --- script.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/script.py b/script.py index 9749a26..9b1a009 100644 --- a/script.py +++ b/script.py @@ -448,9 +448,12 @@ def postExists(POST): SHORT_FILE_PATH = PATH / (POST['postId']+extension) - if OLD_FILE_PATH.exists() FILE_PATH.exists() or SHORT_FILE_PATH.exists(): + if OLD_FILE_PATH.exists() or \ + FILE_PATH.exists() or \ + SHORT_FILE_PATH.exists(): + return True - + else: return False From 93732b03670d9e38b58150bedc7989ac2f57b0af Mon Sep 17 00:00:00 2001 From: Ali Parlakci Date: Tue, 24 Jul 2018 13:17:37 +0300 Subject: [PATCH 3/4] Little refactoring --- src/searcher.py | 112 ++++++++++++++++++++++++------------------------ 1 file changed, 55 insertions(+), 57 deletions(-) diff --git a/src/searcher.py b/src/searcher.py index ea0d0ec..4c12676 100644 --- a/src/searcher.py +++ b/src/searcher.py @@ -14,60 +14,62 @@ from src.errors import (NoMatchingSubmissionFound, NoPrawSupport, print = printToFile -class GetAuth: - def __init__(self,redditInstance,port): - self.redditInstance = redditInstance - self.PORT = int(port) - - def recieve_connection(self): - """Wait for and then return a connected socket.. - Opens a TCP connection on port 8080, and waits for a single client. - """ - server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - server.bind(('localhost', self.PORT)) - server.listen(1) - client = server.accept()[0] - server.close() - return client - - def send_message(self, message): - """Send message to client and close the connection.""" - self.client.send('HTTP/1.1 200 OK\r\n\r\n{}'.format(message).encode('utf-8')) - self.client.close() - - def getRefreshToken(self,*scopes): - state = str(random.randint(0, 65000)) - url = self.redditInstance.auth.url(scopes, state, 'permanent') - print("Go to this URL and login to reddit:\n\n",url) - webbrowser.open(url,new=2) - - self.client = self.recieve_connection() - data = self.client.recv(1024).decode('utf-8') - param_tokens = data.split(' ', 2)[1].split('?', 1)[1].split('&') - params = { - key: value for (key, value) in [token.split('=') \ - for token in param_tokens] - } - if state != params['state']: - self.send_message( - client, 'State mismatch. Expected: {} Received: {}' - .format(state, params['state']) - ) - raise RedditLoginFailed - elif 'error' in params: - self.send_message(client, params['error']) - raise RedditLoginFailed - - refresh_token = self.redditInstance.auth.authorize(params['code']) - self.send_message( - "" - ) - return (self.redditInstance,refresh_token) - def beginPraw(config,user_agent = str(socket.gethostname())): + class GetAuth: + def __init__(self,redditInstance,port): + self.redditInstance = redditInstance + self.PORT = int(port) + + def recieve_connection(self): + """Wait for and then return a connected socket.. + Opens a TCP connection on port 8080, and waits for a single client. + """ + server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server.bind(('localhost', self.PORT)) + server.listen(1) + client = server.accept()[0] + server.close() + return client + + def send_message(self, message): + """Send message to client and close the connection.""" + self.client.send( + 'HTTP/1.1 200 OK\r\n\r\n{}'.format(message).encode('utf-8') + ) + self.client.close() + + def getRefreshToken(self,*scopes): + state = str(random.randint(0, 65000)) + url = self.redditInstance.auth.url(scopes, state, 'permanent') + print("Go to this URL and login to reddit:\n\n",url) + webbrowser.open(url,new=2) + + self.client = self.recieve_connection() + data = self.client.recv(1024).decode('utf-8') + param_tokens = data.split(' ', 2)[1].split('?', 1)[1].split('&') + params = { + key: value for (key, value) in [token.split('=') \ + for token in param_tokens] + } + if state != params['state']: + self.send_message( + client, 'State mismatch. Expected: {} Received: {}' + .format(state, params['state']) + ) + raise RedditLoginFailed + elif 'error' in params: + self.send_message(client, params['error']) + raise RedditLoginFailed + + refresh_token = self.redditInstance.auth.authorize(params['code']) + self.send_message( + "" + ) + return (self.redditInstance,refresh_token) + """Start reddit instance""" scopes = ['identity','history','read'] @@ -245,8 +247,6 @@ def getPosts(args): raise MultiredditNotFound elif "submitted" in args: - # TODO - # USE REDDIT.USER.ME() INSTEAD WHEN "ME" PASSED AS A --USER print ( "submitted posts of {user}\nsort: {sort}\n" \ "time: {time}\nlimit: {limit}\n".format( @@ -263,8 +263,6 @@ def getPosts(args): ) elif "upvoted" in args: - # TODO - # USE REDDIT.USER.ME() INSTEAD WHEN "ME" PASSED AS A --USER print ( "upvoted posts of {user}\nlimit: {limit}\n".format( user=args["user"], From 9d0fdc75218293bafa6c9e17903295dab1e8bfc3 Mon Sep 17 00:00:00 2001 From: Ali Parlakci Date: Tue, 24 Jul 2018 18:55:33 +0300 Subject: [PATCH 4/4] Add OP's name first --- script.py | 9 ++++----- src/downloader.py | 46 +++++++++++++++++++++++++--------------------- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/script.py b/script.py index 9b1a009..8092d38 100644 --- a/script.py +++ b/script.py @@ -423,11 +423,10 @@ def postFromLog(fileName): return posts -def postExists(POST): +def isPostExists(POST): """Figure out a file's name and checks if the file already exists""" title = nameCorrector(POST['postTitle']) - FILENAME = title + "_" + POST['postId'] + "_" + POST["postSubmitter"] PATH = GLOBAL.directory / POST["postSubreddit"] possibleExtensions = [".jpg",".png",".mp4",".gif",".webm",".md"] @@ -440,9 +439,9 @@ def postExists(POST): + extension ) FILE_PATH = PATH / ( - title + POST["postSubmitter"] + + "_" + title + "_" + POST['postId'] - + "_" + POST["postSubmitter"] + extension ) @@ -539,7 +538,7 @@ def download(submissions): ) ) - if postExists(submissions[i]): + if isPostExists(submissions[i]): print(submissions[i]['postType'].upper()) print("It already exists") duplicates += 1 diff --git a/src/downloader.py b/src/downloader.py index da8d5b3..1908468 100644 --- a/src/downloader.py +++ b/src/downloader.py @@ -87,13 +87,13 @@ class Erome: extension = getExtension(IMAGES[0]) title = nameCorrector(post['postTitle']) - print(title+"_" +post['postId']+extension) + print(post["postSubmitter"]+"_"+title+"_"+post['postId']+extension) fileDir = directory / ( - title+"_"+POST["postSubmitter"]+"_"+POST['postId']+extension + POST["postSubmitter"]+"_"+title+"_"+POST['postId']+extension ) tempDir = directory / ( - title+"_"+POST["postSubmitter"]+"_"+POST['postId']+".tmp" + POST["postSubmitter"]+"_"+title+"_"+POST['postId']+".tmp" ) imageURL = "https:" + IMAGES[0] @@ -107,10 +107,10 @@ class Erome: else: title = nameCorrector(post['postTitle']) - print(title+"_"+post['postId'],end="\n\n") + print(post["postSubmitter"]+"_"+title+"_"+post['postId'],end="\n\n") folderDir = directory / ( - title + "_" + post["postSubmitter"] + "_" + post['postId'] + post["postSubmitter"] + "_" + title + "_" + post['postId'] ) try: @@ -216,18 +216,18 @@ class Imgur: post['postExt'] = getExtension(post['mediaURL']) title = nameCorrector(post['postTitle']) - print(title+"_" +post['postId']+post['postExt']) + print(post["postSubmitter"]+"_"+title+"_"+post['postId']+post['postExt']) fileDir = directory / ( - title - + "_" + post["postSubmitter"] + post["postSubmitter"] + + "_" + title + "_" + post['postId'] + post['postExt'] ) tempDir = directory / ( - title - + "_" + post["postSubmitter"] + post["postSubmitter"] + + "_" + title + "_" + post['postId'] + ".tmp" ) @@ -247,10 +247,10 @@ class Imgur: duplicates = 0 title = nameCorrector(post['postTitle']) - print(title+"_"+post['postId'],end="\n\n") + print(post["postSubmitter"]+"_"+title+"_"+post['postId'],end="\n\n") folderDir = directory / ( - title + "_" + post["postSubmitter"] + "_" + post['postId'] + post["postSubmitter"] + "_" + title + "_" + post['postId'] ) try: @@ -369,13 +369,13 @@ class Gfycat: if not os.path.exists(directory): os.makedirs(directory) title = nameCorrector(POST['postTitle']) - print(title+"_"+POST['postId']+POST['postExt']) + print(POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt']) fileDir = directory / ( - title+"_"+POST["postSubmitter"]+"_"+POST['postId']+POST['postExt'] + POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt'] ) tempDir = directory / ( - title+"_"+POST["postSubmitter"]+"_"+POST['postId']+".tmp" + POST["postSubmitter"]+"_"+title+"_"+POST['postId']+".tmp" ) try: @@ -423,13 +423,13 @@ class Direct: POST['postExt'] = getExtension(POST['postURL']) if not os.path.exists(directory): os.makedirs(directory) title = nameCorrector(POST['postTitle']) - print(title+"_"+POST['postId']+POST['postExt']) + print(POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt']) fileDir = directory / ( - title+"_"+POST["postSubmitter"]+"_"+POST['postId']+POST['postExt'] + POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt'] ) tempDir = directory / ( - title+"_"+POST["postSubmitter"]+"_"+POST['postId']+".tmp" + POST["postSubmitter"]+"_"+title+"_"+POST['postId']+".tmp" ) try: @@ -445,10 +445,10 @@ class Self: if not os.path.exists(directory): os.makedirs(directory) title = nameCorrector(post['postTitle']) - print(title+"_"+post['postId']+".md") + print(post["postSubmitter"]+"_"+title+"_"+post['postId']+".md") fileDir = directory / ( - title+"_"+POST["postSubmitter"]+"_"+POST['postId']+".md" + post["postSubmitter"]+"_"+title+"_"+post['postId']+".md" ) if Path.is_file(fileDir): @@ -472,7 +472,11 @@ class Self: + ")\n" + post["postContent"] + "\n\n---\n\n" - + "submitted by [u/" + + "submitted to [r/" + + post["postSubreddit"] + + "](https://www.reddit.com/r/" + + post["postSubreddit"] + + ") by [u/" + post["postSubmitter"] + "](https://www.reddit.com/user/" + post["postSubmitter"]