## Change log
  
- YouTube support added
- Custom filenames feature added
- Custom folder structure feature added
- Option to unsave downloaded posts added
- Option to remove duplicate posts across different subreddits added
- Option to skip given domains added
- Option to keep track of already downloaded posts in a separate file added (see --downloaded-posts in README)
- Missing audio on v.redd.it videos fixed (see README for details about ffmpeg; a rough sketch of the merge step follows this list)
- --default-directory option added
- --default-options option added
- --use-local-config option added
- Bug fixes
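
The v.redd.it fix above relies on ffmpeg: Reddit serves the video and audio tracks of a v.redd.it post separately, so they have to be muxed into a single file after download. A minimal sketch of that merge step, assuming the ffmpeg binary is on PATH (file names are placeholders, not code from this commit):

```python
# Hypothetical sketch: mux a downloaded v.redd.it video stream with its
# separately downloaded audio stream by calling ffmpeg via subprocess.
import subprocess

def merge_streams(video_file, audio_file, output_file):
    """Copy both streams into one container without re-encoding."""
    subprocess.run(
        ["ffmpeg", "-i", video_file, "-i", audio_file, "-c", "copy", output_file],
        check=True,
    )

merge_streams("DASH_720.mp4", "DASH_audio.mp4", "merged.mp4")
```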
Authored by Ali Parlakçı on 2020-06-01 15:05:02 +03:00, committed by GitHub
parent 0e007abd64
commit fd4958c06a
26 changed files with 1805 additions and 1712 deletions


@@ -2,6 +2,7 @@ import os
import sys
import random
import socket
import time
import webbrowser
import urllib.request
from urllib.error import HTTPError
@@ -9,477 +10,330 @@ from urllib.error import HTTPError
import praw
from prawcore.exceptions import NotFound, ResponseException, Forbidden
from src.utils import GLOBAL, createLogFile, jsonFile, printToFile
from src.reddit import Reddit
from src.utils import GLOBAL, createLogFile, printToFile
from src.jsonHelper import JsonFile
from src.errors import (NoMatchingSubmissionFound, NoPrawSupport,
NoRedditSupport, MultiredditNotFound,
InvalidSortingType, RedditLoginFailed,
InsufficientPermission)
InsufficientPermission, DirectLinkNotFound)
print = printToFile
def beginPraw(config,user_agent = str(socket.gethostname())):
class GetAuth:
def __init__(self,redditInstance,port):
self.redditInstance = redditInstance
self.PORT = int(port)
def recieve_connection(self):
"""Wait for and then return a connected socket..
Opens a TCP connection on port 8080, and waits for a single client.
"""
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
server.bind(('localhost', self.PORT))
server.listen(1)
client = server.accept()[0]
server.close()
return client
def send_message(self, client, message):
"""Send message to client and close the connection."""
client.send(
'HTTP/1.1 200 OK\r\n\r\n{}'.format(message).encode('utf-8')
)
client.close()
def getRefreshToken(self,*scopes):
state = str(random.randint(0, 65000))
url = self.redditInstance.auth.url(scopes, state, 'permanent')
print("Go to this URL and login to reddit:\n\n",url)
webbrowser.open(url,new=2)
client = self.recieve_connection()
data = client.recv(1024).decode('utf-8')
str(data)
param_tokens = data.split(' ', 2)[1].split('?', 1)[1].split('&')
params = {
key: value for (key, value) in [token.split('=') \
for token in param_tokens]
}
if state != params['state']:
self.send_message(
client, 'State mismatch. Expected: {} Received: {}'
.format(state, params['state'])
)
raise RedditLoginFailed
elif 'error' in params:
self.send_message(client, params['error'])
raise RedditLoginFailed
refresh_token = self.redditInstance.auth.authorize(params['code'])
self.send_message(client,
"<script>" \
"alert(\"You can go back to terminal window now.\");" \
"</script>"
)
return (self.redditInstance,refresh_token)
"""Start reddit instance"""
scopes = ['identity','history','read']
port = "1337"
arguments = {
"client_id":GLOBAL.reddit_client_id,
"client_secret":GLOBAL.reddit_client_secret,
"user_agent":user_agent
}
if "reddit_refresh_token" in GLOBAL.config:
arguments["refresh_token"] = GLOBAL.config["reddit_refresh_token"]
reddit = praw.Reddit(**arguments)
try:
reddit.auth.scopes()
except ResponseException:
arguments["redirect_uri"] = "http://localhost:" + str(port)
reddit = praw.Reddit(**arguments)
authorizedInstance = GetAuth(reddit,port).getRefreshToken(*scopes)
reddit = authorizedInstance[0]
refresh_token = authorizedInstance[1]
jsonFile(GLOBAL.configDirectory).add({
"reddit_username":str(reddit.user.me()),
"reddit_refresh_token":refresh_token
})
else:
arguments["redirect_uri"] = "http://localhost:" + str(port)
reddit = praw.Reddit(**arguments)
authorizedInstance = GetAuth(reddit,port).getRefreshToken(*scopes)
reddit = authorizedInstance[0]
refresh_token = authorizedInstance[1]
jsonFile(GLOBAL.configDirectory).add({
"reddit_username":str(reddit.user.me()),
"reddit_refresh_token":refresh_token
})
return reddit
def getPosts(args):
"""Call PRAW regarding to arguments and pass it to redditSearcher.
Return what redditSearcher has returned.
def getPosts(programMode):
"""Call PRAW regarding to arguments and pass it to extractDetails.
Return what extractDetails has returned.
"""
config = GLOBAL.config
reddit = beginPraw(config)
reddit = Reddit(GLOBAL.config["credentials"]["reddit"]).begin()
if args["sort"] == "best":
if programMode["sort"] == "best":
raise NoPrawSupport("PRAW does not support that")
if "subreddit" in args:
if "search" in args:
if args["subreddit"] == "frontpage":
args["subreddit"] = "all"
if "subreddit" in programMode:
if "search" in programMode:
if programMode["subreddit"] == "frontpage":
programMode["subreddit"] = "all"
if "user" in args:
if args["user"] == "me":
args["user"] = str(reddit.user.me())
if "user" in programMode:
if programMode["user"] == "me":
programMode["user"] = str(reddit.user.me())
if not "search" in args:
if args["sort"] == "top" or args["sort"] == "controversial":
if not "search" in programMode:
if programMode["sort"] == "top" or programMode["sort"] == "controversial":
keyword_params = {
"time_filter":args["time"],
"limit":args["limit"]
"time_filter":programMode["time"],
"limit":programMode["limit"]
}
# OTHER SORT TYPES DON'T TAKE TIME_FILTER
else:
keyword_params = {
"limit":args["limit"]
"limit":programMode["limit"]
}
else:
keyword_params = {
"time_filter":args["time"],
"limit":args["limit"]
"time_filter":programMode["time"],
"limit":programMode["limit"]
}
if "search" in args:
if GLOBAL.arguments.sort in ["hot","rising","controversial"]:
if "search" in programMode:
if programMode["sort"] in ["hot","rising","controversial"]:
raise InvalidSortingType("Invalid sorting type has given")
if "subreddit" in args:
if "subreddit" in programMode:
print (
"search for \"{search}\" in\n" \
"subreddit: {subreddit}\nsort: {sort}\n" \
"time: {time}\nlimit: {limit}\n".format(
search=args["search"],
limit=args["limit"],
sort=args["sort"],
subreddit=args["subreddit"],
time=args["time"]
search=programMode["search"],
limit=programMode["limit"],
sort=programMode["sort"],
subreddit=programMode["subreddit"],
time=programMode["time"]
).upper(),noPrint=True
)
return redditSearcher(
reddit.subreddit(args["subreddit"]).search(
args["search"],
limit=args["limit"],
sort=args["sort"],
time_filter=args["time"]
return extractDetails(
reddit.subreddit(programMode["subreddit"]).search(
programMode["search"],
limit=programMode["limit"],
sort=programMode["sort"],
time_filter=programMode["time"]
)
)
elif "multireddit" in args:
elif "multireddit" in programMode:
raise NoPrawSupport("PRAW does not support that")
elif "user" in args:
elif "user" in programMode:
raise NoPrawSupport("PRAW does not support that")
elif "saved" in args:
elif "saved" in programMode:
raise ("Reddit does not support that")
if args["sort"] == "relevance":
if programMode["sort"] == "relevance":
raise InvalidSortingType("Invalid sorting type has given")
if "saved" in args:
if "saved" in programMode:
print(
"saved posts\nuser:{username}\nlimit={limit}\n".format(
username=reddit.user.me(),
limit=args["limit"]
limit=programMode["limit"]
).upper(),noPrint=True
)
return redditSearcher(reddit.user.me().saved(limit=args["limit"]))
return extractDetails(reddit.user.me().saved(limit=programMode["limit"]))
if "subreddit" in args:
if "subreddit" in programMode:
if args["subreddit"] == "frontpage":
if programMode["subreddit"] == "frontpage":
print (
"subreddit: {subreddit}\nsort: {sort}\n" \
"time: {time}\nlimit: {limit}\n".format(
limit=args["limit"],
sort=args["sort"],
subreddit=args["subreddit"],
time=args["time"]
limit=programMode["limit"],
sort=programMode["sort"],
subreddit=programMode["subreddit"],
time=programMode["time"]
).upper(),noPrint=True
)
return redditSearcher(
getattr(reddit.front,args["sort"]) (**keyword_params)
return extractDetails(
getattr(reddit.front,programMode["sort"]) (**keyword_params)
)
else:
print (
"subreddit: {subreddit}\nsort: {sort}\n" \
"time: {time}\nlimit: {limit}\n".format(
limit=args["limit"],
sort=args["sort"],
subreddit=args["subreddit"],
time=args["time"]
limit=programMode["limit"],
sort=programMode["sort"],
subreddit=programMode["subreddit"],
time=programMode["time"]
).upper(),noPrint=True
)
return redditSearcher(
return extractDetails(
getattr(
reddit.subreddit(args["subreddit"]),args["sort"]
reddit.subreddit(programMode["subreddit"]),programMode["sort"]
) (**keyword_params)
)
elif "multireddit" in args:
elif "multireddit" in programMode:
print (
"user: {user}\n" \
"multireddit: {multireddit}\nsort: {sort}\n" \
"time: {time}\nlimit: {limit}\n".format(
user=args["user"],
limit=args["limit"],
sort=args["sort"],
multireddit=args["multireddit"],
time=args["time"]
user=programMode["user"],
limit=programMode["limit"],
sort=programMode["sort"],
multireddit=programMode["multireddit"],
time=programMode["time"]
).upper(),noPrint=True
)
try:
return redditSearcher(
return extractDetails(
getattr(
reddit.multireddit(
args["user"], args["multireddit"]
),args["sort"]
programMode["user"], programMode["multireddit"]
),programMode["sort"]
) (**keyword_params)
)
except NotFound:
raise MultiredditNotFound("Multireddit not found")
elif "submitted" in args:
elif "submitted" in programMode:
print (
"submitted posts of {user}\nsort: {sort}\n" \
"time: {time}\nlimit: {limit}\n".format(
limit=args["limit"],
sort=args["sort"],
user=args["user"],
time=args["time"]
limit=programMode["limit"],
sort=programMode["sort"],
user=programMode["user"],
time=programMode["time"]
).upper(),noPrint=True
)
return redditSearcher(
return extractDetails(
getattr(
reddit.redditor(args["user"]).submissions,args["sort"]
reddit.redditor(programMode["user"]).submissions,programMode["sort"]
) (**keyword_params)
)
elif "upvoted" in args:
elif "upvoted" in programMode:
print (
"upvoted posts of {user}\nlimit: {limit}\n".format(
user=args["user"],
limit=args["limit"]
user=programMode["user"],
limit=programMode["limit"]
).upper(),noPrint=True
)
try:
return redditSearcher(
reddit.redditor(args["user"]).upvoted(limit=args["limit"])
return extractDetails(
reddit.redditor(programMode["user"]).upvoted(limit=programMode["limit"])
)
except Forbidden:
raise InsufficientPermission("You do not have permission to do that")
elif "post" in args:
print("post: {post}\n".format(post=args["post"]).upper(),noPrint=True)
return redditSearcher(
reddit.submission(url=args["post"]),SINGLE_POST=True
elif "post" in programMode:
print("post: {post}\n".format(post=programMode["post"]).upper(),noPrint=True)
return extractDetails(
reddit.submission(url=programMode["post"]),SINGLE_POST=True
)
def redditSearcher(posts,SINGLE_POST=False):
def extractDetails(posts,SINGLE_POST=False):
"""Check posts and decide if it can be downloaded.
If so, create a dictionary with post details and append them to a list.
Write all of posts to file. Return the list
"""
subList = []
global subCount
subCount = 0
global orderCount
orderCount = 0
global gfycatCount
gfycatCount = 0
global redgifsCount
redgifsCount = 0
global imgurCount
imgurCount = 0
global eromeCount
eromeCount = 0
global gifDeliveryNetworkCount
gifDeliveryNetworkCount = 0
global directCount
directCount = 0
global selfCount
selfCount = 0
postList = []
postCount = 0
allPosts = {}
print("\nGETTING POSTS")
if GLOBAL.arguments.verbose: print("\n")
postsFile = createLogFile("POSTS")
if SINGLE_POST:
submission = posts
subCount += 1
postCount += 1
try:
details = {'postId':submission.id,
'postTitle':submission.title,
'postSubmitter':str(submission.author),
'postType':None,
'postURL':submission.url,
'postSubreddit':submission.subreddit.display_name}
details = {'POSTID':submission.id,
'TITLE':submission.title,
'REDDITOR':str(submission.author),
'TYPE':None,
'CONTENTURL':submission.url,
'SUBREDDIT':submission.subreddit.display_name,
'UPVOTES': submission.score,
'FLAIR':submission.link_flair_text,
'DATE':str(time.strftime(
"%Y-%m-%d_%H-%M",
time.localtime(submission.created_utc)
))}
except AttributeError:
pass
result = checkIfMatching(submission)
result = matchWithDownloader(submission)
if result is not None:
details = result
orderCount += 1
if GLOBAL.arguments.verbose:
printSubmission(submission,subCount,orderCount)
subList.append(details)
details = {**details, **result}
postList.append(details)
postsFile.add({subCount:[details]})
postsFile.add({postCount:details})
else:
try:
for submission in posts:
subCount += 1
postCount += 1
if subCount % 100 == 0 and not GLOBAL.arguments.verbose:
if postCount % 100 == 0:
sys.stdout.write("")
sys.stdout.flush()
if subCount % 1000 == 0:
if postCount % 1000 == 0:
sys.stdout.write("\n"+" "*14)
sys.stdout.flush()
try:
details = {'postId':submission.id,
'postTitle':submission.title,
'postSubmitter':str(submission.author),
'postType':None,
'postURL':submission.url,
'postSubreddit':submission.subreddit.display_name}
details = {'POSTID':submission.id,
'TITLE':submission.title,
'REDDITOR':str(submission.author),
'TYPE':None,
'CONTENTURL':submission.url,
'SUBREDDIT':submission.subreddit.display_name,
'UPVOTES': submission.score,
'FLAIR':submission.link_flair_text,
'DATE':str(time.strftime(
"%Y-%m-%d_%H-%M",
time.localtime(submission.created_utc)
))}
except AttributeError:
continue
result = checkIfMatching(submission)
result = matchWithDownloader(submission)
if result is not None:
details = result
orderCount += 1
if GLOBAL.arguments.verbose:
printSubmission(submission,subCount,orderCount)
subList.append(details)
details = {**details, **result}
postList.append(details)
allPosts[subCount] = [details]
allPosts[postCount] = details
except KeyboardInterrupt:
print("\nKeyboardInterrupt",noPrint=True)
postsFile.add(allPosts)
if not len(subList) == 0:
if GLOBAL.arguments.NoDownload or GLOBAL.arguments.verbose:
print(
f"\n\nTotal of {len(subList)} submissions found!"
)
print(
f"{gfycatCount} GFYCATs, {imgurCount} IMGURs, " \
f"{eromeCount} EROMEs, {directCount} DIRECTs " \
f"and {selfCount} SELF POSTS",noPrint=True
)
else:
print()
return subList
if not len(postList) == 0:
print()
return postList
else:
raise NoMatchingSubmissionFound("No matching submission was found")
def checkIfMatching(submission):
global gfycatCount
global redgifsCount
global imgurCount
global eromeCount
global directCount
global gifDeliveryNetworkCount
global selfCount
def matchWithDownloader(submission):
try:
details = {'postId':submission.id,
'postTitle':submission.title,
'postSubmitter':str(submission.author),
'postType':None,
'postURL':submission.url,
'postSubreddit':submission.subreddit.display_name}
except AttributeError:
return None
if 'v.redd.it' in submission.domain:
bitrates = ["DASH_1080","DASH_720","DASH_600", \
"DASH_480","DASH_360","DASH_240"]
for bitrate in bitrates:
videoURL = submission.url+"/"+bitrate
try:
responseCode = urllib.request.urlopen(videoURL).getcode()
except urllib.error.HTTPError:
responseCode = 0
if responseCode == 200:
return {'TYPE': 'v.redd.it', 'CONTENTURL': videoURL}
if 'gfycat' in submission.domain:
details['postType'] = 'gfycat'
gfycatCount += 1
return details
return {'TYPE': 'gfycat'}
if 'youtube' in submission.domain \
and 'watch' in submission.url:
return {'TYPE': 'youtube'}
if 'youtu.be' in submission.domain:
url = urllib.request.urlopen(submission.url).geturl()
if 'watch' in url:
return {'TYPE': 'youtube'}
elif 'imgur' in submission.domain:
details['postType'] = 'imgur'
imgurCount += 1
return details
return {'TYPE': 'imgur'}
elif 'erome' in submission.domain:
details['postType'] = 'erome'
eromeCount += 1
return details
return {'TYPE': 'erome'}
elif 'redgifs' in submission.domain:
details['postType'] = 'redgifs'
redgifsCount += 1
return details
return {'TYPE': 'redgifs'}
elif 'gifdeliverynetwork' in submission.domain:
details['postType'] = 'gifdeliverynetwork'
gifDeliveryNetworkCount += 1
return details
return {'TYPE': 'gifdeliverynetwork'}
elif submission.is_self:
details['postType'] = 'self'
details['postContent'] = submission.selftext
selfCount += 1
return details
directLink = isDirectLink(submission.url)
if directLink is not False:
details['postType'] = 'direct'
details['postURL'] = directLink
directCount += 1
return details
def printSubmission(SUB,validNumber,totalNumber):
"""Print post's link, title and media link to screen"""
print(validNumber,end=") ")
print(totalNumber,end=" ")
print(
"https://www.reddit.com/"
+"r/"
+SUB.subreddit.display_name
+"/comments/"
+SUB.id
)
print(" "*(len(str(validNumber))
+(len(str(totalNumber)))+3),end="")
elif submission.is_self and 'self' not in GLOBAL.arguments.skip:
return {'TYPE': 'self',
'CONTENT': submission.selftext}
try:
print(SUB.title)
except:
SUB.title = "unnamed"
print("SUBMISSION NAME COULD NOT BE READ")
pass
return {'TYPE': 'direct',
'CONTENTURL': extractDirectLink(submission.url)}
except DirectLinkNotFound:
return None
print(" "*(len(str(validNumber))+(len(str(totalNumber)))+3),end="")
print(SUB.url,end="\n\n")
def isDirectLink(URL):
def extractDirectLink(URL):
"""Check if link is a direct image link.
If so, return URL,
if not, return False
@@ -508,10 +362,10 @@ def isDirectLink(URL):
return videoURL
else:
return False
raise DirectLinkNotFound
for extension in imageTypes:
if extension in URL.split("/")[-1]:
return URL
else:
return False
raise DirectLinkNotFound
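
For reference, the detail dictionaries built by extractDetails above use uppercase keys (POSTID, TITLE, REDDITOR, CONTENTURL, SUBREDDIT, UPVOTES, FLAIR, DATE). A hypothetical illustration of how such a dictionary could drive the custom-filenames feature listed in the change log, assuming a plain str.format-style template (the actual template handling lives in other files of this commit):

```python
# Hypothetical illustration: build a filename from an extractDetails-style
# dictionary with a user-supplied template. Keys mirror the diff above;
# the template syntax here is an assumption, not this project's.
details = {
    "POSTID": "abc123",
    "TITLE": "Example post",
    "REDDITOR": "some_user",
    "SUBREDDIT": "pics",
    "DATE": "2020-06-01_15-05",
}

template = "{REDDITOR}_{TITLE}_{POSTID}"
print(template.format(**details))  # -> some_user_Example post_abc123
```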