## Change log
  
- Youtube support added
- Custom filenames feature added
- Custom folder structure feature added
- Unsaving downloaded posts option added
- Remove duplicate posts on different subreddits option added
- Skipping given domains option added
- Keeping track of already downloaded posts on a separate file option added (See --downloaded-posts in README)
- No audio on v.redd.it videos bug fixed (see README for details about ffmpeg)
- --default-directory option is added
- --default-options is added
- --use-local-config option is added
- Bug fixes
This commit is contained in:
Ali Parlakçı
2020-06-01 15:05:02 +03:00
committed by GitHub
parent 0e007abd64
commit fd4958c06a
26 changed files with 1805 additions and 1712 deletions

148
src/arguments.py Normal file
View File

@@ -0,0 +1,148 @@
import argparse
import sys
class Arguments:
    """Command-line interface definition for the downloader."""

    @staticmethod
    def parse(arguments=None):
        """Initialize argparse and add arguments.

        arguments -- list of argument strings to parse; when None or empty,
                     sys.argv is parsed instead (same behaviour as before,
                     but without a mutable default argument).
        """
        parser = argparse.ArgumentParser(
            allow_abbrev=False,
            description="This program downloads media from reddit posts"
        )
        parser.add_argument("--directory", "-d",
                            help="Specifies the directory where posts will be "
                                 "downloaded to",
                            metavar="DIRECTORY")
        parser.add_argument("--verbose", "-v",
                            help="Verbose Mode",
                            action="store_true",
                            default=False)
        parser.add_argument("--quit", "-q",
                            help="Auto quit after the process finishes",
                            action="store_true",
                            default=False)
        parser.add_argument("--link", "-l",
                            help="Get posts from link",
                            metavar="link")
        parser.add_argument("--saved",
                            action="store_true",
                            # --unsave only makes sense in saved mode.
                            required="--unsave" in sys.argv,
                            help="Triggers saved mode")
        parser.add_argument("--unsave",
                            action="store_true",
                            help="Unsaves downloaded posts")
        parser.add_argument("--submitted",
                            action="store_true",
                            help="Gets posts of --user")
        parser.add_argument("--upvoted",
                            action="store_true",
                            help="Gets upvoted posts of --user")
        parser.add_argument("--log",
                            help="Takes a log file which created by itself "
                                 "(json files), reads posts and tries downloadin"
                                 "g them again.",
                            metavar="LOG FILE")
        parser.add_argument("--subreddit",
                            nargs="+",
                            help="Triggers subreddit mode and takes subreddit's "
                                 "name without r/. use \"frontpage\" for frontpage",
                            metavar="SUBREDDIT",
                            type=str)
        parser.add_argument("--multireddit",
                            help="Triggers multireddit mode and takes "
                                 "multireddit's name without m/",
                            metavar="MULTIREDDIT",
                            type=str)
        parser.add_argument("--user",
                            help="reddit username if needed. use \"me\" for "
                                 "current user",
                            # These modes cannot work without a redditor name.
                            required="--multireddit" in sys.argv or
                                     "--submitted" in sys.argv,
                            metavar="redditor",
                            type=str)
        parser.add_argument("--search",
                            help="Searches for given query in given subreddits",
                            metavar="query",
                            type=str)
        parser.add_argument("--sort",
                            help="Either hot, top, new, controversial, rising "
                                 "or relevance default: hot",
                            choices=["hot", "top", "new", "controversial",
                                     "rising", "relevance"],
                            metavar="SORT TYPE",
                            type=str)
        parser.add_argument("--limit",
                            help="default: unlimited",
                            metavar="Limit",
                            type=int)
        parser.add_argument("--time",
                            help="Either hour, day, week, month, year or all."
                                 " default: all",
                            choices=["all", "hour", "day", "week",
                                     "month", "year"],
                            metavar="TIME_LIMIT",
                            type=str)
        parser.add_argument("--skip",
                            nargs="+",
                            help="Skip posts with given domain",
                            type=str,
                            default=[])
        parser.add_argument("--set-folderpath",
                            action="store_true",
                            help="Set custom folderpath")
        parser.add_argument("--set-filename",
                            action="store_true",
                            help="Set custom filename")
        parser.add_argument("--set-default-directory",
                            action="store_true",
                            help="Set a default directory to be used in case no directory is given")
        parser.add_argument("--set-default-options",
                            action="store_true",
                            help="Set default options to use everytime program runs")
        parser.add_argument("--use-local-config",
                            action="store_true",
                            help="Creates a config file in the program's directory and uses it. Useful for having multiple configs")
        parser.add_argument("--no-dupes",
                            action="store_true",
                            help="Do not download duplicate posts on different subreddits")
        parser.add_argument("--downloaded-posts",
                            help="Use a hash file to keep track of downloaded files",
                            type=str)

        # None or an empty list falls back to sys.argv, matching the old
        # ``arguments == []`` behaviour.
        if not arguments:
            return parser.parse_args()
        return parser.parse_args(arguments)

151
src/config.py Normal file
View File

@@ -0,0 +1,151 @@
import os
import socket
import webbrowser
import random
from src.reddit import Reddit
from src.jsonHelper import JsonFile
class Config():
    """Creates and maintains the program's JSON configuration file.

    ``set*`` methods prompt the user interactively; ``_read*`` methods
    silently fill in defaults for keys that are still missing.
    """

    def __init__(self, filename):
        # Path of the JSON config file; JsonFile creates it when absent.
        self.filename = filename
        self.file = JsonFile(self.filename)

    def generate(self):
        """Ensure every required key exists, then return the config dict."""
        self._validateCredentials()
        self._readCustomFileName()
        self._readCustomFolderPath()
        self._readDefaultOptions()
        return self.file.read()

    def setCustomFileName(self):
        """Interactively ask for and store a filename template."""
        print("""
IMPORTANT: Do not change the filename structure frequently.
If you did, the program could not find duplicates and
would download the already downloaded files again.
This would not create any duplicates in the directory but
the program would not be as snappy as it should be.
Type a template file name for each post.
You can use SUBREDDIT, REDDITOR, POSTID, TITLE, UPVOTES, FLAIR, DATE in curly braces
The text in curly braces will be replaced with the corresponding property of an each post
For example: {FLAIR}_{SUBREDDIT}_{REDDITOR}
Existing filename template:""", None if "filename" not in self.file.read() else self.file.read()["filename"])
        # Placeholders are matched upper-case, so normalize the input.
        filename = input(">> ").upper()
        self.file.add({
            "filename": filename
        })

    def _readCustomFileName(self):
        """Default the filename template and force a {POSTID} component.

        {POSTID} keeps filenames unique, which duplicate detection relies on.
        """
        content = self.file.read()
        if not "filename" in content:
            self.file.add({
                "filename": "{REDDITOR}_{TITLE}_{POSTID}"
            })
            content = self.file.read()
        if not "{POSTID}" in content["filename"]:
            self.file.add({
                "filename": content["filename"] + "_{POSTID}"
            })

    def setCustomFolderPath(self):
        """Interactively ask for and store a generic folder structure."""
        print("""
Type a folder structure (generic folder path)
Use slash or DOUBLE backslash to separate folders
You can use SUBREDDIT, REDDITOR, POSTID, TITLE, UPVOTES, FLAIR, DATE in curly braces
The text in curly braces will be replaced with the corresponding property of an each post
For example: {REDDITOR}/{SUBREDDIT}/{FLAIR}
Existing folder structure""", None if "folderpath" not in self.file.read() else self.file.read()["folderpath"])
        # Trim stray path separators from both ends before storing.
        folderpath = input(">> ").strip("\\").strip("/").upper()
        self.file.add({
            "folderpath": folderpath
        })

    def _readCustomFolderPath(self, path=None):
        # Default: one folder per subreddit.
        content = self.file.read()
        if not "folderpath" in content:
            self.file.add({
                "folderpath": "{SUBREDDIT}"
            })

    def setDefaultOptions(self):
        """Interactively ask for and store options applied on every run."""
        print("""
Type options to be used everytime script runs
For example: --no-dupes --quit --limit 100 --skip youtube.com
Existing default options:""", None if "options" not in self.file.read() else self.file.read()["options"])
        # .strip() trims surrounding whitespace (the old .strip("") was a no-op).
        options = input(">> ").strip()
        self.file.add({
            "options": options
        })

    def _readDefaultOptions(self, path=None):
        # Default: no extra options.
        content = self.file.read()
        if not "options" in content:
            self.file.add({
                "options": ""
            })

    def _validateCredentials(self):
        """Read credentials from config.json file"""
        keys = ['imgur_client_id',
                'imgur_client_secret']
        try:
            content = self.file.read()["credentials"]
        # Was a bare except: only a missing "credentials" key is expected here.
        except KeyError:
            self.file.add({
                "credentials": {}
            })
            content = self.file.read()["credentials"]

        if "reddit" in content and len(content["reddit"]) != 0:
            pass
        else:
            # No reddit credentials stored yet: run the OAuth flow.
            Reddit().begin()

        if not all(content.get(key, False) for key in keys):
            print(
                "---Setting up the Imgur API---\n\n" \
                "Go to this URL and fill the form:\n" \
                "https://api.imgur.com/oauth2/addclient\n" \
                "Then, enter the client id and client secret here\n" \
                "Press Enter to open the link in the browser"
            )
            input()
            webbrowser.open("https://api.imgur.com/oauth2/addclient", new=2)
            for key in keys:
                try:
                    if content[key] == "":
                        raise KeyError
                except KeyError:
                    self.file.add({key: input("\t" + key + ": ")},
                                  "credentials")
            print()

    def setDefaultDirectory(self):
        """Interactively ask for and store a fallback download directory."""
        print("""Set a default directory to use in case no directory is given
Leave blank to reset it. You can use {time} in folder names to use to timestamp it
For example: D:/archive/BDFR_{time}
""")
        print("Current default directory:", self.file.read()["default_directory"] if "default_directory" in self.file.read() else "")
        self.file.add({
            "default_directory": input(">> ")
        })

View File

@@ -3,30 +3,16 @@ import os
from src.downloaders.downloaderUtils import getFile, getExtension
from src.errors import FileNameTooLong
from src.utils import nameCorrector
from src.utils import GLOBAL
from src.utils import printToFile as print
class Direct:
def __init__(self,directory,POST):
POST['postExt'] = getExtension(POST['postURL'])
POST['EXTENSION'] = getExtension(POST['CONTENTURL'])
if not os.path.exists(directory): os.makedirs(directory)
title = nameCorrector(POST['postTitle'])
"""Filenames are declared here"""
filename = GLOBAL.config['filename'].format(**POST)+POST["EXTENSION"]
shortFilename = POST['POSTID']+POST['EXTENSION']
print(POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt'])
fileDir = directory / (
POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt']
)
tempDir = directory / (
POST["postSubmitter"]+"_"+title+"_"+POST['postId']+".tmp"
)
try:
getFile(fileDir,tempDir,POST['postURL'])
except FileNameTooLong:
fileDir = directory / (POST['postId']+POST['postExt'])
tempDir = directory / (POST['postId']+".tmp")
getFile(fileDir,tempDir,POST['postURL'])
getFile(filename,shortFilename,directory,POST['CONTENTURL'])

View File

@@ -1,4 +1,6 @@
import os
import logging
import sys
import urllib.request
from html.parser import HTMLParser
@@ -6,14 +8,14 @@ from src.downloaders.downloaderUtils import getFile
from src.downloaders.downloaderUtils import getExtension
from src.errors import (FileNameTooLong, AlbumNotDownloadedCompletely,
NotADownloadableLinkError, FileAlreadyExistsError)
from src.utils import nameCorrector
NotADownloadableLinkError, FileAlreadyExistsError, full_exc_info)
from src.utils import GLOBAL
from src.utils import printToFile as print
class Erome:
def __init__(self,directory,post):
try:
IMAGES = self.getLinks(post['postURL'])
IMAGES = self.getLinks(post['CONTENTURL'])
except urllib.error.HTTPError:
raise NotADownloadableLinkError("Not a downloadable link")
@@ -27,59 +29,43 @@ class Erome:
"""Filenames are declared here"""
title = nameCorrector(post['postTitle'])
print(post["postSubmitter"]+"_"+title+"_"+post['postId']+extension)
fileDir = directory / (
post["postSubmitter"]+"_"+title+"_"+post['postId']+extension
)
tempDir = directory / (
post["postSubmitter"]+"_"+title+"_"+post['postId']+".tmp"
)
filename = GLOBAL.config['filename'].format(**post)+post["EXTENSION"]
shortFilename = post['POSTID'] + extension
imageURL = IMAGES[0]
if 'https://' not in imageURL and 'http://' not in imageURL:
if 'https://' not in imageURL or 'http://' not in imageURL:
imageURL = "https://" + imageURL
try:
getFile(fileDir,tempDir,imageURL)
except FileNameTooLong:
fileDir = directory / (post['postId'] + extension)
tempDir = directory / (post['postId'] + '.tmp')
getFile(fileDir,tempDir,imageURL)
getFile(filename,shortFilename,directory,imageURL)
else:
title = nameCorrector(post['postTitle'])
print(post["postSubmitter"]+"_"+title+"_"+post['postId'],end="\n\n")
filename = GLOBAL.config['filename'].format(**post)
folderDir = directory / (
post["postSubmitter"] + "_" + title + "_" + post['postId']
)
print(filename)
folderDir = directory / filename
try:
if not os.path.exists(folderDir):
os.makedirs(folderDir)
except FileNotFoundError:
folderDir = directory / post['postId']
folderDir = directory / post['POSTID']
os.makedirs(folderDir)
for i in range(imagesLenght):
extension = getExtension(IMAGES[i])
fileName = str(i+1)
filename = str(i+1)+extension
imageURL = IMAGES[i]
if 'https://' not in imageURL and 'http://' not in imageURL:
imageURL = "https://" + imageURL
fileDir = folderDir / (fileName + extension)
tempDir = folderDir / (fileName + ".tmp")
print(" ({}/{})".format(i+1,imagesLenght))
print(" {}".format(fileName+extension))
print(" {}".format(filename))
try:
getFile(fileDir,tempDir,imageURL,indent=2)
getFile(filename,filename,folderDir,imageURL,indent=2)
print()
except FileAlreadyExistsError:
print(" The file already exists" + " "*10,end="\n\n")

View File

@@ -6,41 +6,26 @@ from bs4 import BeautifulSoup
from src.downloaders.downloaderUtils import getFile, getExtension
from src.errors import (FileNameTooLong, AlbumNotDownloadedCompletely,
NotADownloadableLinkError, FileAlreadyExistsError)
from src.utils import nameCorrector
from src.utils import GLOBAL
from src.utils import printToFile as print
from src.downloaders.gifDeliveryNetwork import GifDeliveryNetwork
class Gfycat:
def __init__(self,directory,POST):
try:
POST['mediaURL'] = self.getLink(POST['postURL'])
POST['MEDIAURL'] = self.getLink(POST['CONTENTURL'])
except IndexError:
raise NotADownloadableLinkError("Could not read the page source")
POST['postExt'] = getExtension(POST['mediaURL'])
POST['EXTENSION'] = getExtension(POST['MEDIAURL'])
if not os.path.exists(directory): os.makedirs(directory)
title = nameCorrector(POST['postTitle'])
"""Filenames are declared here"""
filename = GLOBAL.config['filename'].format(**POST)+POST["EXTENSION"]
shortFilename = POST['POSTID']+POST['EXTENSION']
getFile(filename,shortFilename,directory,POST['MEDIAURL'])
print(POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt'])
fileDir = directory / (
POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt']
)
tempDir = directory / (
POST["postSubmitter"]+"_"+title+"_"+POST['postId']+".tmp"
)
try:
getFile(fileDir,tempDir,POST['mediaURL'])
except FileNameTooLong:
fileDir = directory / (POST['postId']+POST['postExt'])
tempDir = directory / (POST['postId']+".tmp")
getFile(fileDir,tempDir,POST['mediaURL'])
@staticmethod
def getLink(url):
"""Extract direct link to the video from page's source

View File

@@ -13,7 +13,7 @@ class Imgur:
def __init__(self,directory,post):
self.imgurClient = self.initImgur()
imgurID = self.getId(post['postURL'])
imgurID = self.getId(post['CONTENTURL'])
content = self.getLink(imgurID)
if not os.path.exists(directory): os.makedirs(directory)
@@ -21,38 +21,16 @@ class Imgur:
if content['type'] == 'image':
try:
post['mediaURL'] = content['object'].mp4
post['MEDIAURL'] = content['object'].mp4
except AttributeError:
post['mediaURL'] = content['object'].link
post['MEDIAURL'] = content['object'].link
post['postExt'] = getExtension(post['mediaURL'])
post['EXTENSION'] = getExtension(post['MEDIAURL'])
filename = GLOBAL.config['filename'].format(**post)+post["EXTENSION"]
shortFilename = post['POSTID']+post['EXTENSION']
title = nameCorrector(post['postTitle'])
"""Filenames are declared here"""
print(post["postSubmitter"]+"_"+title+"_"+post['postId']+post['postExt'])
fileDir = directory / (
post["postSubmitter"]
+ "_" + title
+ "_" + post['postId']
+ post['postExt']
)
tempDir = directory / (
post["postSubmitter"]
+ "_" + title
+ "_" + post['postId']
+ ".tmp"
)
try:
getFile(fileDir,tempDir,post['mediaURL'])
except FileNameTooLong:
fileDir = directory / post['postId'] + post['postExt']
tempDir = directory / post['postId'] + '.tmp'
getFile(fileDir,tempDir,post['mediaURL'])
getFile(filename,shortFilename,directory,post['MEDIAURL'])
elif content['type'] == 'album':
images = content['object'].images
@@ -60,18 +38,17 @@ class Imgur:
howManyDownloaded = imagesLenght
duplicates = 0
title = nameCorrector(post['postTitle'])
print(post["postSubmitter"]+"_"+title+"_"+post['postId'],end="\n\n")
filename = GLOBAL.config['filename'].format(**post)
folderDir = directory / (
post["postSubmitter"] + "_" + title + "_" + post['postId']
)
print(filename)
folderDir = directory / filename
try:
if not os.path.exists(folderDir):
os.makedirs(folderDir)
except FileNotFoundError:
folderDir = directory / post['postId']
folderDir = directory / post['POSTID']
os.makedirs(folderDir)
for i in range(imagesLenght):
@@ -82,42 +59,24 @@ class Imgur:
images[i]['Ext'] = getExtension(imageURL)
fileName = (str(i+1)
filename = (str(i+1)
+ "_"
+ nameCorrector(str(images[i]['title']))
+ "_"
+ images[i]['id'])
"""Filenames are declared here"""
shortFilename = (str(i+1) + "_" + images[i]['id'])
fileDir = folderDir / (fileName + images[i]['Ext'])
tempDir = folderDir / (fileName + ".tmp")
print(" ({}/{})".format(i+1,imagesLenght))
print(" {}".format(fileName+images[i]['Ext']))
print("\n ({}/{})".format(i+1,imagesLenght))
try:
getFile(fileDir,tempDir,imageURL,indent=2)
getFile(filename,shortFilename,folderDir,imageURL,indent=2)
print()
except FileAlreadyExistsError:
print(" The file already exists" + " "*10,end="\n\n")
duplicates += 1
howManyDownloaded -= 1
# IF FILE NAME IS TOO LONG, IT WONT REGISTER
except FileNameTooLong:
fileName = (str(i+1) + "_" + images[i]['id'])
fileDir = folderDir / (fileName + images[i]['Ext'])
tempDir = folderDir / (fileName + ".tmp")
try:
getFile(fileDir,tempDir,imageURL,indent=2)
# IF STILL TOO LONG
except FileNameTooLong:
fileName = str(i+1)
fileDir = folderDir / (fileName + images[i]['Ext'])
tempDir = folderDir / (fileName + ".tmp")
getFile(fileDir,tempDir,imageURL,indent=2)
except Exception as exception:
print("\n Could not get the file")
print(
@@ -143,8 +102,8 @@ class Imgur:
config = GLOBAL.config
return imgurpython.ImgurClient(
config['imgur_client_id'],
config['imgur_client_secret']
config["credentials"]['imgur_client_id'],
config["credentials"]['imgur_client_secret']
)
def getId(self,submissionURL):
"""Extract imgur post id

View File

@@ -1,9 +1,14 @@
import sys
import os
import time
from urllib.error import HTTPError
import urllib.request
from pathlib import Path
import hashlib
from src.errors import FileAlreadyExistsError, FileNameTooLong
from src.utils import nameCorrector, GLOBAL
from src.utils import printToFile as print
from src.errors import FileAlreadyExistsError, FileNameTooLong, FailedToDownload, DomainInSkip
def dlProgress(count, blockSize, totalSize):
"""Function for writing download progress to console
@@ -30,16 +35,10 @@ def getExtension(link):
else:
return '.mp4'
def getFile(fileDir,tempDir,imageURL,indent=0):
"""Downloads given file to given directory.
def getFile(filename,shortFilename,folderDir,imageURL,indent=0, silent=False):
fileDir -- Full file directory
tempDir -- Full file directory with the extension of '.tmp'
imageURL -- URL to the file to be downloaded
redditID -- Post's reddit id if renaming the file is necessary.
As too long file names seem not working.
"""
if any(domain in imageURL for domain in GLOBAL.arguments.skip):
raise DomainInSkip
headers = [
("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " \
@@ -58,20 +57,45 @@ def getFile(fileDir,tempDir,imageURL,indent=0):
opener.addheaders = headers
urllib.request.install_opener(opener)
if not (os.path.isfile(fileDir)):
for i in range(3):
filename = nameCorrector(filename)
if not silent: print(" "*indent + str(folderDir),
" "*indent + str(filename),
sep="\n")
for i in range(3):
fileDir = Path(folderDir) / filename
tempDir = Path(folderDir) / (filename+".tmp")
if not (os.path.isfile(fileDir)):
try:
urllib.request.urlretrieve(imageURL,
tempDir,
reporthook=dlProgress)
if GLOBAL.arguments.no_dupes:
fileHash = createHash(tempDir)
if fileHash in GLOBAL.hashList:
os.remove(tempDir)
raise FileAlreadyExistsError
GLOBAL.hashList.add(fileHash)
os.rename(tempDir,fileDir)
if not silent: print(" "*indent+"Downloaded"+" "*10)
return None
except ConnectionResetError as exception:
print(" "*indent + str(exception))
print(" "*indent + "Trying again\n")
if not silent: print(" "*indent + str(exception))
if not silent: print(" "*indent + "Trying again\n")
except FileNotFoundError:
raise FileNameTooLong
else:
print(" "*indent+"Downloaded"+" "*10)
break
else:
raise FileAlreadyExistsError
filename = shortFilename
else:
raise FileAlreadyExistsError
raise FailedToDownload
def createHash(filename):
    """Return the MD5 hex digest of the file at *filename*.

    Reads the file in 4 KiB chunks so arbitrarily large files
    can be hashed without loading them into memory at once.
    """
    digest = hashlib.md5()
    with open(filename, "rb") as stream:
        while True:
            block = stream.read(4096)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()

View File

@@ -6,39 +6,24 @@ from bs4 import BeautifulSoup
from src.downloaders.downloaderUtils import getFile, getExtension
from src.errors import (FileNameTooLong, AlbumNotDownloadedCompletely,
NotADownloadableLinkError, FileAlreadyExistsError)
from src.utils import nameCorrector
from src.utils import GLOBAL
from src.utils import printToFile as print
class GifDeliveryNetwork:
def __init__(self,directory,POST):
try:
POST['mediaURL'] = self.getLink(POST['postURL'])
POST['MEDIAURL'] = self.getLink(POST['CONTENTURL'])
except IndexError:
raise NotADownloadableLinkError("Could not read the page source")
POST['postExt'] = getExtension(POST['mediaURL'])
POST['EXTENSION'] = getExtension(POST['MEDIAURL'])
if not os.path.exists(directory): os.makedirs(directory)
title = nameCorrector(POST['postTitle'])
"""Filenames are declared here"""
print(POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt'])
fileDir = directory / (
POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt']
)
tempDir = directory / (
POST["postSubmitter"]+"_"+title+"_"+POST['postId']+".tmp"
)
filename = GLOBAL.config['filename'].format(**POST)+POST["EXTENSION"]
shortFilename = POST['POSTID']+POST['EXTENSION']
try:
getFile(fileDir,tempDir,POST['mediaURL'])
except FileNameTooLong:
fileDir = directory / (POST['postId']+POST['postExt'])
tempDir = directory / (POST['postId']+".tmp")
getFile(fileDir,tempDir,POST['mediaURL'])
getFile(filename,shortFilename,directory,POST['MEDIAURL'])
@staticmethod
def getLink(url):

View File

@@ -6,39 +6,24 @@ from bs4 import BeautifulSoup
from src.downloaders.downloaderUtils import getFile, getExtension
from src.errors import (FileNameTooLong, AlbumNotDownloadedCompletely,
NotADownloadableLinkError, FileAlreadyExistsError)
from src.utils import nameCorrector
from src.utils import GLOBAL
from src.utils import printToFile as print
class Redgifs:
def __init__(self,directory,POST):
try:
POST['mediaURL'] = self.getLink(POST['postURL'])
POST['MEDIAURL'] = self.getLink(POST['CONTENTURL'])
except IndexError:
raise NotADownloadableLinkError("Could not read the page source")
POST['postExt'] = getExtension(POST['mediaURL'])
POST['EXTENSION'] = getExtension(POST['MEDIAURL'])
if not os.path.exists(directory): os.makedirs(directory)
title = nameCorrector(POST['postTitle'])
"""Filenames are declared here"""
print(POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt'])
fileDir = directory / (
POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt']
)
tempDir = directory / (
POST["postSubmitter"]+"_"+title+"_"+POST['postId']+".tmp"
)
try:
getFile(fileDir,tempDir,POST['mediaURL'])
except FileNameTooLong:
fileDir = directory / (POST['postId']+POST['postExt'])
tempDir = directory / (POST['postId']+".tmp")
getFile(fileDir,tempDir,POST['mediaURL'])
filename = GLOBAL.config['filename'].format(**POST)+POST["EXTENSION"]
shortFilename = POST['POSTID']+POST['EXTENSION']
getFile(filename,shortFilename,directory,POST['MEDIAURL'])
def getLink(self, url):
"""Extract direct link to the video from page's source

View File

@@ -3,7 +3,7 @@ import os
from pathlib import Path
from src.errors import FileAlreadyExistsError
from src.utils import nameCorrector
from src.utils import GLOBAL
VanillaPrint = print
from src.utils import printToFile as print
@@ -12,23 +12,20 @@ class SelfPost:
def __init__(self,directory,post):
if not os.path.exists(directory): os.makedirs(directory)
title = nameCorrector(post['postTitle'])
filename = GLOBAL.config['filename'].format(**post)
"""Filenames are declared here"""
fileDir = directory / (filename+".md")
print(fileDir)
print(filename+".md")
print(post["postSubmitter"]+"_"+title+"_"+post['postId']+".md")
fileDir = directory / (
post["postSubmitter"]+"_"+title+"_"+post['postId']+".md"
)
if Path.is_file(fileDir):
raise FileAlreadyExistsError
try:
self.writeToFile(fileDir,post)
except FileNotFoundError:
fileDir = post['postId']+".md"
fileDir = post['POSTID']+".md"
fileDir = directory / fileDir
self.writeToFile(fileDir,post)
@@ -38,20 +35,20 @@ class SelfPost:
"""Self posts are formatted here"""
content = ("## ["
+ post["postTitle"]
+ post["TITLE"]
+ "]("
+ post["postURL"]
+ post["CONTENTURL"]
+ ")\n"
+ post["postContent"]
+ post["CONTENT"]
+ "\n\n---\n\n"
+ "submitted to [r/"
+ post["postSubreddit"]
+ post["SUBREDDIT"]
+ "](https://www.reddit.com/r/"
+ post["postSubreddit"]
+ post["SUBREDDIT"]
+ ") by [u/"
+ post["postSubmitter"]
+ post["REDDITOR"]
+ "](https://www.reddit.com/user/"
+ post["postSubmitter"]
+ post["REDDITOR"]
+ ")")
with io.open(directory,"w",encoding="utf-8") as FILE:

View File

@@ -0,0 +1,57 @@
import os
import subprocess
from src.downloaders.downloaderUtils import getFile, getExtension
from src.errors import FileNameTooLong
from src.utils import GLOBAL
from src.utils import printToFile as print
class VReddit:
    """Downloads a v.redd.it video and, when ffmpeg is available,
    merges the separately-hosted audio track into it."""

    def __init__(self, directory, post):
        extension = ".mp4"
        if not os.path.exists(directory): os.makedirs(directory)

        filename = GLOBAL.config['filename'].format(**post) + extension
        shortFilename = post['POSTID'] + extension

        try:
            # Probe for ffmpeg on PATH; raises OSError when it is missing.
            # subprocess.DEVNULL replaces the old unclosed os.devnull handle.
            subprocess.call(["ffmpeg"],
                            stdout=subprocess.DEVNULL,
                            stderr=subprocess.STDOUT)
        except OSError:
            # No ffmpeg: download the video-only stream as-is.
            getFile(filename, shortFilename, directory, post['CONTENTURL'])
            print("FFMPEG library not found, skipping merging video and audio")
        else:
            # v.redd.it serves video and audio at separate URLs:
            # the audio track lives next to the video as ".../audio".
            videoName = post['POSTID'] + "_video"
            videoURL = post['CONTENTURL']
            audioName = post['POSTID'] + "_audio"
            audioURL = videoURL[:videoURL.rfind('/')] + '/audio'

            print(directory, filename, sep="\n")
            getFile(videoName, videoName, directory, videoURL, silent=True)
            getFile(audioName, audioName, directory, audioURL, silent=True)
            try:
                self._mergeAudio(videoName,
                                 audioName,
                                 filename,
                                 shortFilename,
                                 directory)
            except KeyboardInterrupt:
                # Interrupted mid-merge: drop the partial output and the
                # audio, keep the plain video under the final filename.
                os.remove(directory / filename)
                os.remove(directory / audioName)
                os.rename(directory / videoName, directory / filename)

    @staticmethod
    def _mergeAudio(video, audio, filename, shortFilename, directory):
        """Mux *video* and *audio* (filenames inside *directory*, a Path)
        into *filename* with ffmpeg, then delete the two source streams."""
        inputVideo = str(directory / video)
        inputAudio = str(directory / audio)
        # Argument-list form works on every platform and with spaces in
        # paths; the old single f-string command only worked on Windows
        # because subprocess.call was given a string without shell=True.
        cmd = ["ffmpeg",
               "-i", inputAudio,
               "-i", inputVideo,
               "-c:v", "copy",
               "-c:a", "aac",
               "-strict", "experimental",
               str(directory / filename)]
        subprocess.call(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)
        os.remove(directory / video)
        os.remove(directory / audio)

View File

@@ -0,0 +1,51 @@
import os
import youtube_dl
import sys
from src.downloaders.downloaderUtils import getExtension, dlProgress, createHash
from src.utils import GLOBAL
from src.utils import printToFile as print
from src.errors import FileAlreadyExistsError
class Youtube:
    """Downloads the video of a reddit post via youtube-dl."""

    def __init__(self, directory, post):
        if not os.path.exists(directory): os.makedirs(directory)
        filename = GLOBAL.config['filename'].format(**post)
        print(filename)
        self.download(filename, directory, post['CONTENTURL'])

    def download(self, filename, directory, url):
        """Download *url* into *directory* (a Path) as *filename*.

        Raises FileAlreadyExistsError when --no-dupes is set and the
        downloaded file's hash was seen before.
        """
        ydl_opts = {
            "format": "best",
            "outtmpl": str(directory / (filename + ".%(ext)s")),
            "progress_hooks": [self._hook],
            "playlistend": 1,
            "nooverwrites": True,
            "quiet": True
        }
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])

        # NOTE(review): assumes the resulting container is .mp4; with
        # "format": "best" other extensions are possible — confirm.
        location = directory / (filename + ".mp4")
        if GLOBAL.arguments.no_dupes:
            try:
                fileHash = createHash(location)
            except FileNotFoundError:
                # File not at the assumed location; skip dupe tracking.
                return None
            if fileHash in GLOBAL.hashList:
                os.remove(location)
                raise FileAlreadyExistsError
            GLOBAL.hashList.add(fileHash)

    @staticmethod
    def _hook(d):
        """youtube-dl progress hook: print a one-line Mb progress counter."""
        if d['status'] == 'finished': return print("Downloaded")
        downloadedMbs = int(d['downloaded_bytes'] * (10**(-6)))
        # total_bytes is not always reported by youtube-dl; fall back to
        # its estimate so the hook does not crash with a KeyError.
        totalBytes = d.get('total_bytes') or d.get('total_bytes_estimate') or 0
        fileSize = int(totalBytes * (10**(-6)))
        sys.stdout.write("{}Mb/{}Mb\r".format(downloadedMbs, fileSize))
        sys.stdout.flush()

View File

@@ -1,31 +1,33 @@
import sys
class FauxTb(object):
def __init__(self, tb_frame, tb_lineno, tb_next):
self.tb_frame = tb_frame
self.tb_lineno = tb_lineno
self.tb_next = tb_next
def current_stack(skip=0):
try: 1/0
except ZeroDivisionError:
f = sys.exc_info()[2].tb_frame
for i in range(skip + 2):
f = f.f_back
lst = []
while f is not None:
lst.append((f, f.f_lineno))
f = f.f_back
return lst
def extend_traceback(tb, stack):
"""Extend traceback with stack info."""
head = tb
for tb_frame, tb_lineno in stack:
head = FauxTb(tb_frame, tb_lineno, head)
return head
def full_exc_info(exc_info):
def current_stack(skip=0):
try: 1/0
except ZeroDivisionError:
f = sys.exc_info()[2].tb_frame
for i in range(skip + 2):
f = f.f_back
lst = []
while f is not None:
lst.append((f, f.f_lineno))
f = f.f_back
return lst
def extend_traceback(tb, stack):
class FauxTb(object):
def __init__(self, tb_frame, tb_lineno, tb_next):
self.tb_frame = tb_frame
self.tb_lineno = tb_lineno
self.tb_next = tb_next
"""Extend traceback with stack info."""
head = tb
for tb_frame, tb_lineno in stack:
head = FauxTb(tb_frame, tb_lineno, head)
return head
"""Like sys.exc_info, but includes the full traceback."""
t, v, tb = exc_info
full_tb = extend_traceback(tb, current_stack(1))
@@ -86,4 +88,16 @@ class NoSuitablePost(Exception):
pass
class ImgurLimitError(Exception):
pass
class DirectLinkNotFound(Exception):
pass
class InvalidJSONFile(Exception):
pass
class FailedToDownload(Exception):
pass
class DomainInSkip(Exception):
pass

58
src/jsonHelper.py Normal file
View File

@@ -0,0 +1,58 @@
import json
from os import path, remove
from src.errors import InvalidJSONFile
class JsonFile:
    """ Write and read JSON files

    Use add(self,toBeAdded) to add to files
    Use delete(self,*deletedKeys) to delete keys
    """

    FILEDIR = ""

    def __init__(self, FILEDIR):
        # Create the backing file immediately so read() always succeeds.
        self.FILEDIR = FILEDIR
        if not path.exists(self.FILEDIR):
            self.__writeToFile({}, create=True)

    def read(self):
        """Return the file's content as a dictionary.

        Raises InvalidJSONFile when the file holds malformed JSON.
        """
        try:
            with open(self.FILEDIR, 'r') as f:
                return json.load(f)
        except json.decoder.JSONDecodeError:
            raise InvalidJSONFile(f"{self.FILEDIR} cannot be read")

    def add(self, toBeAdded, sub=None):
        """Takes a dictionary and merges it with json file.

        It uses new key's value if a key already exists.
        When *sub* is given, merges into that nested object,
        creating it if absent.
        Returns the new content as a dictionary.
        """
        data = self.read()
        if sub:
            # .get avoids a KeyError when the sub-object does not exist yet.
            data[sub] = {**data.get(sub, {}), **toBeAdded}
        else:
            data = {**data, **toBeAdded}
        self.__writeToFile(data)
        return self.read()

    def delete(self, *deleteKeys):
        """Delete given keys from JSON file.

        Returns the new content as a dictionary, or False when none
        of the keys were present.
        """
        data = self.read()
        found = False  # was referenced unassigned when no key matched
        for deleteKey in deleteKeys:
            if deleteKey in data:
                del data[deleteKey]
                found = True
        if not found:
            return False
        self.__writeToFile(data)
        return self.read()

    def __writeToFile(self, content, create=False):
        # Remove first so a fresh file is written; skipped on creation.
        if not create:
            remove(self.FILEDIR)
        with open(self.FILEDIR, 'w') as f:
            json.dump(content, f, indent=4)

270
src/programMode.py Normal file
View File

@@ -0,0 +1,270 @@
from src.errors import SearchModeError, RedditorNameError, ProgramModeError, InvalidSortingType
from src.utils import GLOBAL
from src.parser import LinkDesigner
from pathlib import Path
import sys
class ProgramMode:
    def __init__(self, arguments):
        # arguments: parsed argparse Namespace used to derive the program mode.
        self.arguments = arguments
def generate(self):
try:
self._validateProgramMode()
except ProgramModeError:
self._promptUser()
programMode = {}
if self.arguments.user is not None:
programMode["user"] = self.arguments.user
if self.arguments.search is not None:
programMode["search"] = self.arguments.search
if self.arguments.sort == "hot" or \
self.arguments.sort == "controversial" or \
self.arguments.sort == "rising":
self.arguments.sort = "relevance"
if self.arguments.sort is not None:
programMode["sort"] = self.arguments.sort
else:
if self.arguments.submitted:
programMode["sort"] = "new"
else:
programMode["sort"] = "hot"
if self.arguments.time is not None:
programMode["time"] = self.arguments.time
else:
programMode["time"] = "all"
if self.arguments.link is not None:
self.arguments.link = self.arguments.link.strip("\"")
programMode = LinkDesigner(self.arguments.link)
if self.arguments.search is not None:
programMode["search"] = self.arguments.search
if self.arguments.sort is not None:
programMode["sort"] = self.arguments.sort
if self.arguments.time is not None:
programMode["time"] = self.arguments.time
elif self.arguments.subreddit is not None:
if type(self.arguments.subreddit) == list:
self.arguments.subreddit = "+".join(self.arguments.subreddit)
programMode["subreddit"] = self.arguments.subreddit
elif self.arguments.multireddit is not None:
programMode["multireddit"] = self.arguments.multireddit
elif self.arguments.saved is True:
programMode["saved"] = True
elif self.arguments.upvoted is True:
programMode["upvoted"] = True
elif self.arguments.submitted is not None:
programMode["submitted"] = True
if self.arguments.sort == "rising":
raise InvalidSortingType("Invalid sorting type has given")
programMode["limit"] = self.arguments.limit
return programMode
@staticmethod
def _chooseFrom(choices):
print()
choicesByIndex = list(str(x) for x in range(len(choices)+1))
for i in range(len(choices)):
print("{indent}[{order}] {mode}".format(
indent=" "*4,order=i+1,mode=choices[i]
))
print(" "*4+"[0] exit\n")
choice = input("> ")
while not choice.lower() in choices+choicesByIndex+["exit"]:
print("Invalid input\n")
input("> ")
if choice == "0" or choice == "exit":
sys.exit()
elif choice in choicesByIndex:
return choices[int(choice)-1]
else:
return choice
def _promptUser(self):
print("select program mode:")
programModes = [
"search","subreddit","multireddit",
"submitted","upvoted","saved","log"
]
programMode = self._chooseFrom(programModes)
if programMode == "search":
self.arguments.search = input("\nquery: ")
self.arguments.subreddit = input("\nsubreddit: ")
print("\nselect sort type:")
sortTypes = [
"relevance","top","new"
]
sortType = self._chooseFrom(sortTypes)
self.arguments.sort = sortType
print("\nselect time filter:")
timeFilters = [
"hour","day","week","month","year","all"
]
timeFilter = self._chooseFrom(timeFilters)
self.arguments.time = timeFilter
if programMode == "subreddit":
subredditInput = input("(type frontpage for all subscribed subreddits,\n" \
" use plus to seperate multi subreddits:" \
" pics+funny+me_irl etc.)\n\n" \
"subreddit: ")
self.arguments.subreddit = subredditInput
# while not (subredditInput == "" or subredditInput.lower() == "frontpage"):
# subredditInput = input("subreddit: ")
# self.arguments.subreddit += "+" + subredditInput
if " " in self.arguments.subreddit:
self.arguments.subreddit = "+".join(self.arguments.subreddit.split())
# DELETE THE PLUS (+) AT THE END
if not subredditInput.lower() == "frontpage" \
and self.arguments.subreddit[-1] == "+":
self.arguments.subreddit = self.arguments.subreddit[:-1]
print("\nselect sort type:")
sortTypes = [
"hot","top","new","rising","controversial"
]
sortType = self._chooseFrom(sortTypes)
self.arguments.sort = sortType
if sortType in ["top","controversial"]:
print("\nselect time filter:")
timeFilters = [
"hour","day","week","month","year","all"
]
timeFilter = self._chooseFrom(timeFilters)
self.arguments.time = timeFilter
else:
self.arguments.time = "all"
elif programMode == "multireddit":
self.arguments.user = input("\nmultireddit owner: ")
self.arguments.multireddit = input("\nmultireddit: ")
print("\nselect sort type:")
sortTypes = [
"hot","top","new","rising","controversial"
]
sortType = self._chooseFrom(sortTypes)
self.arguments.sort = sortType
if sortType in ["top","controversial"]:
print("\nselect time filter:")
timeFilters = [
"hour","day","week","month","year","all"
]
timeFilter = self._chooseFrom(timeFilters)
self.arguments.time = timeFilter
else:
self.arguments.time = "all"
elif programMode == "submitted":
self.arguments.submitted = True
self.arguments.user = input("\nredditor: ")
print("\nselect sort type:")
sortTypes = [
"hot","top","new","controversial"
]
sortType = self._chooseFrom(sortTypes)
self.arguments.sort = sortType
if sortType == "top":
print("\nselect time filter:")
timeFilters = [
"hour","day","week","month","year","all"
]
timeFilter = self._chooseFrom(timeFilters)
self.arguments.time = timeFilter
else:
self.arguments.time = "all"
elif programMode == "upvoted":
self.arguments.upvoted = True
self.arguments.user = input("\nredditor: ")
elif programMode == "saved":
self.arguments.saved = True
elif programMode == "log":
while True:
self.arguments.log = input("\nlog file directory:")
if Path(self.arguments.log).is_file():
break
while True:
try:
self.arguments.limit = int(input("\nlimit (0 for none): "))
if self.arguments.limit == 0:
self.arguments.limit = None
break
except ValueError:
pass
def _validateProgramMode(self):
"""Check if command-line self.arguments are given correcly,
if not, raise errors
"""
if self.arguments.user is None:
user = 0
else:
user = 1
search = 1 if self.arguments.search else 0
modes = [
"saved","subreddit","submitted","log","link","upvoted","multireddit"
]
values = {
x: 0 if getattr(self.arguments,x) is None or \
getattr(self.arguments,x) is False \
else 1 \
for x in modes
}
if not sum(values[x] for x in values) == 1:
raise ProgramModeError("Invalid program mode")
if search+values["saved"] == 2:
raise SearchModeError("You cannot search in your saved posts")
if search+values["submitted"] == 2:
raise SearchModeError("You cannot search in submitted posts")
if search+values["upvoted"] == 2:
raise SearchModeError("You cannot search in upvoted posts")
if search+values["log"] == 2:
raise SearchModeError("You cannot search in log files")
if values["upvoted"]+values["submitted"] == 1 and user == 0:
raise RedditorNameError("No redditor name given")

98
src/reddit.py Normal file
View File

@@ -0,0 +1,98 @@
import praw
import random
import socket
import webbrowser
from prawcore.exceptions import NotFound, ResponseException, Forbidden
from src.utils import GLOBAL
from src.jsonHelper import JsonFile
from src. errors import RedditLoginFailed
class Reddit:
    """Obtain an authenticated PRAW Reddit instance.

    Uses a cached OAuth refresh token when one is supplied; otherwise
    runs the browser-based authorization flow on a local TCP port and
    stores the fresh token in the credentials file.
    """

    def __init__(self, refresh_token=None):
        self.SCOPES = ['identity','history','read','save']
        self.PORT = 7634
        self.refresh_token = refresh_token
        self.redditInstance = None
        self.arguments = {
            "client_id": GLOBAL.reddit_client_id,
            "client_secret": GLOBAL.reddit_client_secret,
            # hostname doubles as a (roughly) unique user agent
            "user_agent": str(socket.gethostname())
        }

    def begin(self):
        """Return an authenticated praw.Reddit instance.

        Tries the cached refresh token first; when no token exists or
        the token is rejected, falls back to the interactive OAuth flow
        and persists the newly obtained token.
        """
        if self.refresh_token:
            self.arguments["refresh_token"] = self.refresh_token
            self.redditInstance = praw.Reddit(**self.arguments)
            try:
                # Cheap API call to verify the token is still valid.
                self.redditInstance.auth.scopes()
                return self.redditInstance
            except ResponseException:
                # Token rejected; fall through and re-authorize.
                pass

        # No (valid) token: run the full browser authorization flow.
        # (Previously this code was duplicated in both branches.)
        self.arguments["redirect_uri"] = "http://localhost:" + str(self.PORT)
        self.redditInstance = praw.Reddit(**self.arguments)
        reddit, refresh_token = self.getRefreshToken(*self.SCOPES)

        JsonFile(GLOBAL.configDirectory).add({
            "reddit_username": str(reddit.user.me()),
            "reddit": refresh_token
        }, "credentials")
        return self.redditInstance

    def recieve_connection(self):
        """Wait for and then return a connected socket.

        Listens on localhost:self.PORT and accepts a single client.
        (NOTE: method name keeps its historical "recieve" typo because
        it is part of the public interface.)
        """
        server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        server.bind(('localhost', self.PORT))
        server.listen(1)
        client = server.accept()[0]
        server.close()
        return client

    def send_message(self, client, message):
        """Send a minimal HTTP response to client and close the connection."""
        client.send(
            'HTTP/1.1 200 OK\r\n\r\n{}'.format(message).encode('utf-8')
        )
        client.close()

    def getRefreshToken(self, *scopes):
        """Run the OAuth2 authorization-code flow.

        Opens the authorization URL in a browser, waits for reddit's
        redirect on the local port and exchanges the code for a
        permanent refresh token.  Returns (redditInstance,
        refresh_token); raises RedditLoginFailed on a state mismatch or
        when reddit reports an error.
        """
        state = str(random.randint(0, 65000))
        url = self.redditInstance.auth.url(scopes, state, 'permanent')
        print("---Setting up the Reddit API---\n")
        print("Go to this URL and login to reddit:\n", url, sep="\n", end="\n\n")
        webbrowser.open(url, new=2)

        client = self.recieve_connection()
        data = client.recv(1024).decode('utf-8')
        # Request line looks like "GET /?state=...&code=... HTTP/1.1";
        # pull the query string out and split it into key=value pairs.
        param_tokens = data.split(' ', 2)[1].split('?', 1)[1].split('&')
        params = {
            key: value for (key, value) in [token.split('=')
                                            for token in param_tokens]
        }

        if state != params['state']:
            self.send_message(
                client, 'State mismatch. Expected: {} Received: {}'
                        .format(state, params['state'])
            )
            raise RedditLoginFailed
        elif 'error' in params:
            self.send_message(client, params['error'])
            raise RedditLoginFailed

        refresh_token = self.redditInstance.auth.authorize(params['code'])
        self.send_message(client,
                          "<script>" \
                          "alert(\"You can go back to terminal window now.\");" \
                          "</script>"
                          )
        return (self.redditInstance, refresh_token)

View File

@@ -2,6 +2,7 @@ import os
import sys
import random
import socket
import time
import webbrowser
import urllib.request
from urllib.error import HTTPError
@@ -9,477 +10,330 @@ from urllib.error import HTTPError
import praw
from prawcore.exceptions import NotFound, ResponseException, Forbidden
from src.utils import GLOBAL, createLogFile, jsonFile, printToFile
from src.reddit import Reddit
from src.utils import GLOBAL, createLogFile, printToFile
from src.jsonHelper import JsonFile
from src.errors import (NoMatchingSubmissionFound, NoPrawSupport,
NoRedditSupport, MultiredditNotFound,
InvalidSortingType, RedditLoginFailed,
InsufficientPermission)
InsufficientPermission, DirectLinkNotFound)
print = printToFile
def beginPraw(config,user_agent = str(socket.gethostname())):
class GetAuth:
def __init__(self,redditInstance,port):
self.redditInstance = redditInstance
self.PORT = int(port)
def recieve_connection(self):
"""Wait for and then return a connected socket..
Opens a TCP connection on port 8080, and waits for a single client.
"""
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
server.bind(('localhost', self.PORT))
server.listen(1)
client = server.accept()[0]
server.close()
return client
def send_message(self, client, message):
"""Send message to client and close the connection."""
client.send(
'HTTP/1.1 200 OK\r\n\r\n{}'.format(message).encode('utf-8')
)
client.close()
def getRefreshToken(self,*scopes):
state = str(random.randint(0, 65000))
url = self.redditInstance.auth.url(scopes, state, 'permanent')
print("Go to this URL and login to reddit:\n\n",url)
webbrowser.open(url,new=2)
client = self.recieve_connection()
data = client.recv(1024).decode('utf-8')
str(data)
param_tokens = data.split(' ', 2)[1].split('?', 1)[1].split('&')
params = {
key: value for (key, value) in [token.split('=') \
for token in param_tokens]
}
if state != params['state']:
self.send_message(
client, 'State mismatch. Expected: {} Received: {}'
.format(state, params['state'])
)
raise RedditLoginFailed
elif 'error' in params:
self.send_message(client, params['error'])
raise RedditLoginFailed
refresh_token = self.redditInstance.auth.authorize(params['code'])
self.send_message(client,
"<script>" \
"alert(\"You can go back to terminal window now.\");" \
"</script>"
)
return (self.redditInstance,refresh_token)
"""Start reddit instance"""
scopes = ['identity','history','read']
port = "1337"
arguments = {
"client_id":GLOBAL.reddit_client_id,
"client_secret":GLOBAL.reddit_client_secret,
"user_agent":user_agent
}
if "reddit_refresh_token" in GLOBAL.config:
arguments["refresh_token"] = GLOBAL.config["reddit_refresh_token"]
reddit = praw.Reddit(**arguments)
try:
reddit.auth.scopes()
except ResponseException:
arguments["redirect_uri"] = "http://localhost:" + str(port)
reddit = praw.Reddit(**arguments)
authorizedInstance = GetAuth(reddit,port).getRefreshToken(*scopes)
reddit = authorizedInstance[0]
refresh_token = authorizedInstance[1]
jsonFile(GLOBAL.configDirectory).add({
"reddit_username":str(reddit.user.me()),
"reddit_refresh_token":refresh_token
})
else:
arguments["redirect_uri"] = "http://localhost:" + str(port)
reddit = praw.Reddit(**arguments)
authorizedInstance = GetAuth(reddit,port).getRefreshToken(*scopes)
reddit = authorizedInstance[0]
refresh_token = authorizedInstance[1]
jsonFile(GLOBAL.configDirectory).add({
"reddit_username":str(reddit.user.me()),
"reddit_refresh_token":refresh_token
})
return reddit
def getPosts(args):
"""Call PRAW regarding to arguments and pass it to redditSearcher.
Return what redditSearcher has returned.
def getPosts(programMode):
"""Call PRAW regarding to arguments and pass it to extractDetails.
Return what extractDetails has returned.
"""
config = GLOBAL.config
reddit = beginPraw(config)
reddit = Reddit(GLOBAL.config["credentials"]["reddit"]).begin()
if args["sort"] == "best":
if programMode["sort"] == "best":
raise NoPrawSupport("PRAW does not support that")
if "subreddit" in args:
if "search" in args:
if args["subreddit"] == "frontpage":
args["subreddit"] = "all"
if "subreddit" in programMode:
if "search" in programMode:
if programMode["subreddit"] == "frontpage":
programMode["subreddit"] = "all"
if "user" in args:
if args["user"] == "me":
args["user"] = str(reddit.user.me())
if "user" in programMode:
if programMode["user"] == "me":
programMode["user"] = str(reddit.user.me())
if not "search" in args:
if args["sort"] == "top" or args["sort"] == "controversial":
if not "search" in programMode:
if programMode["sort"] == "top" or programMode["sort"] == "controversial":
keyword_params = {
"time_filter":args["time"],
"limit":args["limit"]
"time_filter":programMode["time"],
"limit":programMode["limit"]
}
# OTHER SORT TYPES DON'T TAKE TIME_FILTER
else:
keyword_params = {
"limit":args["limit"]
"limit":programMode["limit"]
}
else:
keyword_params = {
"time_filter":args["time"],
"limit":args["limit"]
"time_filter":programMode["time"],
"limit":programMode["limit"]
}
if "search" in args:
if GLOBAL.arguments.sort in ["hot","rising","controversial"]:
if "search" in programMode:
if programMode["sort"] in ["hot","rising","controversial"]:
raise InvalidSortingType("Invalid sorting type has given")
if "subreddit" in args:
if "subreddit" in programMode:
print (
"search for \"{search}\" in\n" \
"subreddit: {subreddit}\nsort: {sort}\n" \
"time: {time}\nlimit: {limit}\n".format(
search=args["search"],
limit=args["limit"],
sort=args["sort"],
subreddit=args["subreddit"],
time=args["time"]
search=programMode["search"],
limit=programMode["limit"],
sort=programMode["sort"],
subreddit=programMode["subreddit"],
time=programMode["time"]
).upper(),noPrint=True
)
return redditSearcher(
reddit.subreddit(args["subreddit"]).search(
args["search"],
limit=args["limit"],
sort=args["sort"],
time_filter=args["time"]
return extractDetails(
reddit.subreddit(programMode["subreddit"]).search(
programMode["search"],
limit=programMode["limit"],
sort=programMode["sort"],
time_filter=programMode["time"]
)
)
elif "multireddit" in args:
elif "multireddit" in programMode:
raise NoPrawSupport("PRAW does not support that")
elif "user" in args:
elif "user" in programMode:
raise NoPrawSupport("PRAW does not support that")
elif "saved" in args:
elif "saved" in programMode:
raise ("Reddit does not support that")
if args["sort"] == "relevance":
if programMode["sort"] == "relevance":
raise InvalidSortingType("Invalid sorting type has given")
if "saved" in args:
if "saved" in programMode:
print(
"saved posts\nuser:{username}\nlimit={limit}\n".format(
username=reddit.user.me(),
limit=args["limit"]
limit=programMode["limit"]
).upper(),noPrint=True
)
return redditSearcher(reddit.user.me().saved(limit=args["limit"]))
return extractDetails(reddit.user.me().saved(limit=programMode["limit"]))
if "subreddit" in args:
if "subreddit" in programMode:
if args["subreddit"] == "frontpage":
if programMode["subreddit"] == "frontpage":
print (
"subreddit: {subreddit}\nsort: {sort}\n" \
"time: {time}\nlimit: {limit}\n".format(
limit=args["limit"],
sort=args["sort"],
subreddit=args["subreddit"],
time=args["time"]
limit=programMode["limit"],
sort=programMode["sort"],
subreddit=programMode["subreddit"],
time=programMode["time"]
).upper(),noPrint=True
)
return redditSearcher(
getattr(reddit.front,args["sort"]) (**keyword_params)
return extractDetails(
getattr(reddit.front,programMode["sort"]) (**keyword_params)
)
else:
print (
"subreddit: {subreddit}\nsort: {sort}\n" \
"time: {time}\nlimit: {limit}\n".format(
limit=args["limit"],
sort=args["sort"],
subreddit=args["subreddit"],
time=args["time"]
limit=programMode["limit"],
sort=programMode["sort"],
subreddit=programMode["subreddit"],
time=programMode["time"]
).upper(),noPrint=True
)
return redditSearcher(
return extractDetails(
getattr(
reddit.subreddit(args["subreddit"]),args["sort"]
reddit.subreddit(programMode["subreddit"]),programMode["sort"]
) (**keyword_params)
)
elif "multireddit" in args:
elif "multireddit" in programMode:
print (
"user: {user}\n" \
"multireddit: {multireddit}\nsort: {sort}\n" \
"time: {time}\nlimit: {limit}\n".format(
user=args["user"],
limit=args["limit"],
sort=args["sort"],
multireddit=args["multireddit"],
time=args["time"]
user=programMode["user"],
limit=programMode["limit"],
sort=programMode["sort"],
multireddit=programMode["multireddit"],
time=programMode["time"]
).upper(),noPrint=True
)
try:
return redditSearcher(
return extractDetails(
getattr(
reddit.multireddit(
args["user"], args["multireddit"]
),args["sort"]
programMode["user"], programMode["multireddit"]
),programMode["sort"]
) (**keyword_params)
)
except NotFound:
raise MultiredditNotFound("Multireddit not found")
elif "submitted" in args:
elif "submitted" in programMode:
print (
"submitted posts of {user}\nsort: {sort}\n" \
"time: {time}\nlimit: {limit}\n".format(
limit=args["limit"],
sort=args["sort"],
user=args["user"],
time=args["time"]
limit=programMode["limit"],
sort=programMode["sort"],
user=programMode["user"],
time=programMode["time"]
).upper(),noPrint=True
)
return redditSearcher(
return extractDetails(
getattr(
reddit.redditor(args["user"]).submissions,args["sort"]
reddit.redditor(programMode["user"]).submissions,programMode["sort"]
) (**keyword_params)
)
elif "upvoted" in args:
elif "upvoted" in programMode:
print (
"upvoted posts of {user}\nlimit: {limit}\n".format(
user=args["user"],
limit=args["limit"]
user=programMode["user"],
limit=programMode["limit"]
).upper(),noPrint=True
)
try:
return redditSearcher(
reddit.redditor(args["user"]).upvoted(limit=args["limit"])
return extractDetails(
reddit.redditor(programMode["user"]).upvoted(limit=programMode["limit"])
)
except Forbidden:
raise InsufficientPermission("You do not have permission to do that")
elif "post" in args:
print("post: {post}\n".format(post=args["post"]).upper(),noPrint=True)
return redditSearcher(
reddit.submission(url=args["post"]),SINGLE_POST=True
elif "post" in programMode:
print("post: {post}\n".format(post=programMode["post"]).upper(),noPrint=True)
return extractDetails(
reddit.submission(url=programMode["post"]),SINGLE_POST=True
)
def redditSearcher(posts,SINGLE_POST=False):
def extractDetails(posts,SINGLE_POST=False):
"""Check posts and decide if it can be downloaded.
If so, create a dictionary with post details and append them to a list.
Write all of posts to file. Return the list
"""
subList = []
global subCount
subCount = 0
global orderCount
orderCount = 0
global gfycatCount
gfycatCount = 0
global redgifsCount
redgifsCount = 0
global imgurCount
imgurCount = 0
global eromeCount
eromeCount = 0
global gifDeliveryNetworkCount
gifDeliveryNetworkCount = 0
global directCount
directCount = 0
global selfCount
selfCount = 0
postList = []
postCount = 0
allPosts = {}
print("\nGETTING POSTS")
if GLOBAL.arguments.verbose: print("\n")
postsFile = createLogFile("POSTS")
if SINGLE_POST:
submission = posts
subCount += 1
postCount += 1
try:
details = {'postId':submission.id,
'postTitle':submission.title,
'postSubmitter':str(submission.author),
'postType':None,
'postURL':submission.url,
'postSubreddit':submission.subreddit.display_name}
details = {'POSTID':submission.id,
'TITLE':submission.title,
'REDDITOR':str(submission.author),
'TYPE':None,
'CONTENTURL':submission.url,
'SUBREDDIT':submission.subreddit.display_name,
'UPVOTES': submission.score,
'FLAIR':submission.link_flair_text,
'DATE':str(time.strftime(
"%Y-%m-%d_%H-%M",
time.localtime(submission.created_utc)
))}
except AttributeError:
pass
result = checkIfMatching(submission)
result = matchWithDownloader(submission)
if result is not None:
details = result
orderCount += 1
if GLOBAL.arguments.verbose:
printSubmission(submission,subCount,orderCount)
subList.append(details)
details = {**details, **result}
postList.append(details)
postsFile.add({subCount:[details]})
postsFile.add({postCount:details})
else:
try:
for submission in posts:
subCount += 1
postCount += 1
if subCount % 100 == 0 and not GLOBAL.arguments.verbose:
if postCount % 100 == 0:
sys.stdout.write("")
sys.stdout.flush()
if subCount % 1000 == 0:
if postCount % 1000 == 0:
sys.stdout.write("\n"+" "*14)
sys.stdout.flush()
try:
details = {'postId':submission.id,
'postTitle':submission.title,
'postSubmitter':str(submission.author),
'postType':None,
'postURL':submission.url,
'postSubreddit':submission.subreddit.display_name}
details = {'POSTID':submission.id,
'TITLE':submission.title,
'REDDITOR':str(submission.author),
'TYPE':None,
'CONTENTURL':submission.url,
'SUBREDDIT':submission.subreddit.display_name,
'UPVOTES': submission.score,
'FLAIR':submission.link_flair_text,
'DATE':str(time.strftime(
"%Y-%m-%d_%H-%M",
time.localtime(submission.created_utc)
))}
except AttributeError:
continue
result = checkIfMatching(submission)
result = matchWithDownloader(submission)
if result is not None:
details = result
orderCount += 1
if GLOBAL.arguments.verbose:
printSubmission(submission,subCount,orderCount)
subList.append(details)
details = {**details, **result}
postList.append(details)
allPosts[subCount] = [details]
allPosts[postCount] = details
except KeyboardInterrupt:
print("\nKeyboardInterrupt",noPrint=True)
postsFile.add(allPosts)
if not len(subList) == 0:
if GLOBAL.arguments.NoDownload or GLOBAL.arguments.verbose:
print(
f"\n\nTotal of {len(subList)} submissions found!"
)
print(
f"{gfycatCount} GFYCATs, {imgurCount} IMGURs, " \
f"{eromeCount} EROMEs, {directCount} DIRECTs " \
f"and {selfCount} SELF POSTS",noPrint=True
)
else:
print()
return subList
if not len(postList) == 0:
print()
return postList
else:
raise NoMatchingSubmissionFound("No matching submission was found")
def checkIfMatching(submission):
global gfycatCount
global redgifsCount
global imgurCount
global eromeCount
global directCount
global gifDeliveryNetworkCount
global selfCount
def matchWithDownloader(submission):
try:
details = {'postId':submission.id,
'postTitle':submission.title,
'postSubmitter':str(submission.author),
'postType':None,
'postURL':submission.url,
'postSubreddit':submission.subreddit.display_name}
except AttributeError:
return None
if 'v.redd.it' in submission.domain:
bitrates = ["DASH_1080","DASH_720","DASH_600", \
"DASH_480","DASH_360","DASH_240"]
for bitrate in bitrates:
videoURL = submission.url+"/"+bitrate
try:
responseCode = urllib.request.urlopen(videoURL).getcode()
except urllib.error.HTTPError:
responseCode = 0
if responseCode == 200:
return {'TYPE': 'v.redd.it', 'CONTENTURL': videoURL}
if 'gfycat' in submission.domain:
details['postType'] = 'gfycat'
gfycatCount += 1
return details
return {'TYPE': 'gfycat'}
if 'youtube' in submission.domain \
and 'watch' in submission.url:
return {'TYPE': 'youtube'}
if 'youtu.be' in submission.domain:
url = urllib.request.urlopen(submission.url).geturl()
if 'watch' in url:
return {'TYPE': 'youtube'}
elif 'imgur' in submission.domain:
details['postType'] = 'imgur'
imgurCount += 1
return details
return {'TYPE': 'imgur'}
elif 'erome' in submission.domain:
details['postType'] = 'erome'
eromeCount += 1
return details
return {'TYPE': 'erome'}
elif 'redgifs' in submission.domain:
details['postType'] = 'redgifs'
redgifsCount += 1
return details
return {'TYPE': 'redgifs'}
elif 'gifdeliverynetwork' in submission.domain:
details['postType'] = 'gifdeliverynetwork'
gifDeliveryNetworkCount += 1
return details
return {'TYPE': 'gifdeliverynetwork'}
elif submission.is_self:
details['postType'] = 'self'
details['postContent'] = submission.selftext
selfCount += 1
return details
directLink = isDirectLink(submission.url)
if directLink is not False:
details['postType'] = 'direct'
details['postURL'] = directLink
directCount += 1
return details
def printSubmission(SUB,validNumber,totalNumber):
"""Print post's link, title and media link to screen"""
print(validNumber,end=") ")
print(totalNumber,end=" ")
print(
"https://www.reddit.com/"
+"r/"
+SUB.subreddit.display_name
+"/comments/"
+SUB.id
)
print(" "*(len(str(validNumber))
+(len(str(totalNumber)))+3),end="")
elif submission.is_self and 'self' not in GLOBAL.arguments.skip:
return {'TYPE': 'self',
'CONTENT': submission.selftext}
try:
print(SUB.title)
except:
SUB.title = "unnamed"
print("SUBMISSION NAME COULD NOT BE READ")
pass
return {'TYPE': 'direct',
'CONTENTURL': extractDirectLink(submission.url)}
except DirectLinkNotFound:
return None
print(" "*(len(str(validNumber))+(len(str(totalNumber)))+3),end="")
print(SUB.url,end="\n\n")
def isDirectLink(URL):
def extractDirectLink(URL):
"""Check if link is a direct image link.
If so, return URL,
if not, return False
@@ -508,10 +362,10 @@ def isDirectLink(URL):
return videoURL
else:
return False
raise DirectLinkNotFound
for extension in imageTypes:
if extension in URL.split("/")[-1]:
return URL
else:
return False
raise DirectLinkNotFound

24
src/store.py Normal file
View File

@@ -0,0 +1,24 @@
from os import path
class Store:
    """In-memory list of item hashes, optionally mirrored to a text file.

    Used to keep track of already-downloaded posts: every hash passed
    to add() is appended to *directory* (one per line) so the list
    survives restarts.  With no directory, the store is memory-only.
    """

    def __init__(self, directory=None):
        self.directory = directory
        if self.directory:
            if path.exists(directory):
                with open(directory, 'r') as f:
                    # BUGFIX: splitlines() instead of split("\n") — the
                    # trailing newline written by add() used to produce
                    # a spurious empty-string entry in the list.
                    self.list = f.read().splitlines()
            else:
                # Create the file so later appends succeed.
                with open(self.directory, 'a'):
                    pass
                self.list = []
        else:
            self.list = []

    def __call__(self):
        """Return the current list of stored hashes."""
        return self.list

    def add(self, filehash):
        """Append *filehash* to the in-memory list and the backing file."""
        self.list.append(filehash)
        if self.directory:
            with open(self.directory, 'a') as f:
                f.write("{filehash}\n".format(filehash=filehash))

View File

@@ -1,91 +1,41 @@
import io
import json
import sys
import time
from os import makedirs, path, remove
from pathlib import Path
from src.jsonHelper import JsonFile
from src.errors import FileNotFoundError
class GLOBAL:
"""Declare global variables"""
RUN_TIME = 0
RUN_TIME = ""
config = {'imgur_client_id':None, 'imgur_client_secret': None}
arguments = None
directory = None
defaultConfigDirectory = Path.home() / "Bulk Downloader for Reddit"
configDirectory = ""
reddit_client_id = "BSyphDdxYZAgVQ"
reddit_client_secret = "bfqNJaRh8NMh-9eAr-t4TRz-Blk"
reddit_client_id = "U-6gk4ZCh3IeNQ"
reddit_client_secret = "7CZHY6AmKweZME5s50SfDGylaPg"
hashList = set()
downloadedPosts = lambda: []
printVanilla = print
class jsonFile:
    """Write and read JSON files.

    Use add(toBeAdded) to merge a dict into the file and
    delete(*deleteKeys) to remove keys.
    """

    FILEDIR = ""

    def __init__(self, FILEDIR):
        self.FILEDIR = FILEDIR
        # Create an empty JSON object file on first use.
        if not path.exists(self.FILEDIR):
            self.__writeToFile({}, create=True)

    def read(self):
        """Return the file content as a dictionary."""
        with open(self.FILEDIR, 'r') as f:
            return json.load(f)

    def add(self, toBeAdded):
        """Merge *toBeAdded* into the JSON file.

        New values win on key collisions.  Returns the resulting
        content as a dictionary.
        """
        data = self.read()
        data = {**data, **toBeAdded}
        self.__writeToFile(data)
        return self.read()

    def delete(self, *deleteKeys):
        """Delete the given keys from the JSON file.

        Returns False when none of the keys were present; otherwise
        rewrites the file without them (and returns None).
        """
        # BUGFIX: "found" was unbound when no key matched, raising
        # NameError at the "if not found" check below.
        found = False
        data = self.read()
        for deleteKey in deleteKeys:
            if deleteKey in data:
                del data[deleteKey]
                found = True
        if not found:
            return False
        self.__writeToFile(data)

    def __writeToFile(self, content, create=False):
        # Remove first so the dump always starts from an empty file.
        if not create:
            remove(self.FILEDIR)
        with open(self.FILEDIR, 'w') as f:
            json.dump(content, f, indent=4)
def createLogFile(TITLE):
"""Create a log file with given name
inside a folder time stampt in its name and
put given arguments inside \"HEADER\" key
"""
folderDirectory = GLOBAL.directory / "LOG_FILES" / \
str(time.strftime(
"%d-%m-%Y_%H-%M-%S",time.localtime(GLOBAL.RUN_TIME)
))
folderDirectory = GLOBAL.directory / "LOG_FILES" / GLOBAL.RUN_TIME
logFilename = TITLE.upper()+'.json'
if not path.exists(folderDirectory):
makedirs(folderDirectory)
FILE = jsonFile(folderDirectory / Path(logFilename))
FILE = JsonFile(folderDirectory / Path(logFilename))
HEADER = " ".join(sys.argv)
FILE.add({"HEADER":HEADER})
@@ -96,9 +46,7 @@ def printToFile(*args, noPrint=False,**kwargs):
CONSOLE LOG file in a folder time stampt in the name
"""
TIME = str(time.strftime("%d-%m-%Y_%H-%M-%S",
time.localtime(GLOBAL.RUN_TIME)))
folderDirectory = GLOBAL.directory / "LOG_FILES" / TIME
folderDirectory = GLOBAL.directory / Path("LOG_FILES") / Path(GLOBAL.RUN_TIME)
if not noPrint or \
GLOBAL.arguments.verbose or \
@@ -134,12 +82,11 @@ def nameCorrector(string):
spacesRemoved.append(string[b])
string = ''.join(spacesRemoved)
correctedString = []
if len(string.split('\n')) > 1:
string = "".join(string.split('\n'))
BAD_CHARS = ['\\','/',':','*','?','"','<','>','|','.','#']
BAD_CHARS = ['\\','/',':','*','?','"','<','>','|','#']
if any(x in string for x in BAD_CHARS):
for char in string: