(maint) code clean up (#187)

## bdfr

- Add the bound instance as a method parameter
- Change methods that do not use their bound instance to staticmethods
- Fix a dangerous mutable default argument
- Refactor comparisons involving `not`
- Refactor unnecessary `else` / `elif` when the `if` block has a `raise` statement
- Refactor unnecessary `else` / `elif` when the `if` block has a `return` statement
- Refactor useless `else` blocks in loops
- Remove implicit `object` from base classes
- Remove reimported modules
- Remove unnecessary generators
- Remove unnecessary return statements
- Remove unnecessary uses of comprehensions
- Remove unused imports
- Use `is` to compare the types of objects
- Fix `not x` checks that can cause unwanted results
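
Most of these are mechanical lint fixes. A minimal before/after sketch (hypothetical code, not taken from this repository) illustrating several of the patterns at once:

```python
# Before: mutable default argument, `== None` comparison,
# redundant `else` after `return`, implicit `object` base class
class Parser(object):
    def parse(self, arguments=[]):
        if not arguments == None:
            return arguments
        else:
            return []


# After: `None` sentinel, identity comparison, early return, and a
# staticmethod since the method never uses its bound instance
class Parser:
    @staticmethod
    def parse(arguments=None):
        if arguments is not None:
            return arguments
        return []
```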

## Dockerfile

- Use a pinned Python version tag instead of `latest`
- Leverage cached requirements (see the sketch after this list)
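
Installing dependencies in their own layer, before the rest of the source tree is copied in, lets Docker reuse the cached `pip install` layer whenever only application code changes. A minimal sketch of the pattern, matching the layout in the diff below:

```dockerfile
FROM python:3.8-slim-buster
# Dependency layer: rebuilt only when requirements.txt changes
COPY requirements.txt /requirements.txt
RUN pip install --no-cache-dir -r /requirements.txt
# Source layer: edits here do not invalidate the pip install above
COPY . /bdfr
WORKDIR /bdfr
```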

Signed-off-by: Vladislav Doster <mvdoster@gmail.com>

Co-authored-by: Ali Parlakçı <parlakciali@gmail.com>
vlad doster authored on 2021-02-25 03:32:06 -06:00, committed by GitHub
parent e0a2d2eda0, commit fc42afbabe
24 changed files with 781 additions and 663 deletions


@@ -1,9 +1,31 @@
FROM python:latest
# Bulk Downloader for Reddit
#
# VERSION 0.0.1
FROM python:3.8-slim-buster
LABEL Description="This image enables running Bulk Downloader for Reddit within a container environment" Version="0.0.1"
ENV PYTHONUNBUFFERED 1
ENV PYTHONDONTWRITEBYTECODE 1
WORKDIR "/root/Bulk Downloader for Reddit"
COPY ./requirements.txt ./
RUN ["pip", "install", "-r", "requirements.txt"]
EXPOSE 8080
EXPOSE 7634
CMD ["python", "script.py", "-d", "downloads"]
# Install dependencies for building Python packages
RUN apt-get update \
&& apt-get install -y build-essential \
&& apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false \
&& rm -rf /var/lib/apt/lists/*
# Requirements are installed here to ensure they will be cached.
COPY requirements.txt /requirements.txt
RUN pip install --no-cache-dir -r /requirements.txt \
&& rm -rf /requirements.txt
# Copy project files into container
COPY . /bdfr
WORKDIR /bdfr
# This is useful because the image name can double as a reference to the binary
ENTRYPOINT ["python", "script.py"]
CMD ["--help"]


@@ -1,12 +1,17 @@
version: "3"
services:
bdfr:
build:
context: .
dockerfile: ./Dockerfile
image: bdfr
build: .
container_name: bdfr
ports:
- "8080:8080"
- "7634:7634"
volumes:
- "./:/root/Bulk Downloader for Reddit"
- .:/bdfr:z
container_name: bdfr_container
network_mode: bridge

script.py

@@ -4,15 +4,12 @@
This program downloads imgur, gfycat and direct image and video links of
saved posts from a reddit account. It is written in Python 3.
"""
import argparse
import logging
import os
import sys
import time
import webbrowser
from io import StringIO
from pathlib import Path, PurePath
from pathlib import Path
from prawcore.exceptions import InsufficientScope
from src.downloaders.Direct import Direct
@@ -26,8 +23,7 @@ from src.downloaders.vreddit import VReddit
from src.downloaders.youtube import Youtube
from src.downloaders.gifDeliveryNetwork import GifDeliveryNetwork
from src.downloaders.gallery import gallery
from src.errors import ImgurLimitError, NoSuitablePost, FileAlreadyExistsError, ImgurLoginError, NotADownloadableLinkError, NoSuitablePost, InvalidJSONFile, FailedToDownload, TypeInSkip, DomainInSkip, AlbumNotDownloadedCompletely, full_exc_info
from src.parser import LinkDesigner
from src.errors import ImgurLimitError, FileAlreadyExistsError, ImgurLoginError, NotADownloadableLinkError, NoSuitablePost, InvalidJSONFile, FailedToDownload, TypeInSkip, DomainInSkip, AlbumNotDownloadedCompletely, full_exc_info
from src.searcher import getPosts
from src.utils import (GLOBAL, createLogFile, nameCorrector,
printToFile)
@@ -44,6 +40,7 @@ __version__ = "1.9.4"
__maintainer__ = "Ali Parlakci"
__email__ = "parlakciali@gmail.com"
def postFromLog(fileName):
"""Analyze a log file and return a list of dictionaries containing
submissions
@@ -62,17 +59,19 @@ def postFromLog(fileName):
posts = []
for post in content:
if not content[post][-1]['TYPE'] == None:
if content[post][-1]['TYPE'] is not None:
posts.append(content[post][-1])
return posts
def isPostExists(POST,directory):
def isPostExists(POST, directory):
"""Figure out a file's name and checks if the file already exists"""
filename = GLOBAL.config['filename'].format(**POST)
possibleExtensions = [".jpg",".png",".mp4",".gif",".webm",".md",".mkv",".flv"]
possibleExtensions = [".jpg", ".png", ".mp4",
".gif", ".webm", ".md", ".mkv", ".flv"]
for extension in possibleExtensions:
@@ -81,10 +80,10 @@ def isPostExists(POST,directory):
if path.exists():
return True
else:
return False
return False
def downloadPost(SUBMISSION,directory):
def downloadPost(SUBMISSION, directory):
downloaders = {
"imgur":Imgur,"gfycat":Gfycat,"erome":Erome,"direct":Direct,"self":SelfPost,
@@ -94,11 +93,10 @@ def downloadPost(SUBMISSION,directory):
print()
if SUBMISSION['TYPE'] in downloaders:
downloaders[SUBMISSION['TYPE']] (directory,SUBMISSION)
downloaders[SUBMISSION['TYPE']](directory, SUBMISSION)
else:
raise NoSuitablePost
return None
def download(submissions):
"""Analyze list of submissions and call the right function
@@ -116,30 +114,31 @@ def download(submissions):
subsLenght = len(submissions)
for i in range(len(submissions)):
print(f"\n({i+1}/{subsLenght})",end="")
print(f"\n({i+1}/{subsLenght})", end="")
print(submissions[i]['POSTID'],
f"r/{submissions[i]['SUBREDDIT']}",
f"u/{submissions[i]['REDDITOR']}",
submissions[i]['FLAIR'] if submissions[i]['FLAIR'] else "",
sep="",
end="")
print(f" {submissions[i]['TYPE'].upper()}",end="",noPrint=True)
print(f" {submissions[i]['TYPE'].upper()}", end="", noPrint=True)
directory = GLOBAL.directory / GLOBAL.config["folderpath"].format(**submissions[i])
directory = GLOBAL.directory / \
GLOBAL.config["folderpath"].format(**submissions[i])
details = {
**submissions[i],
**{
"TITLE": nameCorrector(
submissions[i]['TITLE'],
reference = str(directory)
+ GLOBAL.config['filename'].format(**submissions[i])
+ ".ext"
reference=str(directory)
+ GLOBAL.config['filename'].format(**submissions[i])
+ ".ext"
)
}
}
filename = GLOBAL.config['filename'].format(**details)
if isPostExists(details,directory):
if isPostExists(details, directory):
print()
print(directory)
print(filename)
@@ -154,7 +153,7 @@ def download(submissions):
continue
try:
downloadPost(details,directory)
downloadPost(details, directory)
GLOBAL.downloadedPosts.add(details['POSTID'])
try:
if GLOBAL.arguments.unsave:
@@ -172,15 +171,16 @@ def download(submissions):
except ImgurLoginError:
print(
"Imgur login failed. \nQuitting the program "\
"Imgur login failed. \nQuitting the program "
"as unexpected errors might occur."
)
sys.exit()
except ImgurLimitError as exception:
FAILED_FILE.add({int(i+1):[
FAILED_FILE.add({int(i+1): [
"{class_name}: {info}".format(
class_name=exception.__class__.__name__,info=str(exception)
class_name=exception.__class__.__name__, info=str(
exception)
),
details
]})
@@ -188,12 +188,14 @@ def download(submissions):
except NotADownloadableLinkError as exception:
print(
"{class_name}: {info}".format(
class_name=exception.__class__.__name__,info=str(exception)
class_name=exception.__class__.__name__, info=str(
exception)
)
)
FAILED_FILE.add({int(i+1):[
FAILED_FILE.add({int(i+1): [
"{class_name}: {info}".format(
class_name=exception.__class__.__name__,info=str(exception)
class_name=exception.__class__.__name__, info=str(
exception)
),
submissions[i]
]})
@@ -215,9 +217,9 @@ def download(submissions):
print("Failed to download the posts, skipping...")
except AlbumNotDownloadedCompletely:
print("Album did not downloaded completely.")
FAILED_FILE.add({int(i+1):[
FAILED_FILE.add({int(i+1): [
"{class_name}: {info}".format(
class_name=exc.__class__.__name__,info=str(exc)
class_name=exc.__class__.__name__, info=str(exc)
),
submissions[i]
]})
@@ -225,39 +227,42 @@ def download(submissions):
except Exception as exc:
print(
"{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
class_name=exc.__class__.__name__,info=str(exc)
class_name=exc.__class__.__name__, info=str(exc)
)
)
logging.error(sys.exc_info()[0].__name__,
exc_info=full_exc_info(sys.exc_info()))
print(GLOBAL.log_stream.getvalue(),noPrint=True)
print(GLOBAL.log_stream.getvalue(), noPrint=True)
FAILED_FILE.add({int(i+1):[
FAILED_FILE.add({int(i+1): [
"{class_name}: {info}".format(
class_name=exc.__class__.__name__,info=str(exc)
class_name=exc.__class__.__name__, info=str(exc)
),
submissions[i]
]})
if duplicates:
print(f"\nThere {'were' if duplicates > 1 else 'was'} " \
print(f"\nThere {'were' if duplicates > 1 else 'was'} "
f"{duplicates} duplicate{'s' if duplicates > 1 else ''}")
if downloadedCount == 0:
print("Nothing is downloaded :(")
else:
print(f"Total of {downloadedCount} " \
if downloadedCount:
print(f"Total of {downloadedCount} "
f"link{'s' if downloadedCount > 1 else ''} downloaded!")
else:
print("Nothing is downloaded :(")
def printLogo():
VanillaPrint(
f"\nBulk Downloader for Reddit v{__version__}\n" \
f"Written by Ali PARLAKCI parlakciali@gmail.com\n\n" \
f"\nBulk Downloader for Reddit v{__version__}\n"
f"Written by Ali PARLAKCI parlakciali@gmail.com\n\n"
f"https://github.com/aliparlakci/bulk-downloader-for-reddit/\n"
)
def main():
if Path("config.json").exists():
@@ -265,11 +270,11 @@ def main():
else:
if not Path(GLOBAL.defaultConfigDirectory).is_dir():
os.makedirs(GLOBAL.defaultConfigDirectory)
GLOBAL.configDirectory = GLOBAL.defaultConfigDirectory / "config.json"
GLOBAL.configDirectory = GLOBAL.defaultConfigDirectory / "config.json"
try:
GLOBAL.config = Config(GLOBAL.configDirectory).generate()
except InvalidJSONFile as exception:
VanillaPrint(str(exception.__class__.__name__),">>",str(exception))
VanillaPrint(str(exception.__class__.__name__), ">>", str(exception))
VanillaPrint("Resolve it or remove it to proceed")
input("\nPress enter to quit")
sys.exit()
@@ -302,7 +307,8 @@ def main():
if arguments.directory:
GLOBAL.directory = Path(arguments.directory.strip())
elif "default_directory" in GLOBAL.config and GLOBAL.config["default_directory"] != "":
GLOBAL.directory = Path(GLOBAL.config["default_directory"].format(time=GLOBAL.RUN_TIME))
GLOBAL.directory = Path(
GLOBAL.config["default_directory"].format(time=GLOBAL.RUN_TIME))
else:
GLOBAL.directory = Path(input("\ndownload directory: ").strip())
@@ -312,14 +318,13 @@ def main():
GLOBAL.downloadedPosts = Store()
printLogo()
print("\n"," ".join(sys.argv),"\n",noPrint=True)
print("\n", " ".join(sys.argv), "\n", noPrint=True)
if arguments.log is not None:
logDir = Path(arguments.log)
download(postFromLog(logDir))
sys.exit()
programMode = ProgramMode(arguments).generate()
try:
@@ -327,7 +332,7 @@ def main():
except Exception as exc:
logging.error(sys.exc_info()[0].__name__,
exc_info=full_exc_info(sys.exc_info()))
print(GLOBAL.log_stream.getvalue(),noPrint=True)
print(GLOBAL.log_stream.getvalue(), noPrint=True)
print(exc)
sys.exit()
@@ -335,8 +340,11 @@ def main():
print("I could not find any posts in that URL")
sys.exit()
if GLOBAL.arguments.no_download: pass
else: download(posts)
if GLOBAL.arguments.no_download:
pass
else:
download(posts)
if __name__ == "__main__":
@@ -347,9 +355,9 @@ if __name__ == "__main__":
VanillaPrint = print
print = printToFile
GLOBAL.RUN_TIME = str(time.strftime(
"%d-%m-%Y_%H-%M-%S",
time.localtime(time.time())
))
"%d-%m-%Y_%H-%M-%S",
time.localtime(time.time())
))
main()
except KeyboardInterrupt:
@@ -363,5 +371,5 @@ if __name__ == "__main__":
exc_info=full_exc_info(sys.exc_info()))
print(GLOBAL.log_stream.getvalue())
if not GLOBAL.arguments.quit: input("\nPress enter to quit\n")
if not GLOBAL.arguments.quit:
input("\nPress enter to quit\n")


@@ -1,31 +1,34 @@
import argparse
import sys
class Arguments:
@staticmethod
def parse(arguments=[]):
def parse(arguments=None):
"""Initialize argparse and add arguments"""
if arguments is None:
arguments = []
parser = argparse.ArgumentParser(allow_abbrev=False,
description="This program downloads " \
"media from reddit " \
"posts")
parser.add_argument("--directory","-d",
help="Specifies the directory where posts will be " \
description="This program downloads "
"media from reddit "
"posts")
parser.add_argument("--directory", "-d",
help="Specifies the directory where posts will be "
"downloaded to",
metavar="DIRECTORY")
parser.add_argument("--verbose","-v",
parser.add_argument("--verbose", "-v",
help="Verbose Mode",
action="store_true",
default=False)
parser.add_argument("--quit","-q",
parser.add_argument("--quit", "-q",
help="Auto quit afer the process finishes",
action="store_true",
default=False)
parser.add_argument("--link","-l",
parser.add_argument("--link", "-l",
help="Get posts from link",
metavar="link")
@@ -47,43 +50,45 @@ class Arguments:
help="Gets upvoted posts of --user")
parser.add_argument("--log",
help="Takes a log file which created by itself " \
"(json files), reads posts and tries downloadin" \
"g them again.",
help="Takes a log file which created by itself "
"(json files), reads posts and tries downloadin"
"g them again.",
# type=argparse.FileType('r'),
metavar="LOG FILE")
parser.add_argument("--subreddit",
nargs="+",
help="Triggers subreddit mode and takes subreddit's " \
"name without r/. use \"frontpage\" for frontpage",
metavar="SUBREDDIT",
type=str)
parser.add_argument(
"--subreddit",
nargs="+",
help="Triggers subreddit mode and takes subreddit's "
"name without r/. use \"frontpage\" for frontpage",
metavar="SUBREDDIT",
type=str)
parser.add_argument("--multireddit",
help="Triggers multireddit mode and takes "\
"multireddit's name without m/",
help="Triggers multireddit mode and takes "
"multireddit's name without m/",
metavar="MULTIREDDIT",
type=str)
parser.add_argument("--user",
help="reddit username if needed. use \"me\" for " \
"current user",
required="--multireddit" in sys.argv or \
"--submitted" in sys.argv,
help="reddit username if needed. use \"me\" for "
"current user",
required="--multireddit" in sys.argv or
"--submitted" in sys.argv,
metavar="redditor",
type=str)
parser.add_argument("--search",
help="Searches for given query in given subreddits",
metavar="query",
type=str)
parser.add_argument(
"--search",
help="Searches for given query in given subreddits",
metavar="query",
type=str)
parser.add_argument("--sort",
help="Either hot, top, new, controversial, rising " \
"or relevance default: hot",
help="Either hot, top, new, controversial, rising "
"or relevance default: hot",
choices=[
"hot","top","new","controversial","rising",
"hot", "top", "new", "controversial", "rising",
"relevance"
],
metavar="SORT TYPE",
@@ -95,9 +100,10 @@ class Arguments:
type=int)
parser.add_argument("--time",
help="Either hour, day, week, month, year or all." \
" default: all",
choices=["all","hour","day","week","month","year"],
help="Either hour, day, week, month, year or all."
" default: all",
choices=["all", "hour", "day",
"week", "month", "year"],
metavar="TIME_LIMIT",
type=str)
@@ -105,7 +111,7 @@ class Arguments:
nargs="+",
help="Skip posts with given type",
type=str,
choices=["images","videos","gifs","self"],
choices=["images", "videos", "gifs", "self"],
default=[])
parser.add_argument("--skip-domain",
@@ -124,38 +130,40 @@ class Arguments:
help="Set custom filename",
)
parser.add_argument("--set-default-directory",
action="store_true",
help="Set a default directory to be used in case no directory is given",
)
parser.add_argument(
"--set-default-directory",
action="store_true",
help="Set a default directory to be used in case no directory is given",
)
parser.add_argument("--set-default-options",
action="store_true",
help="Set default options to use everytime program runs",
)
parser.add_argument(
"--set-default-options",
action="store_true",
help="Set default options to use everytime program runs",
)
parser.add_argument("--use-local-config",
action="store_true",
help="Creates a config file in the program's directory and uses it. Useful for having multiple configs",
)
parser.add_argument(
"--use-local-config",
action="store_true",
help="Creates a config file in the program's directory and uses it. Useful for having multiple configs",
)
parser.add_argument("--no-dupes",
action="store_true",
help="Do not download duplicate posts on different subreddits",
)
parser.add_argument(
"--no-dupes",
action="store_true",
help="Do not download duplicate posts on different subreddits",
)
parser.add_argument("--downloaded-posts",
help="Use a hash file to keep track of downloaded files",
type=str
)
parser.add_argument("--no-download",
action="store_true",
help="Just saved posts into a the POSTS.json file without downloading"
)
parser.add_argument(
"--downloaded-posts",
help="Use a hash file to keep track of downloaded files",
type=str)
parser.add_argument(
"--no-download",
action="store_true",
help="Just saved posts into a the POSTS.json file without downloading")
if arguments == []:
return parser.parse_args()
else:
return parser.parse_args(arguments)
return parser.parse_args(arguments)


@@ -1,15 +1,12 @@
import os
import socket
import webbrowser
import random
from src.reddit import Reddit
from src.jsonHelper import JsonFile
from src.utils import nameCorrector
class Config():
def __init__(self,filename):
def __init__(self, filename):
self.filename = filename
self.file = JsonFile(self.filename)
@@ -45,7 +42,7 @@ Existing filename template:""", None if "filename" not in self.file.read() else
def _readCustomFileName(self):
content = self.file.read()
if not "filename" in content:
if "filename" not in content:
self.file.add({
"filename": "{REDDITOR}_{TITLE}_{POSTID}"
})
@@ -75,9 +72,9 @@ Existing folder structure""", None if "folderpath" not in self.file.read() else
"folderpath": folderpath
})
def _readCustomFolderPath(self,path=None):
def _readCustomFolderPath(self, path=None):
content = self.file.read()
if not "folderpath" in content:
if "folderpath" not in content:
self.file.add({
"folderpath": "{SUBREDDIT}"
})
@@ -96,9 +93,9 @@ Existing default options:""", None if "options" not in self.file.read() else sel
"options": options
})
def _readDefaultOptions(self,path=None):
def _readDefaultOptions(self, path=None):
content = self.file.read()
if not "options" in content:
if "options" not in content:
self.file.add({
"options": ""
})
@@ -108,9 +105,9 @@ Existing default options:""", None if "options" not in self.file.read() else sel
try:
content = self.file.read()["credentials"]
except:
except BaseException:
self.file.add({
"credentials":{}
"credentials": {}
})
content = self.file.read()["credentials"]
@@ -126,7 +123,8 @@ Existing default options:""", None if "options" not in self.file.read() else sel
Leave blank to reset it. You can use {time} in foler names to use to timestamp it
For example: D:/archive/BDFR_{time}
""")
print("Current default directory:", self.file.read()["default_directory"] if "default_directory" in self.file.read() else "")
print("Current default directory:", self.file.read()[
"default_directory"] if "default_directory" in self.file.read() else "")
self.file.add({
"default_directory": input(">> ")
})


@@ -1,18 +1,16 @@
import os
from src.downloaders.downloaderUtils import getFile, getExtension
from src.errors import FileNameTooLong
from src.utils import GLOBAL
from src.utils import printToFile as print
class Direct:
def __init__(self,directory,POST):
def __init__(self, directory, POST):
POST['EXTENSION'] = getExtension(POST['CONTENTURL'])
if not os.path.exists(directory): os.makedirs(directory)
if not os.path.exists(directory):
os.makedirs(directory)
filename = GLOBAL.config['filename'].format(**POST)+POST["EXTENSION"]
shortFilename = POST['POSTID']+POST['EXTENSION']
getFile(filename,shortFilename,directory,POST['CONTENTURL'])
filename = GLOBAL.config['filename'].format(**POST) + POST["EXTENSION"]
shortFilename = POST['POSTID'] + POST['EXTENSION']
getFile(filename, shortFilename, directory, POST['CONTENTURL'])


@@ -1,19 +1,18 @@
import os
import logging
import sys
import urllib.request
from html.parser import HTMLParser
from src.downloaders.downloaderUtils import getFile
from src.downloaders.downloaderUtils import getExtension
from src.errors import (FileNameTooLong, AlbumNotDownloadedCompletely,
NotADownloadableLinkError, FileAlreadyExistsError, full_exc_info)
from src.errors import (AlbumNotDownloadedCompletely,
NotADownloadableLinkError, FileAlreadyExistsError)
from src.utils import GLOBAL
from src.utils import printToFile as print
class Erome:
def __init__(self,directory,post):
def __init__(self, directory, post):
try:
IMAGES = self.getLinks(post['CONTENTURL'])
except urllib.error.HTTPError:
@@ -29,14 +28,15 @@ class Erome:
"""Filenames are declared here"""
filename = GLOBAL.config['filename'].format(**post)+post["EXTENSION"]
filename = GLOBAL.config['filename'].format(
**post) + post["EXTENSION"]
shortFilename = post['POSTID'] + extension
imageURL = IMAGES[0]
if 'https://' not in imageURL or 'http://' not in imageURL:
imageURL = "https://" + imageURL
getFile(filename,shortFilename,directory,imageURL)
getFile(filename, shortFilename, directory, imageURL)
else:
filename = GLOBAL.config['filename'].format(**post)
@@ -56,19 +56,19 @@ class Erome:
extension = getExtension(IMAGES[i])
filename = str(i+1)+extension
filename = str(i + 1) + extension
imageURL = IMAGES[i]
if 'https://' not in imageURL and 'http://' not in imageURL:
imageURL = "https://" + imageURL
print(" ({}/{})".format(i+1,imagesLenght))
print(" ({}/{})".format(i + 1, imagesLenght))
print(" {}".format(filename))
try:
getFile(filename,filename,folderDir,imageURL,indent=2)
getFile(filename, filename, folderDir, imageURL, indent=2)
print()
except FileAlreadyExistsError:
print(" The file already exists" + " "*10,end="\n\n")
print(" The file already exists" + " " * 10, end="\n\n")
duplicates += 1
howManyDownloaded -= 1
@@ -87,20 +87,21 @@ class Erome:
if duplicates == imagesLenght:
raise FileAlreadyExistsError
elif howManyDownloaded + duplicates < imagesLenght:
if howManyDownloaded + duplicates < imagesLenght:
raise AlbumNotDownloadedCompletely(
"Album Not Downloaded Completely"
)
def getLinks(self,url,lineNumber=129):
def getLinks(self, url, lineNumber=129):
content = []
lineNumber = None
class EromeParser(HTMLParser):
tag = None
def handle_starttag(self, tag, attrs):
self.tag = {tag:{attr[0]: attr[1] for attr in attrs}}
self.tag = {tag: {attr[0]: attr[1] for attr in attrs}}
pageSource = (urllib.request.urlopen(url).read().decode().split('\n'))
@@ -124,12 +125,12 @@ class Erome:
if tag is not None:
if "img" in tag:
if "class" in tag["img"]:
if tag["img"]["class"]=="img-front":
if tag["img"]["class"] == "img-front":
content.append(tag["img"]["src"])
elif "source" in tag:
content.append(tag["source"]["src"])
return [
link for link in content \
link for link in content
if link.endswith("_480p.mp4") or not link.endswith(".mp4")
]


@@ -4,14 +4,13 @@ import urllib.request
from bs4 import BeautifulSoup
from src.downloaders.downloaderUtils import getFile, getExtension
from src.errors import (FileNameTooLong, AlbumNotDownloadedCompletely,
NotADownloadableLinkError, FileAlreadyExistsError)
from src.errors import (NotADownloadableLinkError)
from src.utils import GLOBAL
from src.utils import printToFile as print
from src.downloaders.gifDeliveryNetwork import GifDeliveryNetwork
class Gfycat:
def __init__(self,directory,POST):
def __init__(self, directory, POST):
try:
POST['MEDIAURL'] = self.getLink(POST['CONTENTURL'])
except IndexError:
@@ -19,12 +18,13 @@ class Gfycat:
POST['EXTENSION'] = getExtension(POST['MEDIAURL'])
if not os.path.exists(directory): os.makedirs(directory)
if not os.path.exists(directory):
os.makedirs(directory)
filename = GLOBAL.config['filename'].format(**POST)+POST["EXTENSION"]
shortFilename = POST['POSTID']+POST['EXTENSION']
filename = GLOBAL.config['filename'].format(**POST) + POST["EXTENSION"]
shortFilename = POST['POSTID'] + POST['EXTENSION']
getFile(filename,shortFilename,directory,POST['MEDIAURL'])
getFile(filename, shortFilename, directory, POST['MEDIAURL'])
@staticmethod
def getLink(url):
@@ -43,8 +43,9 @@ class Gfycat:
pageSource = (urllib.request.urlopen(url).read().decode())
soup = BeautifulSoup(pageSource, "html.parser")
attributes = {"data-react-helmet":"true","type":"application/ld+json"}
content = soup.find("script",attrs=attributes)
attributes = {"data-react-helmet": "true",
"type": "application/ld+json"}
content = soup.find("script", attrs=attributes)
if content is None:
return GifDeliveryNetwork.getLink(url)


@@ -1,7 +1,5 @@
import urllib
import json
import os
import time
import requests
from src.utils import GLOBAL, nameCorrector
@@ -10,16 +8,17 @@ from src.downloaders.Direct import Direct
from src.downloaders.downloaderUtils import getFile
from src.errors import FileNotFoundError, FileAlreadyExistsError, AlbumNotDownloadedCompletely, ImageNotFound, ExtensionError, NotADownloadableLinkError, TypeInSkip
class Imgur:
IMGUR_IMAGE_DOMAIN = "https://i.imgur.com/"
def __init__(self,directory, post):
def __init__(self, directory, post):
link = post['CONTENTURL']
if link.endswith(".gifv"):
link = link.replace(".gifv",".mp4")
link = link.replace(".gifv", ".mp4")
Direct(directory, {**post, 'CONTENTURL': link})
return None
@@ -57,22 +56,23 @@ class Imgur:
extension = self.validateExtension(images["images"][i]["ext"])
imageURL = self.IMGUR_IMAGE_DOMAIN + images["images"][i]["hash"] + extension
imageURL = self.IMGUR_IMAGE_DOMAIN + \
images["images"][i]["hash"] + extension
filename = "_".join([
str(i+1), nameCorrector(images["images"][i]['title']), images["images"][i]['hash']
]) + extension
shortFilename = str(i+1) + "_" + images["images"][i]['hash']
filename = "_".join([str(i + 1),
nameCorrector(images["images"][i]['title']),
images["images"][i]['hash']]) + extension
shortFilename = str(i + 1) + "_" + images["images"][i]['hash']
print("\n ({}/{})".format(i+1,imagesLenght))
print("\n ({}/{})".format(i + 1, imagesLenght))
try:
getFile(filename,shortFilename,folderDir,imageURL,indent=2)
getFile(filename, shortFilename, folderDir, imageURL, indent=2)
howManyDownloaded += 1
print()
except FileAlreadyExistsError:
print(" The file already exists" + " "*10,end="\n\n")
print(" The file already exists" + " " * 10, end="\n\n")
duplicates += 1
except TypeInSkip:
@@ -82,18 +82,16 @@ class Imgur:
except Exception as exception:
print("\n Could not get the file")
print(
" "
+ "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
" " +
"{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
class_name=exception.__class__.__name__,
info=str(exception)
)
+ "\n"
)
print(GLOBAL.log_stream.getvalue(),noPrint=True)
info=str(exception)) +
"\n")
print(GLOBAL.log_stream.getvalue(), noPrint=True)
if duplicates == imagesLenght:
raise FileAlreadyExistsError
elif howManyDownloaded + duplicates < imagesLenght:
if howManyDownloaded + duplicates < imagesLenght:
raise AlbumNotDownloadedCompletely(
"Album Not Downloaded Completely"
)
@@ -103,9 +101,9 @@ class Imgur:
imageURL = self.IMGUR_IMAGE_DOMAIN + image["hash"] + extension
filename = GLOBAL.config['filename'].format(**self.post) + extension
shortFilename = self.post['POSTID']+extension
shortFilename = self.post['POSTID'] + extension
getFile(filename,shortFilename,self.directory,imageURL)
getFile(filename, shortFilename, self.directory, imageURL)
@property
def isAlbum(self):
@@ -116,7 +114,9 @@ class Imgur:
cookies = {"over18": "1", "postpagebeta": "0"}
res = requests.get(link, cookies=cookies)
if res.status_code != 200: raise ImageNotFound(f"Server responded with {res.status_code} to {link}")
if res.status_code != 200:
raise ImageNotFound(
f"Server responded with {res.status_code} to {link}")
pageSource = requests.get(link, cookies=cookies).text
STARTING_STRING = "image : "
@@ -124,18 +124,20 @@ class Imgur:
STARTING_STRING_LENGHT = len(STARTING_STRING)
try:
startIndex = pageSource.index(STARTING_STRING) + STARTING_STRING_LENGHT
startIndex = pageSource.index(
STARTING_STRING) + STARTING_STRING_LENGHT
endIndex = pageSource.index(ENDING_STRING, startIndex)
except ValueError:
raise NotADownloadableLinkError(f"Could not read the page source on {link}")
raise NotADownloadableLinkError(
f"Could not read the page source on {link}")
while pageSource[endIndex] != "}":
endIndex=endIndex-1
endIndex = endIndex - 1
try:
data = pageSource[startIndex:endIndex+2].strip()[:-1]
except:
pageSource[endIndex+1]='}'
data = pageSource[startIndex:endIndex+3].strip()[:-1]
data = pageSource[startIndex:endIndex + 2].strip()[:-1]
except BaseException:
pageSource[endIndex + 1] = '}'
data = pageSource[startIndex:endIndex + 3].strip()[:-1]
return json.loads(data)
@@ -144,5 +146,8 @@ class Imgur:
POSSIBLE_EXTENSIONS = [".jpg", ".png", ".mp4", ".gif"]
for extension in POSSIBLE_EXTENSIONS:
if extension in string: return extension
else: raise ExtensionError(f"\"{string}\" is not recognized as a valid extension.")
if extension in string:
return extension
raise ExtensionError(
f"\"{string}\" is not recognized as a valid extension.")


@@ -1,45 +1,51 @@
import sys
import os
import time
from urllib.error import HTTPError
import urllib.request
from pathlib import Path
import hashlib
from src.utils import nameCorrector, GLOBAL
from src.utils import GLOBAL
from src.utils import printToFile as print
from src.errors import FileAlreadyExistsError, FileNameTooLong, FailedToDownload, TypeInSkip, DomainInSkip
from src.errors import FileAlreadyExistsError, FailedToDownload, TypeInSkip, DomainInSkip
def dlProgress(count, blockSize, totalSize):
"""Function for writing download progress to console
"""
downloadedMbs = int(count*blockSize*(10**(-6)))
fileSize = int(totalSize*(10**(-6)))
sys.stdout.write("{}Mb/{}Mb\r".format(downloadedMbs,fileSize))
downloadedMbs = int(count * blockSize * (10**(-6)))
fileSize = int(totalSize * (10**(-6)))
sys.stdout.write("{}Mb/{}Mb\r".format(downloadedMbs, fileSize))
sys.stdout.flush()
def getExtension(link):
"""Extract file extension from image link.
If didn't find any, return '.jpg'
"""
imageTypes = ['jpg','png','mp4','webm','gif']
imageTypes = ['jpg', 'png', 'mp4', 'webm', 'gif']
parsed = link.split('.')
for fileType in imageTypes:
if fileType in parsed:
return "."+parsed[-1]
else:
if not "v.redd.it" in link:
return '.jpg'
else:
return '.mp4'
return "." + parsed[-1]
def getFile(filename,shortFilename,folderDir,imageURL,indent=0, silent=False):
if "v.redd.it" not in link:
return '.jpg'
return '.mp4'
def getFile(
filename,
shortFilename,
folderDir,
imageURL,
indent=0,
silent=False):
FORMATS = {
"videos": [".mp4", ".webm"],
"images": [".jpg",".jpeg",".png",".bmp"],
"images": [".jpg", ".jpeg", ".png", ".bmp"],
"gifs": [".gif"],
"self": []
}
@@ -53,10 +59,10 @@ def getFile(filename,shortFilename,folderDir,imageURL,indent=0, silent=False):
raise DomainInSkip
headers = [
("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " \
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "\
("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "
"Safari/537.36 OPR/54.0.2952.64"),
("Accept", "text/html,application/xhtml+xml,application/xml;" \
("Accept", "text/html,application/xhtml+xml,application/xml;"
"q=0.9,image/webp,image/apng,*/*;q=0.8"),
("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"),
("Accept-Encoding", "none"),
@@ -64,21 +70,22 @@ def getFile(filename,shortFilename,folderDir,imageURL,indent=0, silent=False):
("Connection", "keep-alive")
]
if not os.path.exists(folderDir): os.makedirs(folderDir)
if not os.path.exists(folderDir):
os.makedirs(folderDir)
opener = urllib.request.build_opener()
if not "imgur" in imageURL:
if "imgur" not in imageURL:
opener.addheaders = headers
urllib.request.install_opener(opener)
if not silent: print(" "*indent + str(folderDir),
" "*indent + str(filename),
sep="\n")
if not silent:
print(" " * indent + str(folderDir),
" " * indent + str(filename),
sep="\n")
for i in range(3):
fileDir = Path(folderDir) / filename
tempDir = Path(folderDir) / (filename+".tmp")
tempDir = Path(folderDir) / (filename + ".tmp")
if not (os.path.isfile(fileDir)):
try:
@@ -93,8 +100,9 @@ def getFile(filename,shortFilename,folderDir,imageURL,indent=0, silent=False):
raise FileAlreadyExistsError
GLOBAL.downloadedPosts.add(fileHash)
os.rename(tempDir,fileDir)
if not silent: print(" "*indent+"Downloaded"+" "*10)
os.rename(tempDir, fileDir)
if not silent:
print(" " * indent + "Downloaded" + " " * 10)
return None
except ConnectionResetError:
raise FailedToDownload
@@ -104,6 +112,7 @@ def getFile(filename,shortFilename,folderDir,imageURL,indent=0, silent=False):
raise FileAlreadyExistsError
raise FailedToDownload
def createHash(filename):
hash_md5 = hashlib.md5()
with open(filename, "rb") as f:


@@ -1,18 +1,16 @@
import io
import os
import json
import urllib
import requests
from pathlib import Path
from src.utils import GLOBAL, nameCorrector
from src.utils import GLOBAL
from src.utils import printToFile as print
from src.downloaders.Direct import Direct
from src.downloaders.downloaderUtils import getFile
from src.errors import FileNotFoundError, FileAlreadyExistsError, AlbumNotDownloadedCompletely, ImageNotFound, ExtensionError, NotADownloadableLinkError, TypeInSkip
from src.errors import FileNotFoundError, FileAlreadyExistsError, AlbumNotDownloadedCompletely, ImageNotFound, NotADownloadableLinkError, TypeInSkip
class gallery:
def __init__(self,directory,post):
def __init__(self, directory, post):
link = post['CONTENTURL']
self.rawData = self.getData(link)
@@ -20,30 +18,33 @@ class gallery:
self.directory = directory
self.post = post
images={}
count=0
images = {}
count = 0
for model in self.rawData['posts']['models']:
try:
for item in self.rawData['posts']['models'][model]['media']['gallery']['items']:
try:
images[count]={'id':item['mediaId'], 'url':self.rawData['posts']['models'][model]['media']['mediaMetadata'][item['mediaId']]['s']['u']}
count=count+1
except:
images[count] = {'id': item['mediaId'], 'url': self.rawData['posts'][
'models'][model]['media']['mediaMetadata'][item['mediaId']]['s']['u']}
count = count + 1
except BaseException:
continue
except:
except BaseException:
continue
self.downloadAlbum(images,count)
self.downloadAlbum(images, count)
@staticmethod
def getData(link):
headers = {
"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64",
"Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
}
res = requests.get(link, headers=headers)
if res.status_code != 200: raise ImageNotFound(f"Server responded with {res.status_code} to {link}")
if res.status_code != 200:
raise ImageNotFound(
f"Server responded with {res.status_code} to {link}")
pageSource = res.text
STARTING_STRING = "_r = {"
@@ -51,12 +52,14 @@ class gallery:
STARTING_STRING_LENGHT = len(STARTING_STRING)
try:
startIndex = pageSource.index(STARTING_STRING) + STARTING_STRING_LENGHT
startIndex = pageSource.index(
STARTING_STRING) + STARTING_STRING_LENGHT
endIndex = pageSource.index(ENDING_STRING, startIndex)
except ValueError:
raise NotADownloadableLinkError(f"Could not read the page source on {link}")
raise NotADownloadableLinkError(
f"Could not read the page source on {link}")
data = json.loads(pageSource[startIndex-1:endIndex+1].strip()[:-1])
data = json.loads(pageSource[startIndex - 1:endIndex + 1].strip()[:-1])
return data
def downloadAlbum(self, images, count):
@@ -80,19 +83,20 @@ class gallery:
extension = os.path.splitext(path)[1]
filename = "_".join([
str(i+1), images[i]['id']
str(i + 1), images[i]['id']
]) + extension
shortFilename = str(i+1) + "_" + images[i]['id']
shortFilename = str(i + 1) + "_" + images[i]['id']
print("\n ({}/{})".format(i+1,count))
print("\n ({}/{})".format(i + 1, count))
try:
getFile(filename,shortFilename,folderDir,images[i]['url'],indent=2)
getFile(filename, shortFilename, folderDir,
images[i]['url'], indent=2)
howManyDownloaded += 1
print()
except FileAlreadyExistsError:
print(" The file already exists" + " "*10,end="\n\n")
print(" The file already exists" + " " * 10, end="\n\n")
duplicates += 1
except TypeInSkip:
@@ -102,19 +106,16 @@ class gallery:
except Exception as exception:
print("\n Could not get the file")
print(
" "
+ "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
" " +
"{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
class_name=exception.__class__.__name__,
info=str(exception)
)
+ "\n"
)
print(GLOBAL.log_stream.getvalue(),noPrint=True)
info=str(exception)) +
"\n")
print(GLOBAL.log_stream.getvalue(), noPrint=True)
if duplicates == count:
raise FileAlreadyExistsError
elif howManyDownloaded + duplicates < count:
if howManyDownloaded + duplicates < count:
raise AlbumNotDownloadedCompletely(
"Album Not Downloaded Completely"
)


@@ -1,16 +1,14 @@
import json
import os
import urllib.request
from bs4 import BeautifulSoup
from src.downloaders.downloaderUtils import getFile, getExtension
from src.errors import (FileNameTooLong, AlbumNotDownloadedCompletely,
NotADownloadableLinkError, FileAlreadyExistsError)
from src.errors import (NotADownloadableLinkError)
from src.utils import GLOBAL
from src.utils import printToFile as print
class GifDeliveryNetwork:
def __init__(self,directory,POST):
def __init__(self, directory, POST):
try:
POST['MEDIAURL'] = self.getLink(POST['CONTENTURL'])
except IndexError:
@@ -18,12 +16,13 @@ class GifDeliveryNetwork:
POST['EXTENSION'] = getExtension(POST['MEDIAURL'])
if not os.path.exists(directory): os.makedirs(directory)
if not os.path.exists(directory):
os.makedirs(directory)
filename = GLOBAL.config['filename'].format(**POST)+POST["EXTENSION"]
shortFilename = POST['POSTID']+POST['EXTENSION']
filename = GLOBAL.config['filename'].format(**POST) + POST["EXTENSION"]
shortFilename = POST['POSTID'] + POST['EXTENSION']
getFile(filename,shortFilename,directory,POST['MEDIAURL'])
getFile(filename, shortFilename, directory, POST['MEDIAURL'])
@staticmethod
def getLink(url):
@@ -31,7 +30,8 @@ class GifDeliveryNetwork:
and return it
"""
if '.webm' in url.split('/')[-1] or '.mp4' in url.split('/')[-1] or '.gif' in url.split('/')[-1]:
if '.webm' in url.split(
'/')[-1] or '.mp4' in url.split('/')[-1] or '.gif' in url.split('/')[-1]:
return url
if url[-1:] == '/':
@@ -42,8 +42,8 @@ class GifDeliveryNetwork:
pageSource = (urllib.request.urlopen(url).read().decode())
soup = BeautifulSoup(pageSource, "html.parser")
attributes = {"id":"mp4Source","type":"video/mp4"}
content = soup.find("source",attrs=attributes)
attributes = {"id": "mp4Source", "type": "video/mp4"}
content = soup.find("source", attrs=attributes)
if content is None:


@@ -4,13 +4,12 @@ import urllib.request
from bs4 import BeautifulSoup
from src.downloaders.downloaderUtils import getFile, getExtension
from src.errors import (FileNameTooLong, AlbumNotDownloadedCompletely,
NotADownloadableLinkError, FileAlreadyExistsError)
from src.errors import (NotADownloadableLinkError)
from src.utils import GLOBAL
from src.utils import printToFile as print
class Redgifs:
def __init__(self,directory,POST):
def __init__(self, directory, POST):
try:
POST['MEDIAURL'] = self.getLink(POST['CONTENTURL'])
except IndexError:
@@ -18,14 +17,16 @@ class Redgifs:
POST['EXTENSION'] = getExtension(POST['MEDIAURL'])
if not os.path.exists(directory): os.makedirs(directory)
if not os.path.exists(directory):
os.makedirs(directory)
filename = GLOBAL.config['filename'].format(**POST)+POST["EXTENSION"]
shortFilename = POST['POSTID']+POST['EXTENSION']
filename = GLOBAL.config['filename'].format(**POST) + POST["EXTENSION"]
shortFilename = POST['POSTID'] + POST['EXTENSION']
getFile(filename,shortFilename,directory,POST['MEDIAURL'])
getFile(filename, shortFilename, directory, POST['MEDIAURL'])
def getLink(self, url):
@staticmethod
def getLink(url):
"""Extract direct link to the video from page's source
and return it
"""
@@ -36,15 +37,19 @@ class Redgifs:
if url[-1:] == '/':
url = url[:-1]
url = urllib.request.Request("https://redgifs.com/watch/" + url.split('/')[-1])
url = urllib.request.Request(
"https://redgifs.com/watch/" + url.split('/')[-1])
url.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64')
url.add_header(
'User-Agent',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64')
pageSource = (urllib.request.urlopen(url).read().decode())
soup = BeautifulSoup(pageSource, "html.parser")
attributes = {"data-react-helmet":"true","type":"application/ld+json"}
content = soup.find("script",attrs=attributes)
attributes = {"data-react-helmet": "true",
"type": "application/ld+json"}
content = soup.find("script", attrs=attributes)
if content is None:
raise NotADownloadableLinkError("Could not read the page source")


@@ -1,3 +1,4 @@
from src.utils import printToFile as print
import io
import os
from pathlib import Path
@@ -6,36 +7,36 @@ from src.errors import FileAlreadyExistsError, TypeInSkip
from src.utils import GLOBAL
VanillaPrint = print
from src.utils import printToFile as print
class SelfPost:
def __init__(self,directory,post):
def __init__(self, directory, post):
if "self" in GLOBAL.arguments.skip: raise TypeInSkip
if "self" in GLOBAL.arguments.skip:
raise TypeInSkip
if not os.path.exists(directory): os.makedirs(directory)
if not os.path.exists(directory):
os.makedirs(directory)
filename = GLOBAL.config['filename'].format(**post)
fileDir = directory / (filename+".md")
fileDir = directory / (filename + ".md")
print(fileDir)
print(filename+".md")
print(filename + ".md")
if Path.is_file(fileDir):
raise FileAlreadyExistsError
try:
self.writeToFile(fileDir,post)
self.writeToFile(fileDir, post)
except FileNotFoundError:
fileDir = post['POSTID']+".md"
fileDir = post['POSTID'] + ".md"
fileDir = directory / fileDir
self.writeToFile(fileDir,post)
self.writeToFile(fileDir, post)
@staticmethod
def writeToFile(directory,post):
def writeToFile(directory, post):
"""Self posts are formatted here"""
content = ("## ["
+ post["TITLE"]
@@ -54,7 +55,7 @@ class SelfPost:
+ post["REDDITOR"]
+ ")")
with io.open(directory,"w",encoding="utf-8") as FILE:
VanillaPrint(content,file=FILE)
with io.open(directory, "w", encoding="utf-8") as FILE:
VanillaPrint(content, file=FILE)
print("Downloaded")


@@ -1,25 +1,25 @@
import os
import subprocess
from src.downloaders.downloaderUtils import getFile, getExtension
from src.errors import FileNameTooLong
from src.downloaders.downloaderUtils import getFile
from src.utils import GLOBAL
from src.utils import printToFile as print
class VReddit:
def __init__(self,directory,post):
extension = ".mp4"
if not os.path.exists(directory): os.makedirs(directory)
filename = GLOBAL.config['filename'].format(**post)+extension
shortFilename = post['POSTID']+extension
class VReddit:
def __init__(self, directory, post):
extension = ".mp4"
if not os.path.exists(directory):
os.makedirs(directory)
filename = GLOBAL.config['filename'].format(**post) + extension
shortFilename = post['POSTID'] + extension
try:
FNULL = open(os.devnull, 'w')
subprocess.call("ffmpeg", stdout=FNULL, stderr=subprocess.STDOUT)
except:
getFile(filename,shortFilename,directory,post['CONTENTURL'])
except BaseException:
getFile(filename, shortFilename, directory, post['CONTENTURL'])
print("FFMPEG library not found, skipping merging video and audio")
else:
videoName = post['POSTID'] + "_video"
@@ -27,16 +27,16 @@ class VReddit:
audioName = post['POSTID'] + "_audio"
audioURL = videoURL[:videoURL.rfind('/')] + '/DASH_audio.mp4'
print(directory,filename,sep="\n")
print(directory, filename, sep="\n")
getFile(videoName,videoName,directory,videoURL,silent=True)
getFile(audioName,audioName,directory,audioURL,silent=True)
getFile(videoName, videoName, directory, videoURL, silent=True)
getFile(audioName, audioName, directory, audioURL, silent=True)
try:
self._mergeAudio(videoName,
audioName,
filename,
shortFilename,
directory)
audioName,
filename,
shortFilename,
directory)
except KeyboardInterrupt:
os.remove(directory / filename)
os.remove(directory / audioName)
@@ -44,7 +44,7 @@ class VReddit:
os.rename(directory / videoName, directory / filename)
@staticmethod
def _mergeAudio(video,audio,filename,shortFilename,directory):
def _mergeAudio(video, audio, filename, shortFilename, directory):
inputVideo = str(directory / video)
inputAudio = str(directory / audio)


@@ -2,22 +2,24 @@ import os
import youtube_dl
import sys
from src.downloaders.downloaderUtils import getExtension, dlProgress, createHash
from src.downloaders.downloaderUtils import createHash
from src.utils import GLOBAL
from src.utils import printToFile as print
from src.errors import FileAlreadyExistsError
class Youtube:
def __init__(self,directory,post):
if not os.path.exists(directory): os.makedirs(directory)
def __init__(self, directory, post):
if not os.path.exists(directory):
os.makedirs(directory)
filename = GLOBAL.config['filename'].format(**post)
print(filename)
self.download(filename,directory,post['CONTENTURL'])
self.download(filename, directory, post['CONTENTURL'])
def download(self,filename,directory,url):
def download(self, filename, directory, url):
ydl_opts = {
"format": "best",
"outtmpl": str(directory / (filename + ".%(ext)s")),
@@ -29,7 +31,7 @@ class Youtube:
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download([url])
location = directory/(filename+".mp4")
location = directory / (filename + ".mp4")
if GLOBAL.arguments.no_dupes:
try:
@@ -43,9 +45,9 @@ class Youtube:
@staticmethod
def _hook(d):
if d['status'] == 'finished': return print("Downloaded")
if d['status'] == 'finished':
return print("Downloaded")
downloadedMbs = int(d['downloaded_bytes'] * (10**(-6)))
fileSize = int(d['total_bytes']*(10**(-6)))
sys.stdout.write("{}Mb/{}Mb\r".format(downloadedMbs,fileSize))
fileSize = int(d['total_bytes'] * (10**(-6)))
sys.stdout.write("{}Mb/{}Mb\r".format(downloadedMbs, fileSize))
sys.stdout.flush()


@@ -1,9 +1,11 @@
import sys
def full_exc_info(exc_info):
def current_stack(skip=0):
try: 1/0
try:
1 / 0
except ZeroDivisionError:
f = sys.exc_info()[2].tb_frame
for i in range(skip + 2):
@@ -16,7 +18,7 @@ def full_exc_info(exc_info):
def extend_traceback(tb, stack):
class FauxTb(object):
class FauxTb():
def __init__(self, tb_frame, tb_lineno, tb_next):
self.tb_frame = tb_frame
self.tb_lineno = tb_lineno
@@ -33,80 +35,106 @@ def full_exc_info(exc_info):
full_tb = extend_traceback(tb, current_stack(1))
return t, v, full_tb
class RedditLoginFailed(Exception):
pass
class ImgurLoginError(Exception):
pass
class FileAlreadyExistsError(Exception):
pass
class NotADownloadableLinkError(Exception):
pass
class AlbumNotDownloadedCompletely(Exception):
pass
class FileNameTooLong(Exception):
pass
class InvalidRedditLink(Exception):
pass
class ProgramModeError(Exception):
pass
class SearchModeError(Exception):
pass
class RedditorNameError(Exception):
pass
class NoMatchingSubmissionFound(Exception):
pass
class NoPrawSupport(Exception):
pass
class NoRedditSupport(Exception):
pass
class MultiredditNotFound(Exception):
pass
class InsufficientPermission(Exception):
pass
class InvalidSortingType(Exception):
pass
class FileNotFoundError(Exception):
pass
class NoSuitablePost(Exception):
pass
class ImgurLimitError(Exception):
pass
class DirectLinkNotFound(Exception):
pass
class InvalidJSONFile(Exception):
pass
class FailedToDownload(Exception):
pass
class TypeInSkip(Exception):
pass
class DomainInSkip(Exception):
pass
class ImageNotFound(Exception):
pass
class ExtensionError(Exception):
pass


@@ -3,6 +3,7 @@ from os import path, remove
from src.errors import InvalidJSONFile
class JsonFile:
""" Write and read JSON files
@@ -13,10 +14,10 @@ class JsonFile:
FILEDIR = ""
def __init__(self,FILEDIR):
def __init__(self, FILEDIR):
self.FILEDIR = FILEDIR
if not path.exists(self.FILEDIR):
self.__writeToFile({},create=True)
self.__writeToFile({}, create=True)
def read(self):
try:
@@ -25,19 +26,21 @@ class JsonFile:
except json.decoder.JSONDecodeError:
raise InvalidJSONFile(f"{self.FILEDIR} cannot be read")
def add(self,toBeAdded,sub=None):
def add(self, toBeAdded, sub=None):
"""Takes a dictionary and merges it with json file.
It uses new key's value if a key already exists.
Returns the new content as a dictionary.
"""
data = self.read()
if sub: data[sub] = {**data[sub], **toBeAdded}
else: data = {**data, **toBeAdded}
if sub:
data[sub] = {**data[sub], **toBeAdded}
else:
data = {**data, **toBeAdded}
self.__writeToFile(data)
return self.read()
def delete(self,*deleteKeys):
def delete(self, *deleteKeys):
"""Delete given keys from JSON file.
Returns the new content as a dictionary.
"""
@@ -51,7 +54,7 @@ class JsonFile:
return False
self.__writeToFile(data)
def __writeToFile(self,content,create=False):
def __writeToFile(self, content, create=False):
if not create:
remove(self.FILEDIR)
with open(self.FILEDIR, 'w') as f:


@@ -5,13 +5,14 @@ try:
except ModuleNotFoundError:
from errors import InvalidRedditLink
def QueryParser(PassedQueries,index):
def QueryParser(PassedQueries, index):
ExtractedQueries = {}
QuestionMarkIndex = PassedQueries.index("?")
Header = PassedQueries[:QuestionMarkIndex]
ExtractedQueries["HEADER"] = Header
Queries = PassedQueries[QuestionMarkIndex+1:]
Queries = PassedQueries[QuestionMarkIndex + 1:]
ParsedQueries = Queries.split("&")
@@ -20,15 +21,16 @@ def QueryParser(PassedQueries,index):
ExtractedQueries[Query[0]] = Query[1]
if ExtractedQueries["HEADER"] == "search":
ExtractedQueries["q"] = ExtractedQueries["q"].replace("%20"," ")
ExtractedQueries["q"] = ExtractedQueries["q"].replace("%20", " ")
return ExtractedQueries
def LinkParser(LINK):
RESULT = {}
ShortLink = False
if not "reddit.com" in LINK:
if "reddit.com" not in LINK:
raise InvalidRedditLink("Invalid reddit link")
SplittedLink = LINK.split("/")
@@ -37,7 +39,7 @@ def LinkParser(LINK):
SplittedLink = SplittedLink[2:]
try:
if (SplittedLink[-2].endswith("reddit.com") and \
if (SplittedLink[-2].endswith("reddit.com") and
SplittedLink[-1] == "") or \
SplittedLink[-1].endswith("reddit.com"):
@@ -55,14 +57,14 @@ def LinkParser(LINK):
SplittedLink = SplittedLink[1:]
if "comments" in SplittedLink:
RESULT = {"post":LINK}
RESULT = {"post": LINK}
return RESULT
elif "me" in SplittedLink or \
"u" in SplittedLink or \
"user" in SplittedLink or \
"r" in SplittedLink or \
"m" in SplittedLink:
if "me" in SplittedLink or \
"u" in SplittedLink or \
"user" in SplittedLink or \
"r" in SplittedLink or \
"m" in SplittedLink:
if "r" in SplittedLink:
RESULT["subreddit"] = SplittedLink[SplittedLink.index("r") + 1]
@@ -74,25 +76,24 @@ def LinkParser(LINK):
else:
for index in range(len(SplittedLink)):
if SplittedLink[index] == "u" or \
SplittedLink[index] == "user":
SplittedLink[index] == "user":
RESULT["user"] = SplittedLink[index+1]
RESULT["user"] = SplittedLink[index + 1]
elif SplittedLink[index] == "me":
RESULT["user"] = "me"
for index in range(len(SplittedLink)):
if SplittedLink[index] in [
"hot","top","new","controversial","rising"
]:
"hot", "top", "new", "controversial", "rising"
]:
RESULT["sort"] = SplittedLink[index]
if index == 0:
RESULT["subreddit"] = "frontpage"
elif SplittedLink[index] in ["submitted","saved","posts","upvoted"]:
elif SplittedLink[index] in ["submitted", "saved", "posts", "upvoted"]:
if SplittedLink[index] == "submitted" or \
SplittedLink[index] == "posts":
RESULT["submitted"] = {}
@@ -104,13 +105,13 @@ def LinkParser(LINK):
RESULT["upvoted"] = True
elif "?" in SplittedLink[index]:
ParsedQuery = QueryParser(SplittedLink[index],index)
ParsedQuery = QueryParser(SplittedLink[index], index)
if ParsedQuery["HEADER"] == "search":
del ParsedQuery["HEADER"]
RESULT["search"] = ParsedQuery
elif ParsedQuery["HEADER"] == "submitted" or \
ParsedQuery["HEADER"] == "posts":
ParsedQuery["HEADER"] == "posts":
del ParsedQuery["HEADER"]
RESULT["submitted"] = ParsedQuery
@@ -118,15 +119,16 @@ def LinkParser(LINK):
del ParsedQuery["HEADER"]
RESULT["queries"] = ParsedQuery
if not ("upvoted" in RESULT or \
"saved" in RESULT or \
"submitted" in RESULT or \
if not ("upvoted" in RESULT or
"saved" in RESULT or
"submitted" in RESULT or
"multireddit" in RESULT) and \
"user" in RESULT:
RESULT["submitted"] = {}
return RESULT
def LinkDesigner(LINK):
attributes = LinkParser(LINK)
@@ -138,13 +140,13 @@ def LinkDesigner(LINK):
MODE["time"] = ""
return MODE
elif "search" in attributes:
if "search" in attributes:
MODE["search"] = attributes["search"]["q"]
if "restrict_sr" in attributes["search"]:
if not (attributes["search"]["restrict_sr"] == 0 or \
attributes["search"]["restrict_sr"] == "off" or \
if not (attributes["search"]["restrict_sr"] == 0 or
attributes["search"]["restrict_sr"] == "off" or
attributes["search"]["restrict_sr"] == ""):
if "subreddit" in attributes:
@@ -169,14 +171,14 @@ def LinkDesigner(LINK):
if "include_over_18" in attributes["search"]:
if attributes["search"]["include_over_18"] == 1 or \
attributes["search"]["include_over_18"] == "on":
attributes["search"]["include_over_18"] == "on":
MODE["nsfw"] = True
else:
MODE["nsfw"] = False
else:
if "queries" in attributes:
if not ("submitted" in attributes or \
if not ("submitted" in attributes or
"posts" in attributes):
if "t" in attributes["queries"]:
@@ -196,10 +198,10 @@ def LinkDesigner(LINK):
else:
MODE["time"] = "day"
if "subreddit" in attributes and not "search" in attributes:
if "subreddit" in attributes and "search" not in attributes:
MODE["subreddit"] = attributes["subreddit"]
elif "user" in attributes and not "search" in attributes:
elif "user" in attributes and "search" not in attributes:
MODE["user"] = attributes["user"]
if "submitted" in attributes:
@@ -234,6 +236,7 @@ def LinkDesigner(LINK):
return MODE
if __name__ == "__main__":
while True:
link = input("> ")


@@ -1,12 +1,12 @@
from src.errors import SearchModeError, RedditorNameError, ProgramModeError, InvalidSortingType
from src.utils import GLOBAL
from src.parser import LinkDesigner
from pathlib import Path
import sys
class ProgramMode:
def __init__(self,arguments):
def __init__(self, arguments):
self.arguments = arguments
def generate(self):
@@ -24,8 +24,8 @@ class ProgramMode:
if self.arguments.search is not None:
programMode["search"] = self.arguments.search
if self.arguments.sort == "hot" or \
self.arguments.sort == "controversial" or \
self.arguments.sort == "rising":
self.arguments.sort == "controversial" or \
self.arguments.sort == "rising":
self.arguments.sort = "relevance"
if self.arguments.sort is not None:
@@ -57,7 +57,7 @@ class ProgramMode:
programMode["time"] = self.arguments.time
elif self.arguments.subreddit is not None:
if type(self.arguments.subreddit) == list:
if isinstance(self.arguments.subreddit, list):
self.arguments.subreddit = "+".join(self.arguments.subreddit)
programMode["subreddit"] = self.arguments.subreddit
@@ -84,29 +84,29 @@ class ProgramMode:
@staticmethod
def _chooseFrom(choices):
print()
choicesByIndex = list(str(x) for x in range(len(choices)+1))
choicesByIndex = [str(x) for x in range(len(choices) + 1)]
for i in range(len(choices)):
print("{indent}[{order}] {mode}".format(
indent=" "*4,order=i+1,mode=choices[i]
indent=" " * 4, order=i + 1, mode=choices[i]
))
print(" "*4+"[0] exit\n")
print(" " * 4 + "[0] exit\n")
choice = input("> ")
while not choice.lower() in choices+choicesByIndex+["exit"]:
while not choice.lower() in choices + choicesByIndex + ["exit"]:
print("Invalid input\n")
input("> ")
if choice == "0" or choice == "exit":
sys.exit()
elif choice in choicesByIndex:
return choices[int(choice)-1]
return choices[int(choice) - 1]
else:
return choice
def _promptUser(self):
print("select program mode:")
programModes = [
"search","subreddit","multireddit",
"submitted","upvoted","saved","log"
"search", "subreddit", "multireddit",
"submitted", "upvoted", "saved", "log"
]
programMode = self._chooseFrom(programModes)
@@ -116,24 +116,25 @@ class ProgramMode:
print("\nselect sort type:")
sortTypes = [
"relevance","top","new"
"relevance", "top", "new"
]
sortType = self._chooseFrom(sortTypes)
self.arguments.sort = sortType
print("\nselect time filter:")
timeFilters = [
"hour","day","week","month","year","all"
"hour", "day", "week", "month", "year", "all"
]
timeFilter = self._chooseFrom(timeFilters)
self.arguments.time = timeFilter
if programMode == "subreddit":
subredditInput = input("(type frontpage for all subscribed subreddits,\n" \
" use plus to seperate multi subreddits:" \
" pics+funny+me_irl etc.)\n\n" \
"subreddit: ")
subredditInput = input(
"(type frontpage for all subscribed subreddits,\n"
" use plus to seperate multi subreddits:"
" pics+funny+me_irl etc.)\n\n"
"subreddit: ")
self.arguments.subreddit = subredditInput
# while not (subredditInput == "" or subredditInput.lower() == "frontpage"):
@@ -141,24 +142,25 @@ class ProgramMode:
# self.arguments.subreddit += "+" + subredditInput
if " " in self.arguments.subreddit:
self.arguments.subreddit = "+".join(self.arguments.subreddit.split())
self.arguments.subreddit = "+".join(
self.arguments.subreddit.split())
# DELETE THE PLUS (+) AT THE END
if not subredditInput.lower() == "frontpage" \
and self.arguments.subreddit[-1] == "+":
and self.arguments.subreddit[-1] == "+":
self.arguments.subreddit = self.arguments.subreddit[:-1]
print("\nselect sort type:")
sortTypes = [
"hot","top","new","rising","controversial"
"hot", "top", "new", "rising", "controversial"
]
sortType = self._chooseFrom(sortTypes)
self.arguments.sort = sortType
if sortType in ["top","controversial"]:
if sortType in ["top", "controversial"]:
print("\nselect time filter:")
timeFilters = [
"hour","day","week","month","year","all"
"hour", "day", "week", "month", "year", "all"
]
timeFilter = self._chooseFrom(timeFilters)
self.arguments.time = timeFilter
@@ -171,15 +173,15 @@ class ProgramMode:
print("\nselect sort type:")
sortTypes = [
"hot","top","new","rising","controversial"
"hot", "top", "new", "rising", "controversial"
]
sortType = self._chooseFrom(sortTypes)
self.arguments.sort = sortType
if sortType in ["top","controversial"]:
if sortType in ["top", "controversial"]:
print("\nselect time filter:")
timeFilters = [
"hour","day","week","month","year","all"
"hour", "day", "week", "month", "year", "all"
]
timeFilter = self._chooseFrom(timeFilters)
self.arguments.time = timeFilter
@@ -192,7 +194,7 @@ class ProgramMode:
print("\nselect sort type:")
sortTypes = [
"hot","top","new","controversial"
"hot", "top", "new", "controversial"
]
sortType = self._chooseFrom(sortTypes)
self.arguments.sort = sortType
@@ -200,7 +202,7 @@ class ProgramMode:
if sortType == "top":
print("\nselect time filter:")
timeFilters = [
"hour","day","week","month","year","all"
"hour", "day", "week", "month", "year", "all"
]
timeFilter = self._chooseFrom(timeFilters)
self.arguments.time = timeFilter
@@ -241,30 +243,35 @@ class ProgramMode:
search = 1 if self.arguments.search else 0
modes = [
"saved","subreddit","submitted","log","link","upvoted","multireddit"
]
"saved",
"subreddit",
"submitted",
"log",
"link",
"upvoted",
"multireddit"]
values = {
x: 0 if getattr(self.arguments,x) is None or \
getattr(self.arguments,x) is False \
else 1 \
for x in modes
x: 0 if getattr(self.arguments, x) is None or
getattr(self.arguments, x) is False
else 1
for x in modes
}
if sum(values[x] for x in values) != 1:
raise ProgramModeError("Invalid program mode")
if search+values["saved"] == 2:
if search + values["saved"] == 2:
raise SearchModeError("You cannot search in your saved posts")
if search+values["submitted"] == 2:
if search + values["submitted"] == 2:
raise SearchModeError("You cannot search in submitted posts")
if search+values["upvoted"] == 2:
if search + values["upvoted"] == 2:
raise SearchModeError("You cannot search in upvoted posts")
if search+values["log"] == 2:
if search + values["log"] == 2:
raise SearchModeError("You cannot search in log files")
if values["upvoted"]+values["submitted"] == 1 and user == 0:
if values["upvoted"] + values["submitted"] == 1 and user == 0:
raise RedditorNameError("No redditor name given")
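The block above enforces that exactly one download mode is active and that search is not combined with modes Reddit cannot search in. A standalone sketch of the exclusive-mode check, with an illustrative argparse-style namespace and a generic exception in place of ProgramModeError:

class Args:                       # stand-in for parsed command-line arguments
    saved = True
    subreddit = None
    submitted = None
    log = None
    link = None
    upvoted = False
    multireddit = None

modes = ["saved", "subreddit", "submitted", "log",
         "link", "upvoted", "multireddit"]
values = {
    x: 0 if getattr(Args, x) is None or getattr(Args, x) is False else 1
    for x in modes
}

if sum(values.values()) != 1:     # exactly one mode may be active
    raise ValueError("Invalid program mode")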

View File

@@ -2,23 +2,24 @@ import praw
import random
import socket
import webbrowser
from prawcore.exceptions import NotFound, ResponseException, Forbidden
from prawcore.exceptions import ResponseException
from src.utils import GLOBAL
from src.jsonHelper import JsonFile
from src.errors import RedditLoginFailed
class Reddit:
def __init__(self,refresh_token=None):
self.SCOPES = ['identity','history','read','save']
def __init__(self, refresh_token=None):
self.SCOPES = ['identity', 'history', 'read', 'save']
self.PORT = 7634
self.refresh_token = refresh_token
self.redditInstance = None
self.arguments = {
"client_id":GLOBAL.reddit_client_id,
"client_secret":GLOBAL.reddit_client_secret,
"user_agent":str(socket.gethostname())
"client_id": GLOBAL.reddit_client_id,
"client_secret": GLOBAL.reddit_client_secret,
"user_agent": str(socket.gethostname())
}
def begin(self):
@@ -30,18 +31,20 @@ class Reddit:
self.redditInstance.auth.scopes()
return self.redditInstance
except ResponseException:
self.arguments["redirect_uri"] = "http://localhost:" + str(self.PORT)
self.arguments["redirect_uri"] = "http://localhost:" + \
str(self.PORT)
self.redditInstance = praw.Reddit(**self.arguments)
reddit, refresh_token = self.getRefreshToken(*self.SCOPES)
else:
self.arguments["redirect_uri"] = "http://localhost:" + str(self.PORT)
self.arguments["redirect_uri"] = "http://localhost:" + \
str(self.PORT)
self.redditInstance = praw.Reddit(**self.arguments)
reddit, refresh_token = self.getRefreshToken(*self.SCOPES)
JsonFile(GLOBAL.configDirectory).add({
"reddit_username": str(reddit.user.me()),
"reddit": refresh_token
},"credentials")
}, "credentials")
return self.redditInstance
@@ -57,42 +60,45 @@ class Reddit:
server.close()
return client
def send_message(self, client, message):
@staticmethod
def send_message(client, message):
"""Send message to client and close the connection."""
client.send(
'HTTP/1.1 200 OK\r\n\r\n{}'.format(message).encode('utf-8')
)
client.close()
def getRefreshToken(self,*scopes):
def getRefreshToken(self, *scopes):
state = str(random.randint(0, 65000))
url = self.redditInstance.auth.url(scopes, state, 'permanent')
print("---Setting up the Reddit API---\n")
print("Go to this URL and login to reddit:\n",url,sep="\n",end="\n\n")
webbrowser.open(url,new=2)
print(
"Go to this URL and login to reddit:\n",
url,
sep="\n",
end="\n\n")
webbrowser.open(url, new=2)
client = self.recieve_connection()
data = client.recv(1024).decode('utf-8')
str(data)
param_tokens = data.split(' ', 2)[1].split('?', 1)[1].split('&')
params = {
key: value for (key, value) in [token.split('=') \
for token in param_tokens]
}
params = dict([token.split('=')
for token in param_tokens])
if state != params['state']:
self.send_message(
client, 'State mismatch. Expected: {} Received: {}'
.format(state, params['state'])
)
raise RedditLoginFailed
elif 'error' in params:
if 'error' in params:
self.send_message(client, params['error'])
raise RedditLoginFailed
refresh_token = self.redditInstance.auth.authorize(params['code'])
self.send_message(client,
"<script>" \
"alert(\"You can go back to terminal window now.\");" \
"</script>"
)
return (self.redditInstance,refresh_token)
"<script>"
"alert(\"You can go back to terminal window now.\");"
"</script>"
)
return (self.redditInstance, refresh_token)
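Once the refresh token is stored under "credentials", later runs can skip the browser round-trip entirely. A minimal sketch assuming PRAW 7.x; the credential strings are placeholders:

import praw

reddit = praw.Reddit(
    client_id="CLIENT_ID",                  # placeholders, not real credentials
    client_secret="CLIENT_SECRET",
    refresh_token="STORED_REFRESH_TOKEN",   # as saved to the credentials file
    user_agent="bdfr",
)

print(reddit.user.me())      # authenticates silently via the refresh token
print(reddit.auth.scopes())  # e.g. {'identity', 'history', 'read', 'save'}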

View File

@@ -1,25 +1,17 @@
import os
import sys
import random
import socket
import time
import webbrowser
import urllib.request
from urllib.error import HTTPError
import praw
from prawcore.exceptions import NotFound, ResponseException, Forbidden
from prawcore.exceptions import NotFound, Forbidden
from src.reddit import Reddit
from src.utils import GLOBAL, createLogFile, printToFile
from src.jsonHelper import JsonFile
from src.errors import (NoMatchingSubmissionFound, NoPrawSupport,
NoRedditSupport, MultiredditNotFound,
InvalidSortingType, RedditLoginFailed,
InsufficientPermission, DirectLinkNotFound)
                        NoRedditSupport, MultiredditNotFound,
                        InvalidSortingType, InsufficientPermission)
print = printToFile
def getPosts(programMode):
"""Call PRAW regarding to arguments and pass it to extractDetails.
Return what extractDetails has returned.
@@ -39,38 +31,38 @@ def getPosts(programMode):
if programMode["user"] == "me":
programMode["user"] = str(reddit.user.me())
if not "search" in programMode:
if "search" not in programMode:
if programMode["sort"] == "top" or programMode["sort"] == "controversial":
keyword_params = {
"time_filter":programMode["time"],
"limit":programMode["limit"]
"time_filter": programMode["time"],
"limit": programMode["limit"]
}
# OTHER SORT TYPES DON'T TAKE TIME_FILTER
else:
keyword_params = {
"limit":programMode["limit"]
"limit": programMode["limit"]
}
else:
keyword_params = {
"time_filter":programMode["time"],
"limit":programMode["limit"]
}
"time_filter": programMode["time"],
"limit": programMode["limit"]
}
if "search" in programMode:
if programMode["sort"] in ["hot","rising","controversial"]:
if programMode["sort"] in ["hot", "rising", "controversial"]:
raise InvalidSortingType("Invalid sorting type was given")
if "subreddit" in programMode:
print (
"search for \"{search}\" in\n" \
"subreddit: {subreddit}\nsort: {sort}\n" \
print(
"search for \"{search}\" in\n"
"subreddit: {subreddit}\nsort: {sort}\n"
"time: {time}\nlimit: {limit}\n".format(
search=programMode["search"],
limit=programMode["limit"],
sort=programMode["sort"],
subreddit=programMode["subreddit"],
time=programMode["time"]
).upper(),noPrint=True
).upper(), noPrint=True
)
return extractDetails(
reddit.subreddit(programMode["subreddit"]).search(
@@ -81,13 +73,13 @@ def getPosts(programMode):
)
)
elif "multireddit" in programMode:
if "multireddit" in programMode:
raise NoPrawSupport("PRAW does not support that")
elif "user" in programMode:
if "user" in programMode:
raise NoPrawSupport("PRAW does not support that")
elif "saved" in programMode:
if "saved" in programMode:
raise ("Reddit does not support that")
if programMode["sort"] == "relevance":
@@ -98,103 +90,108 @@ def getPosts(programMode):
"saved posts\nuser:{username}\nlimit={limit}\n".format(
username=reddit.user.me(),
limit=programMode["limit"]
).upper(),noPrint=True
).upper(), noPrint=True
)
return extractDetails(reddit.user.me().saved(limit=programMode["limit"]))
return extractDetails(
reddit.user.me().saved(
limit=programMode["limit"]))
if "subreddit" in programMode:
if programMode["subreddit"] == "frontpage":
print (
"subreddit: {subreddit}\nsort: {sort}\n" \
print(
"subreddit: {subreddit}\nsort: {sort}\n"
"time: {time}\nlimit: {limit}\n".format(
limit=programMode["limit"],
sort=programMode["sort"],
subreddit=programMode["subreddit"],
time=programMode["time"]
).upper(),noPrint=True
).upper(), noPrint=True
)
return extractDetails(
getattr(reddit.front,programMode["sort"]) (**keyword_params)
getattr(reddit.front, programMode["sort"])(**keyword_params)
)
print(
"subreddit: {subreddit}\nsort: {sort}\n"
"time: {time}\nlimit: {limit}\n".format(
limit=programMode["limit"],
sort=programMode["sort"],
subreddit=programMode["subreddit"],
time=programMode["time"]
).upper(), noPrint=True
)
return extractDetails(
getattr(
reddit.subreddit(programMode["subreddit"]), programMode["sort"]
)(**keyword_params)
)
else:
print (
"subreddit: {subreddit}\nsort: {sort}\n" \
"time: {time}\nlimit: {limit}\n".format(
limit=programMode["limit"],
sort=programMode["sort"],
subreddit=programMode["subreddit"],
time=programMode["time"]
).upper(),noPrint=True
)
return extractDetails(
getattr(
reddit.subreddit(programMode["subreddit"]),programMode["sort"]
) (**keyword_params)
)
elif "multireddit" in programMode:
print (
"user: {user}\n" \
"multireddit: {multireddit}\nsort: {sort}\n" \
if "multireddit" in programMode:
print(
"user: {user}\n"
"multireddit: {multireddit}\nsort: {sort}\n"
"time: {time}\nlimit: {limit}\n".format(
user=programMode["user"],
limit=programMode["limit"],
sort=programMode["sort"],
multireddit=programMode["multireddit"],
time=programMode["time"]
).upper(),noPrint=True
).upper(), noPrint=True
)
try:
return extractDetails(
getattr(
reddit.multireddit(
programMode["user"], programMode["multireddit"]
),programMode["sort"]
) (**keyword_params)
), programMode["sort"]
)(**keyword_params)
)
except NotFound:
raise MultiredditNotFound("Multireddit not found")
elif "submitted" in programMode:
print (
"submitted posts of {user}\nsort: {sort}\n" \
print(
"submitted posts of {user}\nsort: {sort}\n"
"time: {time}\nlimit: {limit}\n".format(
limit=programMode["limit"],
sort=programMode["sort"],
user=programMode["user"],
time=programMode["time"]
).upper(),noPrint=True
).upper(), noPrint=True
)
return extractDetails(
getattr(
reddit.redditor(programMode["user"]).submissions,programMode["sort"]
) (**keyword_params)
reddit.redditor(programMode["user"]
).submissions, programMode["sort"]
)(**keyword_params)
)
elif "upvoted" in programMode:
print (
print(
"upvoted posts of {user}\nlimit: {limit}\n".format(
user=programMode["user"],
limit=programMode["limit"]
).upper(),noPrint=True
).upper(), noPrint=True
)
try:
return extractDetails(
reddit.redditor(programMode["user"]).upvoted(limit=programMode["limit"])
reddit.redditor(programMode["user"]).upvoted(
limit=programMode["limit"])
)
except Forbidden:
raise InsufficientPermission("You do not have permission to do that")
raise InsufficientPermission(
"You do not have permission to do that")
elif "post" in programMode:
print("post: {post}\n".format(post=programMode["post"]).upper(),noPrint=True)
print("post: {post}\n".format(
post=programMode["post"]).upper(), noPrint=True)
return extractDetails(
reddit.submission(url=programMode["post"]),SINGLE_POST=True
reddit.submission(url=programMode["post"]), SINGLE_POST=True
)
def extractDetails(posts,SINGLE_POST=False):
def extractDetails(posts, SINGLE_POST=False):
"""Check posts and decide if it can be downloaded.
If so, create a dictionary with post details and append them to a list.
Write all of posts to file. Return the list
@@ -212,30 +209,31 @@ def extractDetails(posts,SINGLE_POST=False):
submission = posts
postCount += 1
try:
details = {'POSTID':submission.id,
'TITLE':submission.title,
'REDDITOR':str(submission.author),
'TYPE':None,
'CONTENTURL':submission.url,
'SUBREDDIT':submission.subreddit.display_name,
details = {'POSTID': submission.id,
'TITLE': submission.title,
'REDDITOR': str(submission.author),
'TYPE': None,
'CONTENTURL': submission.url,
'SUBREDDIT': submission.subreddit.display_name,
'UPVOTES': submission.score,
'FLAIR':submission.link_flair_text,
'DATE':str(time.strftime(
"%Y-%m-%d_%H-%M",
time.localtime(submission.created_utc)
))}
'FLAIR': submission.link_flair_text,
'DATE': str(time.strftime(
"%Y-%m-%d_%H-%M",
time.localtime(submission.created_utc)
))}
if 'gallery' in submission.url:
details['CONTENTURL'] = genLinksifGallery(submission.media_metadata)
except AttributeError:
pass
if not any(domain in submission.domain for domain in GLOBAL.arguments.skip_domain):
if not any(
domain in submission.domain for domain in GLOBAL.arguments.skip_domain):
result = matchWithDownloader(submission)
if result is not None:
details = {**details, **result}
postList.append(details)
postsFile.add({postCount:details})
postsFile.add({postCount: details})
else:
try:
@@ -246,30 +244,32 @@ def extractDetails(posts,SINGLE_POST=False):
sys.stdout.flush()
if postCount % 1000 == 0:
sys.stdout.write("\n"+" "*14)
sys.stdout.write("\n" + " " * 14)
sys.stdout.flush()
try:
details = {'POSTID':submission.id,
'TITLE':submission.title,
'REDDITOR':str(submission.author),
'TYPE':None,
'CONTENTURL':submission.url,
'SUBREDDIT':submission.subreddit.display_name,
'UPVOTES': submission.score,
'FLAIR':submission.link_flair_text,
'DATE':str(time.strftime(
"%Y-%m-%d_%H-%M",
time.localtime(submission.created_utc)
))}
details = {'POSTID': submission.id,
'TITLE': submission.title,
'REDDITOR': str(submission.author),
'TYPE': None,
'CONTENTURL': submission.url,
'SUBREDDIT': submission.subreddit.display_name,
'UPVOTES': submission.score,
'FLAIR': submission.link_flair_text,
'DATE': str(time.strftime(
"%Y-%m-%d_%H-%M",
time.localtime(submission.created_utc)
))}
if 'gallery' in submission.url:
details['CONTENTURL'] = genLinksifGallery(submission.media_metadata)
except AttributeError:
continue
if details['POSTID'] in GLOBAL.downloadedPosts(): continue
if details['POSTID'] in GLOBAL.downloadedPosts():
continue
if not any(domain in submission.domain for domain in GLOBAL.arguments.skip_domain):
if not any(
domain in submission.domain for domain in GLOBAL.arguments.skip_domain):
result = matchWithDownloader(submission)
if result is not None:
@@ -280,15 +280,15 @@ def extractDetails(posts,SINGLE_POST=False):
postCount += 1
except KeyboardInterrupt:
print("\nKeyboardInterrupt",noPrint=True)
print("\nKeyboardInterrupt", noPrint=True)
postsFile.add(allPosts)
if not len(postList) == 0:
if len(postList) != 0:
print()
return postList
else:
raise NoMatchingSubmissionFound("No matching submission was found")
raise NoMatchingSubmissionFound("No matching submission was found")
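Removing the `else` after a branch that ends in `return` (or `raise`) flattens the function without changing behavior; the final `raise` becomes the natural fall-through. Sketched standalone with a stub exception:

class NoMatchingSubmissionFound(Exception):
    """Stub of the project exception, for a self-contained example."""

def finish(post_list):
    if len(post_list) != 0:      # an empty list is the failure case
        print()
        return post_list
    raise NoMatchingSubmissionFound("No matching submission was found")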
def matchWithDownloader(submission):
@@ -297,15 +297,15 @@ def matchWithDownloader(submission):
directLink = extractDirectLink(submission.url)
if directLink:
return {'TYPE': 'direct',
'CONTENTURL': directLink}
return {'TYPE': 'direct',
'CONTENTURL': directLink}
if 'v.redd.it' in submission.domain:
bitrates = ["DASH_1080","DASH_720","DASH_600", \
"DASH_480","DASH_360","DASH_240"]
bitrates = ["DASH_1080", "DASH_720", "DASH_600",
"DASH_480", "DASH_360", "DASH_240"]
for bitrate in bitrates:
videoURL = submission.url+"/"+bitrate+".mp4"
videoURL = submission.url + "/" + bitrate + ".mp4"
try:
responseCode = urllib.request.urlopen(videoURL).getcode()
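The probe above walks the v.redd.it DASH renditions from highest to lowest bitrate and keeps the first URL the CDN serves. The same idea as a standalone sketch (network access assumed; the helper name is made up):

import urllib.request
from urllib.error import HTTPError

def best_dash_url(base_url):
    """Return the first v.redd.it rendition that responds, or None."""
    bitrates = ["DASH_1080", "DASH_720", "DASH_600",
                "DASH_480", "DASH_360", "DASH_240"]
    for bitrate in bitrates:
        video_url = base_url + "/" + bitrate + ".mp4"
        try:
            if urllib.request.urlopen(video_url).getcode() == 200:
                return video_url
        except HTTPError:
            continue          # 403/404: try the next lower bitrate
    return None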
@@ -319,7 +319,7 @@ def matchWithDownloader(submission):
return {'TYPE': 'gfycat'}
if 'youtube' in submission.domain \
and 'watch' in submission.url:
and 'watch' in submission.url:
return {'TYPE': 'youtube'}
if 'youtu.be' in submission.domain:
@@ -342,17 +342,18 @@ def matchWithDownloader(submission):
if 'reddit.com/gallery' in submission.url:
return {'TYPE': 'gallery'}
elif submission.is_self and 'self' not in GLOBAL.arguments.skip:
if submission.is_self and 'self' not in GLOBAL.arguments.skip:
return {'TYPE': 'self',
'CONTENT': submission.selftext}
def extractDirectLink(URL):
"""Check if link is a direct image link.
If so, return the URL;
if not, return None
"""
imageTypes = ['jpg','jpeg','png','mp4','webm','gif']
imageTypes = ['jpg', 'jpeg', 'png', 'mp4', 'webm', 'gif']
if URL[-1] == "/":
URL = URL[:-1]
@@ -362,8 +363,8 @@ def extractDirectLink(URL):
for extension in imageTypes:
if extension == URL.split(".")[-1]:
return URL
else:
return None
return None
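A `for ... else` whose loop body never `break`s always runs the `else`, so a single `return None` after the loop is equivalent and clearer. A runnable sketch of the cleaned-up check:

def extract_direct_link(url):
    """Return url if it ends in a known media extension, else None (sketch)."""
    image_types = ['jpg', 'jpeg', 'png', 'mp4', 'webm', 'gif']
    if url.endswith("/"):
        url = url[:-1]
    for extension in image_types:
        if url.split(".")[-1] == extension:
            return url
    return None                    # reached only when no extension matched

print(extract_direct_link("https://i.imgur.com/example.jpg"))  # the URL itself
print(extract_direct_link("https://example.com/page"))         # None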
def genLinksifGallery(metadata):
galleryImgUrls = list()

View File

@@ -1,7 +1,8 @@
from os import path
class Store:
def __init__(self,directory=None):
def __init__(self, directory=None):
self.directory = directory
if self.directory:
if path.exists(directory):

View File

@@ -1,27 +1,28 @@
import io
import json
import sys
from os import makedirs, path, remove
from os import makedirs, path
from pathlib import Path
from src.jsonHelper import JsonFile
from src.errors import FileNotFoundError
class GLOBAL:
"""Declare global variables"""
RUN_TIME = ""
config = {'imgur_client_id':None, 'imgur_client_secret': None}
config = {'imgur_client_id': None, 'imgur_client_secret': None}
arguments = None
directory = None
defaultConfigDirectory = Path.home() / "Bulk Downloader for Reddit"
configDirectory = ""
reddit_client_id = "U-6gk4ZCh3IeNQ"
reddit_client_secret = "7CZHY6AmKweZME5s50SfDGylaPg"
downloadedPosts = lambda: []
@staticmethod
def downloadedPosts(): return []
printVanilla = print
log_stream= None
log_stream = None
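Replacing the assigned lambda with a decorated staticmethod keeps the call sites (`GLOBAL.downloadedPosts()`) identical while giving the callable a real name for tracebacks and linters (flake8 flags lambda assignment as E731). A sketch of the difference:

class Registry:
    downloadedPosts = lambda: []      # anonymous: repr shows "<lambda>"

class RegistryFixed:
    @staticmethod
    def downloadedPosts():
        return []                     # named: repr shows "downloadedPosts"

print(Registry.downloadedPosts())              # []
print(RegistryFixed.downloadedPosts())         # []
print(RegistryFixed.downloadedPosts.__name__)  # downloadedPosts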
def createLogFile(TITLE):
"""Create a log file with given name
@@ -31,40 +32,43 @@ def createLogFile(TITLE):
folderDirectory = GLOBAL.directory / "LOG_FILES" / GLOBAL.RUN_TIME
logFilename = TITLE.upper()+'.json'
logFilename = TITLE.upper() + '.json'
if not path.exists(folderDirectory):
makedirs(folderDirectory)
FILE = JsonFile(folderDirectory / Path(logFilename))
HEADER = " ".join(sys.argv)
FILE.add({"HEADER":HEADER})
FILE.add({"HEADER": HEADER})
return FILE
def printToFile(*args, noPrint=False,**kwargs):
def printToFile(*args, noPrint=False, **kwargs):
"""Print to both CONSOLE and
a CONSOLE LOG file in a folder whose name is time-stamped
"""
folderDirectory = GLOBAL.directory / Path("LOG_FILES") / Path(GLOBAL.RUN_TIME)
folderDirectory = GLOBAL.directory / \
Path("LOG_FILES") / Path(GLOBAL.RUN_TIME)
if not noPrint or \
GLOBAL.arguments.verbose or \
"file" in kwargs:
print(*args,**kwargs)
print(*args, **kwargs)
if not path.exists(folderDirectory):
makedirs(folderDirectory)
if not "file" in kwargs:
if "file" not in kwargs:
with io.open(
folderDirectory / "CONSOLE_LOG.txt","a",encoding="utf-8"
folderDirectory / "CONSOLE_LOG.txt", "a", encoding="utf-8"
) as FILE:
print(*args, file=FILE, **kwargs)
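The tee pattern is compact enough to sketch standalone: print once to the console, then again with `file=` pointing at the append-mode log. Paths here are illustrative:

import io
from pathlib import Path

def print_to_file(*args, log_dir=Path("LOG_FILES"), **kwargs):
    """Tee output to stdout and to CONSOLE_LOG.txt (sketch)."""
    print(*args, **kwargs)
    log_dir.mkdir(parents=True, exist_ok=True)
    with io.open(log_dir / "CONSOLE_LOG.txt", "a", encoding="utf-8") as fh:
        print(*args, file=fh, **kwargs)

print_to_file("downloaded posts:", 42)   # console + appended log line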
def nameCorrector(string,reference=None):
def nameCorrector(string, reference=None):
"""Swap strange characters from given string
with underscore (_) and shorten it.
Return the string
@@ -82,14 +86,15 @@ def nameCorrector(string,reference=None):
if totalLenght > LIMIT:
limit = LIMIT - referenceLenght
string = string[:limit-1]
string = string[:limit - 1]
string = string.replace(" ", "_")
if len(string.split('\n')) > 1:
string = "".join(string.split('\n'))
BAD_CHARS = ['\\','/',':','*','?','"','<','>','|','#', '.', '@', '“', '”', '\'', '!']
BAD_CHARS = ['\\', '/', ':', '*', '?', '"', '<',
             '>', '|', '#', '.', '@', '“', '”', '\'', '!']
string = "".join([i if i not in BAD_CHARS else "_" for i in string])
return string
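A condensed, runnable sketch of the sanitizer: truncate, underscore the spaces, strip embedded newlines, then map every banned character to an underscore. The 247-character cap mirrors common filesystem limits and is an assumption here:

BAD_CHARS = ['\\', '/', ':', '*', '?', '"', '<',
             '>', '|', '#', '.', '@', '“', '”', '\'', '!']

def name_corrector(string, limit=247):       # limit value is illustrative
    string = string[:limit - 1].replace(" ", "_")
    string = "".join(string.split('\n'))     # drop embedded newlines
    return "".join(c if c not in BAD_CHARS else "_" for c in string)

print(name_corrector('What? A "title": with/bad*chars'))
# -> What__A__title___with_bad_chars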