Files
BDFR_Web/src/downloader.py
2018-07-10 01:30:50 +03:00

333 lines
11 KiB
Python

import io
import os
import sys
import urllib.request
from pathlib import Path
from src.errors import (AlbumNotDownloadedCompletely, FileAlreadyExistsError,
FileNameTooLong, ImgurLoginError,
NotADownloadableLinkError)
from src.tools import GLOBAL, nameCorrector, printToFile
try:
from imgurpython import *
except ModuleNotFoundError:
print("\nimgurpython not found on your computer, installing...\n")
from src.tools import install
install("imgurpython")
from imgurpython import *
VanillaPrint = print
print = printToFile
def dlProgress(count, blockSize, totalSize):
"""Function for writing download progress to console
"""
downloadedMbs = int(count*blockSize*(10**(-6)))
fileSize = int(totalSize*(10**(-6)))
sys.stdout.write("\r{}Mb/{}Mb".format(downloadedMbs,fileSize))
sys.stdout.write("\b"*len("\r{}Mb/{}Mb".format(downloadedMbs,fileSize)))
sys.stdout.flush()
def getExtension(link):
"""Extract file extension from image link.
If didn't find any, return '.jpg'
"""
imageTypes = ['jpg','png','mp4','webm','gif']
parsed = link.split('.')
for TYPE in imageTypes:
if TYPE in parsed:
return "."+parsed[-1]
else:
return '.jpg'
def getFile(fileDir,tempDir,imageURL,indent=0):
"""Downloads given file to given directory.
fileDir -- Full file directory
tempDir -- Full file directory with the extension of '.tmp'
imageURL -- URL to the file to be downloaded
redditID -- Post's reddit id if renaming the file is necessary.
As too long file names seem not working.
"""
if not (os.path.isfile(fileDir)):
for i in range(3):
try:
urllib.request.urlretrieve(imageURL,
tempDir,
reporthook=dlProgress)
os.rename(tempDir,fileDir)
print(" "*indent+"Downloaded"+" "*10)
break
except ConnectionResetError as exception:
print(" "*indent + str(exception))
print(" "*indent + "Trying again\n")
except FileNotFoundError:
raise FileNameTooLong
else:
raise FileAlreadyExistsError
class Imgur:
def __init__(self,directory,post):
self.imgurClient = self.initImgur()
imgurID = self.getId(post['postURL'])
content = self.getLink(imgurID)
if not os.path.exists(directory): os.makedirs(directory)
if content['type'] == 'image':
try:
post['mediaURL'] = content['object'].mp4
except AttributeError:
post['mediaURL'] = content['object'].link
post['postExt'] = getExtension(post['mediaURL'])
title = nameCorrector(post['postTitle'])
print(title+"_" +post['postId']+post['postExt'])
fileDir = title + "_" + post['postId'] + post['postExt']
fileDir = directory / fileDir
tempDir = title + "_" + post['postId'] + '.tmp'
tempDir = directory / tempDir
try:
getFile(fileDir,tempDir,post['mediaURL'])
except FileNameTooLong:
fileDir = directory / post['postId'] + post['postExt']
tempDir = directory / post['postId'] + '.tmp'
getFile(fileDir,tempDir,post['mediaURL'])
elif content['type'] == 'album':
exceptionType = ""
images = content['object'].images
imagesLenght = len(images)
howManyDownloaded = imagesLenght
duplicates = 0
title = nameCorrector(post['postTitle'])
print(title+"_"+post['postId'],end="\n\n")
folderDir = directory / (title+"_"+post['postId'])
try:
if not os.path.exists(folderDir):
os.makedirs(folderDir)
except FileNotFoundError:
folderDir = directory / post['postId']
os.makedirs(folderDir)
for i in range(imagesLenght):
try:
imageURL = images[i]['mp4']
except KeyError:
imageURL = images[i]['link']
images[i]['Ext'] = getExtension(imageURL)
fileName = (str(i+1)
+ "_"
+ nameCorrector(str(images[i]['title']))
+ "_"
+ images[i]['id'])
fileDir = folderDir / (fileName + images[i]['Ext'])
tempDir = folderDir / (fileName + ".tmp")
print(" ({}/{})".format(i+1,imagesLenght))
print(" {}".format(fileName+images[i]['Ext']))
try:
getFile(fileDir,tempDir,imageURL,indent=2)
print()
except FileAlreadyExistsError:
print(" The file already exists" + " "*10,end="\n\n")
duplicates += 1
howManyDownloaded -= 1
# IF FILE NAME IS TOO LONG, IT WONT REGISTER
except FileNameTooLong:
fileName = (str(i+1) + "_" + images[i]['id'])
fileDir = folderDir / (fileName + images[i]['Ext'])
tempDir = folderDir / (fileName + ".tmp")
try:
getFile(fileDir,tempDir,imageURL,indent=2)
# IF STILL TOO LONG
except FileNameTooLong:
fileName = str(i+1)
fileDir = folderDir / (fileName + images[i]['Ext'])
tempDir = folderDir / (fileName + ".tmp")
getFile(fileDir,tempDir,imageURL,indent=2)
except Exception as exception:
print("\n Could not get the file")
print(" " + str(exception) + "\n")
exceptionType = exception
howManyDownloaded -= 1
if duplicates == imagesLenght:
raise FileAlreadyExistsError
elif howManyDownloaded < imagesLenght:
raise AlbumNotDownloadedCompletely
@staticmethod
def initImgur():
"""Initialize imgur api"""
config = GLOBAL.config
return ImgurClient(
config['imgur_client_id'],
config['imgur_client_secret']
)
def getId(self,submissionURL):
"""Extract imgur post id
and determine if its a single image or album
"""
domainLenght = len("imgur.com/")
if submissionURL[-1] == "/":
submissionURL = submissionURL[:-1]
if "a/" in submissionURL or "gallery/" in submissionURL:
albumId = submissionURL.split("/")[-1]
return {'id':albumId, 'type':'album'}
else:
url = submissionURL.replace('.','/').split('/')
imageId = url[url.index('com')+1]
return {'id':imageId, 'type':'image'}
def getLink(self,identity):
"""Request imgur object from imgur api
"""
if identity['type'] == 'image':
return {'object':self.imgurClient.get_image(identity['id']),
'type':'image'}
elif identity['type'] == 'album':
return {'object':self.imgurClient.get_album(identity['id']),
'type':'album'}
def get_credits():
return Imgur.initImgur().get_credits()
class Gfycat:
def __init__(self,directory,POST):
try:
POST['mediaURL'] = self.getLink(POST['postURL'])
except IndexError:
raise NotADownloadableLinkError
except Exception as exception:
raise NotADownloadableLinkError
POST['postExt'] = getExtension(POST['mediaURL'])
if not os.path.exists(directory): os.makedirs(directory)
title = nameCorrector(POST['postTitle'])
print(title+"_"+POST['postId']+POST['postExt'])
fileDir = directory / (title+"_"+POST['postId']+POST['postExt'])
tempDir = directory / (title+"_"+POST['postId']+".tmp")
try:
getFile(fileDir,tempDir,POST['mediaURL'])
except FileNameTooLong:
fileDir = directory / (POST['postId']+POST['postExt'])
tempDir = directory / (POST['postId']+".tmp")
getFile(fileDir,tempDir,POST['mediaURL'])
def getLink(self, url, query='<source id="mp4Source" src=', lineNumber=105):
"""Extract direct link to the video from page's source
and return it
"""
if '.webm' in url or '.mp4' in url or '.gif' in url:
return url
if url[-1:] == '/':
url = url[:-1]
if 'gifs' in url:
url = "https://gfycat.com/" + url.split('/')[-1]
pageSource = (urllib.request.urlopen(url).read().decode().split('\n'))
theLine = pageSource[lineNumber]
lenght = len(query)
link = []
for i in range(len(theLine)):
if theLine[i:i+lenght] == query:
cursor = (i+lenght)+1
while not theLine[cursor] == '"':
link.append(theLine[cursor])
cursor += 1
break
if "".join(link) == "":
raise NotADownloadableLinkError
return "".join(link)
class Direct:
def __init__(self,directory,POST):
POST['postExt'] = getExtension(POST['postURL'])
if not os.path.exists(directory): os.makedirs(directory)
title = nameCorrector(POST['postTitle'])
print(title+"_"+POST['postId']+POST['postExt'])
fileDir = title+"_"+POST['postId']+POST['postExt']
fileDir = directory / fileDir
tempDir = title+"_"+POST['postId']+".tmp"
tempDir = directory / tempDir
try:
getFile(fileDir,tempDir,POST['postURL'])
except FileNameTooLong:
fileDir = directory / (POST['postId']+POST['postExt'])
tempDir = directory / (POST['postId']+".tmp")
getFile(fileDir,tempDir,POST['postURL'])
class Self:
def __init__(self,directory,post):
if not os.path.exists(directory): os.makedirs(directory)
title = nameCorrector(post['postTitle'])
print(title+"_"+post['postId']+".md")
fileDir = title+"_"+post['postId']+".md"
fileDir = directory / fileDir
if Path.is_file(fileDir):
raise FileAlreadyExistsError
self.writeToFile(fileDir,post)
@staticmethod
def writeToFile(directory,post):
content = ("## ["
+ post["postTitle"]
+ "]("
+ post["postURL"]
+ ")\n"
+ post["postContent"]
+ "\n\n---\n\n"
+ "submitted by [u/"
+ post["postSubmitter"]
+ "](https://www.reddit.com/user/"
+ post["postSubmitter"]
+ ")")
with io.open(directory,"w",encoding="utf-8") as FILE:
VanillaPrint(content,file=FILE)
print("Downloaded")