Merge pull request #177 from EkriirkE/master

Adds a Reddit native album (gallery) downloader, a minor fix for an imgur JSON parsing error, and a fix for access-denied responses from redgifs.
This commit is contained in:
Ali Parlakçı
2021-02-05 13:27:21 +03:00
committed by GitHub
5 changed files with 138 additions and 6 deletions

View File

@@ -24,6 +24,7 @@ from src.downloaders.selfPost import SelfPost
from src.downloaders.vreddit import VReddit
from src.downloaders.youtube import Youtube
from src.downloaders.gifDeliveryNetwork import GifDeliveryNetwork
from src.downloaders.gallery import gallery
from src.errors import ImgurLimitError, NoSuitablePost, FileAlreadyExistsError, ImgurLoginError, NotADownloadableLinkError, NoSuitablePost, InvalidJSONFile, FailedToDownload, TypeInSkip, DomainInSkip, AlbumNotDownloadedCompletely, full_exc_info
from src.parser import LinkDesigner
from src.searcher import getPosts
@@ -87,7 +88,7 @@ def downloadPost(SUBMISSION,directory):
downloaders = {
"imgur":Imgur,"gfycat":Gfycat,"erome":Erome,"direct":Direct,"self":SelfPost,
"redgifs":Redgifs, "gifdeliverynetwork": GifDeliveryNetwork,
"v.redd.it": VReddit, "youtube": Youtube
"v.redd.it": VReddit, "youtube": Youtube, "gallery": gallery
}
print()

View File

@@ -114,7 +114,7 @@ class Imgur:
@staticmethod
def getData(link):
cookies = {"over18": "1"}
cookies = {"over18": "1", "postpagebeta": "0"}
res = requests.get(link, cookies=cookies)
if res.status_code != 200: raise ImageNotFound(f"Server responded with {res.status_code} to {link}")
pageSource = requests.get(link, cookies=cookies).text
@@ -125,11 +125,17 @@ class Imgur:
STARTING_STRING_LENGHT = len(STARTING_STRING)
try:
startIndex = pageSource.index(STARTING_STRING) + STARTING_STRING_LENGHT
endIndex = pageSource.index(ENDING_STRING)
endIndex = pageSource.index(ENDING_STRING, startIndex)
except ValueError:
raise NotADownloadableLinkError(f"Could not read the page source on {link}")
data = pageSource[startIndex:endIndex].strip()[:-1]
while pageSource[endIndex] != "}":
endIndex=endIndex-1
try:
data = pageSource[startIndex:endIndex+2].strip()[:-1]
except:
pageSource[endIndex+1]='}'
data = pageSource[startIndex:endIndex+3].strip()[:-1]
return json.loads(data)

120
src/downloaders/gallery.py Normal file
View File

@@ -0,0 +1,120 @@
import io
import os
import json
import urllib
import requests
from pathlib import Path
from src.utils import GLOBAL, nameCorrector
from src.utils import printToFile as print
from src.downloaders.Direct import Direct
from src.downloaders.downloaderUtils import getFile
from src.errors import FileNotFoundError, FileAlreadyExistsError, AlbumNotDownloadedCompletely, ImageNotFound, ExtensionError, NotADownloadableLinkError, TypeInSkip
class gallery:
    """Downloader for Reddit native gallery posts (reddit.com/gallery/...).

    Scrapes the embedded ``_r = {...}`` JSON state from the gallery page,
    collects the source URL of every gallery item, and downloads them all
    into a per-post album folder.
    """

    def __init__(self, directory, post):
        """Fetch gallery metadata for *post* and download every image.

        directory -- base Path the album folder is created under
        post      -- post dictionary; must contain 'CONTENTURL' (plus the
                     keys used by GLOBAL.config['filename'] and 'POSTID')
        """
        link = post['CONTENTURL']
        self.rawData = self.getData(link)
        self.directory = directory
        self.post = post

        # Build {index: {'id', 'url'}} for every gallery item with usable
        # metadata. Collection is best-effort: malformed models/items are
        # skipped rather than aborting the whole album.
        images = {}
        count = 0
        models = self.rawData['posts']['models']
        for model in models:
            try:
                media = models[model]['media']
                for item in media['gallery']['items']:
                    try:
                        # 's'/'u' appear to hold the source-resolution URL
                        # for each media id — TODO confirm against Reddit's
                        # gallery page schema.
                        images[count] = {
                            'id': item['mediaId'],
                            'url': media['mediaMetadata'][item['mediaId']]['s']['u'],
                        }
                        count += 1
                    except Exception:
                        # was a bare `except:`; Exception keeps best-effort
                        # behavior without eating KeyboardInterrupt/SystemExit
                        continue
            except Exception:
                continue

        self.downloadAlbum(images, count)

    @staticmethod
    def getData(link):
        """Return the gallery page's embedded JSON state as a dict.

        Raises ImageNotFound on a non-200 response and
        NotADownloadableLinkError when the state blob cannot be located.
        """
        headers = {
            "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64",
            "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        }
        res = requests.get(link, headers=headers)
        if res.status_code != 200:
            raise ImageNotFound(f"Server responded with {res.status_code} to {link}")
        pageSource = res.text

        STARTING_STRING = "_r = {"
        ENDING_STRING = "</script>"
        STARTING_STRING_LENGTH = len(STARTING_STRING)  # typo fix: was LENGHT
        try:
            startIndex = pageSource.index(STARTING_STRING) + STARTING_STRING_LENGTH
            endIndex = pageSource.index(ENDING_STRING, startIndex)
        except ValueError:
            raise NotADownloadableLinkError(f"Could not read the page source on {link}")

        # startIndex-1 re-includes the opening '{' consumed by the marker;
        # [:-1] drops one trailing character after stripping — presumably a
        # ';' terminating the JS assignment (verify against a live page).
        data = json.loads(pageSource[startIndex - 1:endIndex + 1].strip()[:-1])
        return data

    def downloadAlbum(self, images, count):
        """Download *count* images into a folder named from the post.

        Raises FileAlreadyExistsError when every file already existed, and
        AlbumNotDownloadedCompletely when some (but not all) items failed.
        """
        folderName = GLOBAL.config['filename'].format(**self.post)
        folderDir = self.directory / folderName

        howManyDownloaded = 0
        duplicates = 0

        try:
            if not os.path.exists(folderDir):
                os.makedirs(folderDir)
        except FileNotFoundError:
            # Fall back to the post id when the templated name is unusable
            # as a directory name.
            folderDir = self.directory / self.post['POSTID']
            os.makedirs(folderDir)

        print(folderName)

        for i in range(count):
            # Derive the extension from the URL path, not the query string.
            path = urllib.parse.urlparse(images[i]['url']).path
            extension = os.path.splitext(path)[1]

            filename = "_".join([str(i + 1), images[i]['id']]) + extension
            shortFilename = str(i + 1) + "_" + images[i]['id']

            print("\n ({}/{})".format(i + 1, count))

            try:
                getFile(filename, shortFilename, folderDir, images[i]['url'], indent=2)
                howManyDownloaded += 1
                print()
            except FileAlreadyExistsError:
                print(" The file already exists" + " "*10, end="\n\n")
                duplicates += 1
            except TypeInSkip:
                # User configured this media type to be skipped; count it
                # as handled so the album isn't reported as incomplete.
                print(" Skipping...")
                howManyDownloaded += 1
            except Exception as exception:
                print("\n Could not get the file")
                print(
                    " "
                    + "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
                        class_name=exception.__class__.__name__,
                        info=str(exception)
                    )
                    + "\n"
                )
                print(GLOBAL.log_stream.getvalue(), noPrint=True)

        if duplicates == count:
            raise FileAlreadyExistsError
        elif howManyDownloaded + duplicates < count:
            raise AlbumNotDownloadedCompletely(
                "Album Not Downloaded Completely"
            )

View File

@@ -36,7 +36,9 @@ class Redgifs:
if url[-1:] == '/':
url = url[:-1]
url = "https://redgifs.com/watch/" + url.split('/')[-1]
url = urllib.request.Request("https://redgifs.com/watch/" + url.split('/')[-1])
url.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64')
pageSource = (urllib.request.urlopen(url).read().decode())
@@ -47,4 +49,4 @@ class Redgifs:
if content is None:
raise NotADownloadableLinkError("Could not read the page source")
return json.loads(content.contents[0])["video"]["contentUrl"]
return json.loads(content.contents[0])["video"]["contentUrl"]

View File

@@ -332,6 +332,9 @@ def matchWithDownloader(submission):
elif 'gifdeliverynetwork' in submission.domain:
return {'TYPE': 'gifdeliverynetwork'}
if 'reddit.com/gallery' in submission.url:
return {'TYPE': 'gallery'}
elif submission.is_self and 'self' not in GLOBAL.arguments.skip:
return {'TYPE': 'self',
'CONTENT': submission.selftext}