class gallery:
    """Downloader for native Reddit gallery posts.

    Creating an instance performs the whole job: the gallery page named by
    ``post['CONTENTURL']`` is fetched, the JSON object embedded in the page
    is parsed, the image URLs are collected and every image is saved into a
    folder created under ``directory``.
    """

    def __init__(self,directory,post):
        """Fetch, parse and download the gallery described by ``post``.

        Args:
            directory: base directory; it is combined with the folder name
                using ``/``, so it must support that operator.
            post: dict describing the submission. ``CONTENTURL`` is read
                here; ``downloadAlbum`` formats the folder name from the
                whole dict and may fall back to ``POSTID``.

        Raises:
            Whatever ``getData`` and ``downloadAlbum`` raise.
        """
        link = post['CONTENTURL']
        self.rawData = self.getData(link)

        self.directory = directory
        self.post = post

        images = self._collectImages(self.rawData)
        self.downloadAlbum(images, len(images))

    @staticmethod
    def _collectImages(rawData):
        """Return the gallery images found in the parsed page data.

        Args:
            rawData: the decoded page object; ``rawData['posts']['models']``
                must exist (a missing key propagates as ``KeyError``).

        Returns:
            A list of ``{'id': mediaId, 'url': url}`` dicts in gallery order.
            Models without gallery media, and items without a usable URL,
            are skipped.
        """
        images = []
        models = rawData['posts']['models']
        for modelId in models:
            try:
                media = models[modelId]['media']
                for item in media['gallery']['items']:
                    try:
                        mediaId = item['mediaId']
                        url = media['mediaMetadata'][mediaId]['s']['u']
                    except (KeyError, TypeError):
                        # This item has no source URL; keep the other items.
                        continue
                    images.append({'id': mediaId, 'url': url})
            except (KeyError, TypeError):
                # Not a gallery post (media missing or None); skip the model.
                continue
        return images

    @staticmethod
    def getData(link):
        """Download the gallery page and return the data embedded in it.

        Args:
            link: URL of the gallery page.

        Returns:
            The decoded JSON object that follows ``_r = `` in the page.

        Raises:
            ImageNotFound: the server did not answer with status 200.
            NotADownloadableLinkError: the embedded data could not be read.
        """
        headers = {
            "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64",
            "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        }
        res = requests.get(link, headers=headers)
        if res.status_code != 200:
            raise ImageNotFound(f"Server responded with {res.status_code} to {link}")
        return gallery._parsePageSource(res.text, link)

    @staticmethod
    def _parsePageSource(pageSource, link):
        """Extract the JSON object assigned with ``_r = {`` in ``pageSource``.

        The object is decoded in place, so whatever follows its closing
        brace (``;``, further script code, closing tags) is ignored and no
        end marker has to be located.

        Args:
            pageSource: text of the gallery page.
            link: page URL, used only in the error message.

        Raises:
            NotADownloadableLinkError: the marker is missing or the text
                after it is not valid JSON.
        """
        STARTING_STRING = "_r = {"
        try:
            # The marker ends with the opening brace, so step back one
            # character to start decoding at the "{" itself.
            objectStart = pageSource.index(STARTING_STRING) + len(STARTING_STRING) - 1
            data, _ = json.JSONDecoder().raw_decode(pageSource, objectStart)
        except ValueError as error:
            # str.index raises ValueError; JSONDecodeError is a subclass.
            raise NotADownloadableLinkError(
                f"Could not read the page source on {link}"
            ) from error
        return data

    def downloadAlbum(self, images, count):
        """Download ``count`` images into a folder named after the post.

        Args:
            images: collection indexable by ``0 .. count-1`` whose entries
                are dicts with the keys ``'id'`` and ``'url'``.
            count: number of images to download.

        Raises:
            ImageNotFound: ``count`` is zero, i.e. there is nothing to get.
            FileAlreadyExistsError: every image was already present.
            AlbumNotDownloadedCompletely: at least one image failed.
        """
        if count <= 0:
            # Without this guard "duplicates == count" below would hold and
            # an empty gallery would be reported as already downloaded.
            raise ImageNotFound("No downloadable images were found in the gallery")

        folderName = GLOBAL.config['filename'].format(**self.post)
        folderDir = self.directory / folderName

        howManyDownloaded = 0
        duplicates = 0

        try:
            os.makedirs(folderDir, exist_ok=True)
        except OSError:
            # The formatted name could not be created; use the post id.
            # OSError is caught because this module imports a project class
            # named FileNotFoundError that shadows the builtin, so that name
            # presumably never matches what os.makedirs raises.
            folderDir = self.directory / self.post['POSTID']
            os.makedirs(folderDir, exist_ok=True)

        print(folderName)

        for i in range(count):
            image = images[i]
            path = urllib.parse.urlparse(image['url']).path
            extension = os.path.splitext(path)[1]

            shortFilename = str(i+1) + "_" + image['id']
            filename = shortFilename + extension

            print("\n ({}/{})".format(i+1,count))

            try:
                getFile(filename,shortFilename,folderDir,image['url'],indent=2)
                howManyDownloaded += 1
                print()

            except FileAlreadyExistsError:
                print(" The file already exists" + " "*10,end="\n\n")
                duplicates += 1

            except TypeInSkip:
                # A skipped type counts as handled, not as a failure.
                print(" Skipping...")
                howManyDownloaded += 1

            except Exception as exception:
                # Best effort: report the failure and go on with the next
                # image; the shortfall is detected after the loop.
                print("\n Could not get the file")
                print(
                    " "
                    + "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
                        class_name=exception.__class__.__name__,
                        info=str(exception)
                    )
                    + "\n"
                )
                print(GLOBAL.log_stream.getvalue(),noPrint=True)

        if duplicates == count:
            raise FileAlreadyExistsError
        elif howManyDownloaded + duplicates < count:
            raise AlbumNotDownloadedCompletely(
                "Album Not Downloaded Completely"
            )
c40ee547b592b398226e81f1d147d7c942e21bcc Mon Sep 17 00:00:00 2001 From: Erik Johnson Date: Thu, 7 Jan 2021 10:40:33 +0100 Subject: [PATCH 2/6] Add native gallery catch --- src/searcher.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/searcher.py b/src/searcher.py index a891322..f5fd1ec 100644 --- a/src/searcher.py +++ b/src/searcher.py @@ -332,6 +332,9 @@ def matchWithDownloader(submission): elif 'gifdeliverynetwork' in submission.domain: return {'TYPE': 'gifdeliverynetwork'} + if 'reddit.com/gallery' in submission.url: #EBJ + return {'TYPE': 'gallery'} + elif submission.is_self and 'self' not in GLOBAL.arguments.skip: return {'TYPE': 'self', 'CONTENT': submission.selftext} From 37fdd87ab9185b3bf592c36b78fb5b5b1484365a Mon Sep 17 00:00:00 2001 From: Erik Johnson Date: Thu, 7 Jan 2021 10:42:41 +0100 Subject: [PATCH 3/6] add native gallery --- script.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/script.py b/script.py index 1eda96c..d7e8c02 100644 --- a/script.py +++ b/script.py @@ -24,6 +24,7 @@ from src.downloaders.selfPost import SelfPost from src.downloaders.vreddit import VReddit from src.downloaders.youtube import Youtube from src.downloaders.gifDeliveryNetwork import GifDeliveryNetwork +from src.downloaders.gallery import gallery from src.errors import ImgurLimitError, NoSuitablePost, FileAlreadyExistsError, ImgurLoginError, NotADownloadableLinkError, NoSuitablePost, InvalidJSONFile, FailedToDownload, TypeInSkip, DomainInSkip, AlbumNotDownloadedCompletely, full_exc_info from src.parser import LinkDesigner from src.searcher import getPosts @@ -87,7 +88,7 @@ def downloadPost(SUBMISSION,directory): downloaders = { "imgur":Imgur,"gfycat":Gfycat,"erome":Erome,"direct":Direct,"self":SelfPost, "redgifs":Redgifs, "gifdeliverynetwork": GifDeliveryNetwork, - "v.redd.it": VReddit, "youtube": Youtube + "v.redd.it": VReddit, "youtube": Youtube, "gallery": gallery } print() From c6a346eb9038eddb649750fdab6565ca4d93f12d Mon Sep 17 00:00:00 
2001 From: Erik Johnson Date: Thu, 7 Jan 2021 10:45:16 +0100 Subject: [PATCH 4/6] Beta optout, fix for random imgur json error I have no idea what I'm doing, this is my first Python dabbling --- src/downloaders/Imgur.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/downloaders/Imgur.py b/src/downloaders/Imgur.py index 0f82c28..d361b31 100644 --- a/src/downloaders/Imgur.py +++ b/src/downloaders/Imgur.py @@ -114,7 +114,7 @@ class Imgur: @staticmethod def getData(link): - cookies = {"over18": "1"} + cookies = {"over18": "1", "postpagebeta": "0"} res = requests.get(link, cookies=cookies) if res.status_code != 200: raise ImageNotFound(f"Server responded with {res.status_code} to {link}") pageSource = requests.get(link, cookies=cookies).text @@ -125,11 +125,17 @@ class Imgur: STARTING_STRING_LENGHT = len(STARTING_STRING) try: startIndex = pageSource.index(STARTING_STRING) + STARTING_STRING_LENGHT - endIndex = pageSource.index(ENDING_STRING) + endIndex = pageSource.index(ENDING_STRING, startIndex) except ValueError: raise NotADownloadableLinkError(f"Could not read the page source on {link}") - data = pageSource[startIndex:endIndex].strip()[:-1] + while pageSource[endIndex] != "}": + endIndex=endIndex-1 + try: + data = pageSource[startIndex:endIndex+2].strip()[:-1] + except: + pageSource[endIndex+1]='}' + data = pageSource[startIndex:endIndex+3].strip()[:-1] return json.loads(data) From e7bf66f28b462c0d648018840e3ff910f42e6721 Mon Sep 17 00:00:00 2001 From: Erik Johnson Date: Thu, 7 Jan 2021 10:48:47 +0100 Subject: [PATCH 5/6] Add native gallery catch --- src/searcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/searcher.py b/src/searcher.py index f5fd1ec..df71bbb 100644 --- a/src/searcher.py +++ b/src/searcher.py @@ -332,7 +332,7 @@ def matchWithDownloader(submission): elif 'gifdeliverynetwork' in submission.domain: return {'TYPE': 'gifdeliverynetwork'} - if 'reddit.com/gallery' in submission.url: #EBJ + if 
'reddit.com/gallery' in submission.url: return {'TYPE': 'gallery'} elif submission.is_self and 'self' not in GLOBAL.arguments.skip: From 332e08d038d2577d3dbb62eadcb113782edb80f7 Mon Sep 17 00:00:00 2001 From: Erik Johnson Date: Sun, 17 Jan 2021 22:58:51 +0100 Subject: [PATCH 6/6] Fix for access denied --- src/downloaders/redgifs.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/downloaders/redgifs.py b/src/downloaders/redgifs.py index af2e4ec..95502de 100644 --- a/src/downloaders/redgifs.py +++ b/src/downloaders/redgifs.py @@ -36,7 +36,9 @@ class Redgifs: if url[-1:] == '/': url = url[:-1] - url = "https://redgifs.com/watch/" + url.split('/')[-1] + url = urllib.request.Request("https://redgifs.com/watch/" + url.split('/')[-1]) + + url.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64') pageSource = (urllib.request.urlopen(url).read().decode()) @@ -47,4 +49,4 @@ class Redgifs: if content is None: raise NotADownloadableLinkError("Could not read the page source") - return json.loads(content.contents[0])["video"]["contentUrl"] \ No newline at end of file + return json.loads(content.contents[0])["video"]["contentUrl"]