Merge pull request #177 from EkriirkE/master
Add Reddit native album (gallery) downloader; fix a minor Imgur JSON parsing error; fix "access denied" responses in Redgifs
This commit is contained in:
@@ -24,6 +24,7 @@ from src.downloaders.selfPost import SelfPost
|
|||||||
from src.downloaders.vreddit import VReddit
|
from src.downloaders.vreddit import VReddit
|
||||||
from src.downloaders.youtube import Youtube
|
from src.downloaders.youtube import Youtube
|
||||||
from src.downloaders.gifDeliveryNetwork import GifDeliveryNetwork
|
from src.downloaders.gifDeliveryNetwork import GifDeliveryNetwork
|
||||||
|
from src.downloaders.gallery import gallery
|
||||||
from src.errors import ImgurLimitError, NoSuitablePost, FileAlreadyExistsError, ImgurLoginError, NotADownloadableLinkError, NoSuitablePost, InvalidJSONFile, FailedToDownload, TypeInSkip, DomainInSkip, AlbumNotDownloadedCompletely, full_exc_info
|
from src.errors import ImgurLimitError, NoSuitablePost, FileAlreadyExistsError, ImgurLoginError, NotADownloadableLinkError, NoSuitablePost, InvalidJSONFile, FailedToDownload, TypeInSkip, DomainInSkip, AlbumNotDownloadedCompletely, full_exc_info
|
||||||
from src.parser import LinkDesigner
|
from src.parser import LinkDesigner
|
||||||
from src.searcher import getPosts
|
from src.searcher import getPosts
|
||||||
@@ -87,7 +88,7 @@ def downloadPost(SUBMISSION,directory):
|
|||||||
downloaders = {
|
downloaders = {
|
||||||
"imgur":Imgur,"gfycat":Gfycat,"erome":Erome,"direct":Direct,"self":SelfPost,
|
"imgur":Imgur,"gfycat":Gfycat,"erome":Erome,"direct":Direct,"self":SelfPost,
|
||||||
"redgifs":Redgifs, "gifdeliverynetwork": GifDeliveryNetwork,
|
"redgifs":Redgifs, "gifdeliverynetwork": GifDeliveryNetwork,
|
||||||
"v.redd.it": VReddit, "youtube": Youtube
|
"v.redd.it": VReddit, "youtube": Youtube, "gallery": gallery
|
||||||
}
|
}
|
||||||
|
|
||||||
print()
|
print()
|
||||||
|
|||||||
@@ -114,7 +114,7 @@ class Imgur:
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def getData(link):
|
def getData(link):
|
||||||
|
|
||||||
cookies = {"over18": "1"}
|
cookies = {"over18": "1", "postpagebeta": "0"}
|
||||||
res = requests.get(link, cookies=cookies)
|
res = requests.get(link, cookies=cookies)
|
||||||
if res.status_code != 200: raise ImageNotFound(f"Server responded with {res.status_code} to {link}")
|
if res.status_code != 200: raise ImageNotFound(f"Server responded with {res.status_code} to {link}")
|
||||||
pageSource = requests.get(link, cookies=cookies).text
|
pageSource = requests.get(link, cookies=cookies).text
|
||||||
@@ -125,11 +125,17 @@ class Imgur:
|
|||||||
STARTING_STRING_LENGHT = len(STARTING_STRING)
|
STARTING_STRING_LENGHT = len(STARTING_STRING)
|
||||||
try:
|
try:
|
||||||
startIndex = pageSource.index(STARTING_STRING) + STARTING_STRING_LENGHT
|
startIndex = pageSource.index(STARTING_STRING) + STARTING_STRING_LENGHT
|
||||||
endIndex = pageSource.index(ENDING_STRING)
|
endIndex = pageSource.index(ENDING_STRING, startIndex)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
raise NotADownloadableLinkError(f"Could not read the page source on {link}")
|
raise NotADownloadableLinkError(f"Could not read the page source on {link}")
|
||||||
|
|
||||||
data = pageSource[startIndex:endIndex].strip()[:-1]
|
while pageSource[endIndex] != "}":
|
||||||
|
endIndex=endIndex-1
|
||||||
|
try:
|
||||||
|
data = pageSource[startIndex:endIndex+2].strip()[:-1]
|
||||||
|
except:
|
||||||
|
pageSource[endIndex+1]='}'
|
||||||
|
data = pageSource[startIndex:endIndex+3].strip()[:-1]
|
||||||
|
|
||||||
return json.loads(data)
|
return json.loads(data)
|
||||||
|
|
||||||
|
|||||||
120
src/downloaders/gallery.py
Normal file
120
src/downloaders/gallery.py
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
import io
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import urllib
|
||||||
|
import requests
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from src.utils import GLOBAL, nameCorrector
|
||||||
|
from src.utils import printToFile as print
|
||||||
|
from src.downloaders.Direct import Direct
|
||||||
|
from src.downloaders.downloaderUtils import getFile
|
||||||
|
from src.errors import FileNotFoundError, FileAlreadyExistsError, AlbumNotDownloadedCompletely, ImageNotFound, ExtensionError, NotADownloadableLinkError, TypeInSkip
|
||||||
|
|
||||||
|
class gallery:
    """Downloader for native Reddit gallery posts (reddit.com/gallery/...).

    Fetches the gallery page, extracts the embedded "_r = {...}" JSON
    state, collects a direct URL for every gallery item, and downloads
    each image into a per-post folder.
    """

    def __init__(self, directory, post):
        # directory: base path (pathlib.Path) under which the album
        #            folder is created
        # post:      dict describing the submission; must contain
        #            'CONTENTURL', and 'POSTID' is used as a fallback
        #            folder name in downloadAlbum
        link = post['CONTENTURL']
        self.rawData = self.getData(link)

        self.directory = directory
        self.post = post

        # Collect {index: {'id', 'url'}} for every downloadable item.
        images = {}
        count = 0
        for model in self.rawData['posts']['models']:
            try:
                for item in self.rawData['posts']['models'][model]['media']['gallery']['items']:
                    try:
                        images[count] = {
                            'id': item['mediaId'],
                            'url': self.rawData['posts']['models'][model]['media']['mediaMetadata'][item['mediaId']]['s']['u']
                        }
                        count += 1
                    # Item without usable metadata (e.g. removed/broken
                    # media entry) -- skip it and keep the rest.
                    # Narrowed from a bare `except:` to the lookup errors
                    # the nested subscripting can actually raise.
                    except (KeyError, TypeError, IndexError, AttributeError):
                        continue
            # Model that is not a gallery (no media/gallery/items path).
            except (KeyError, TypeError, IndexError, AttributeError):
                continue

        self.downloadAlbum(images, count)

    @staticmethod
    def getData(link):
        """Fetch *link* and return the page's embedded JSON state as a dict.

        Raises ImageNotFound on a non-200 response and
        NotADownloadableLinkError when the expected script markers are
        not found in the page source.
        """
        headers = {
            "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64",
            "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        }
        res = requests.get(link, headers=headers)
        if res.status_code != 200: raise ImageNotFound(f"Server responded with {res.status_code} to {link}")
        pageSource = res.text

        # Reddit inlines the page state as "_r = {...}" inside a <script>
        # tag; we slice between these two markers.
        STARTING_STRING = "_r = {"
        ENDING_STRING = "</script>"

        STARTING_STRING_LENGTH = len(STARTING_STRING)  # renamed from misspelled LENGHT
        try:
            startIndex = pageSource.index(STARTING_STRING) + STARTING_STRING_LENGTH
            endIndex = pageSource.index(ENDING_STRING, startIndex)
        except ValueError:
            raise NotADownloadableLinkError(f"Could not read the page source on {link}")

        # startIndex-1 re-includes the opening "{"; the slice up to
        # endIndex+1 ends on the "<" of "</script>", which strip()[:-1]
        # then drops so only the JSON object remains.
        data = json.loads(pageSource[startIndex-1:endIndex+1].strip()[:-1])
        return data

    def downloadAlbum(self, images, count):
        """Download *count* images from *images* into a per-post folder.

        images: dict mapping 0..count-1 to {'id', 'url'} entries.
        Raises FileAlreadyExistsError when every item was a duplicate,
        AlbumNotDownloadedCompletely when only some items succeeded.
        """
        folderName = GLOBAL.config['filename'].format(**self.post)
        folderDir = self.directory / folderName

        howManyDownloaded = 0
        duplicates = 0

        try:
            if not os.path.exists(folderDir):
                os.makedirs(folderDir)
        except FileNotFoundError:
            # The formatted name can be unusable on the filesystem; fall
            # back to the post id. NOTE(review): FileNotFoundError is
            # imported from src.errors here, shadowing the builtin --
            # confirm it matches what os.makedirs actually raises.
            folderDir = self.directory / self.post['POSTID']
            os.makedirs(folderDir)

        print(folderName)

        for i in range(count):
            # Derive the extension from the URL path (query string excluded).
            path = urllib.parse.urlparse(images[i]['url']).path
            extension = os.path.splitext(path)[1]

            filename = "_".join([
                str(i+1), images[i]['id']
            ]) + extension
            shortFilename = str(i+1) + "_" + images[i]['id']

            print("\n ({}/{})".format(i+1,count))

            try:
                getFile(filename,shortFilename,folderDir,images[i]['url'],indent=2)
                howManyDownloaded += 1
                print()

            except FileAlreadyExistsError:
                print(" The file already exists" + " "*10,end="\n\n")
                duplicates += 1

            except TypeInSkip:
                # Skipped file types count as handled, not failed.
                print(" Skipping...")
                howManyDownloaded += 1

            except Exception as exception:
                # Best-effort album download: log the failure and keep
                # going; completeness is checked after the loop.
                print("\n Could not get the file")
                print(
                    " "
                    + "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
                        class_name=exception.__class__.__name__,
                        info=str(exception)
                    )
                    + "\n"
                )
                print(GLOBAL.log_stream.getvalue(),noPrint=True)

        if duplicates == count:
            raise FileAlreadyExistsError
        elif howManyDownloaded + duplicates < count:
            raise AlbumNotDownloadedCompletely(
                "Album Not Downloaded Completely"
            )
||||||
@@ -36,7 +36,9 @@ class Redgifs:
|
|||||||
if url[-1:] == '/':
|
if url[-1:] == '/':
|
||||||
url = url[:-1]
|
url = url[:-1]
|
||||||
|
|
||||||
url = "https://redgifs.com/watch/" + url.split('/')[-1]
|
url = urllib.request.Request("https://redgifs.com/watch/" + url.split('/')[-1])
|
||||||
|
|
||||||
|
url.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64')
|
||||||
|
|
||||||
pageSource = (urllib.request.urlopen(url).read().decode())
|
pageSource = (urllib.request.urlopen(url).read().decode())
|
||||||
|
|
||||||
|
|||||||
@@ -332,6 +332,9 @@ def matchWithDownloader(submission):
|
|||||||
elif 'gifdeliverynetwork' in submission.domain:
|
elif 'gifdeliverynetwork' in submission.domain:
|
||||||
return {'TYPE': 'gifdeliverynetwork'}
|
return {'TYPE': 'gifdeliverynetwork'}
|
||||||
|
|
||||||
|
if 'reddit.com/gallery' in submission.url:
|
||||||
|
return {'TYPE': 'gallery'}
|
||||||
|
|
||||||
elif submission.is_self and 'self' not in GLOBAL.arguments.skip:
|
elif submission.is_self and 'self' not in GLOBAL.arguments.skip:
|
||||||
return {'TYPE': 'self',
|
return {'TYPE': 'self',
|
||||||
'CONTENT': submission.selftext}
|
'CONTENT': submission.selftext}
|
||||||
|
|||||||
Reference in New Issue
Block a user