From 851c10f6e98c4e32beaa3989956514ee4dda3719 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ali=20Parlak=C3=A7=C4=B1?= Date: Sun, 21 Mar 2021 22:37:30 +0300 Subject: [PATCH] Fix gallery (#203) * Delete Gallery.py * Update and rename gallery.py to Gallery.py * Update script.py --- script.py | 1 - src/downloaders/Gallery.py | 126 +++++++++++++++++++++++++++++++++---- src/downloaders/gallery.py | 121 ----------------------------------- 3 files changed, 113 insertions(+), 135 deletions(-) delete mode 100644 src/downloaders/gallery.py diff --git a/script.py b/script.py index 61315d6..2394778 100644 --- a/script.py +++ b/script.py @@ -22,7 +22,6 @@ from src.downloaders.selfPost import SelfPost from src.downloaders.vreddit import VReddit from src.downloaders.youtube import Youtube from src.downloaders.gifDeliveryNetwork import GifDeliveryNetwork -from src.downloaders.gallery import gallery from src.errors import ImgurLimitError, FileAlreadyExistsError, ImgurLoginError, NotADownloadableLinkError, NoSuitablePost, InvalidJSONFile, FailedToDownload, TypeInSkip, DomainInSkip, AlbumNotDownloadedCompletely, full_exc_info from src.searcher import getPosts from src.utils import (GLOBAL, createLogFile, nameCorrector, diff --git a/src/downloaders/Gallery.py b/src/downloaders/Gallery.py index 2fdc5ca..780c60c 100644 --- a/src/downloaders/Gallery.py +++ b/src/downloaders/Gallery.py @@ -1,21 +1,121 @@ import os +import json +import urllib +import requests -from src.downloaders.downloaderUtils import getFile, getExtension - -from src.errors import FileNameTooLong from src.utils import GLOBAL from src.utils import printToFile as print +from src.downloaders.downloaderUtils import getFile +from src.errors import FileNotFoundError, FileAlreadyExistsError, AlbumNotDownloadedCompletely, ImageNotFound, NotADownloadableLinkError, TypeInSkip + class Gallery: - def __init__(self,directory,POST): - i=0 - for key in POST['CONTENTURL']: - i=i+1 - extension = getExtension(key) - if not os.path.exists(directory): os.makedirs(directory) + def __init__(self, directory, post): - filename = GLOBAL.config['filename'].format(**POST)+' - '+str(i)+extension - print(filename) - shortFilename = POST['POSTID']+' - '+str(i)+extension + link = post['CONTENTURL'] + self.rawData = self.getData(link) - getFile(filename,shortFilename,directory,key) \ No newline at end of file + self.directory = directory + self.post = post + + images = {} + count = 0 + for model in self.rawData['posts']['models']: + try: + for item in self.rawData['posts']['models'][model]['media']['gallery']['items']: + try: + images[count] = {'id': item['mediaId'], 'url': self.rawData['posts'][ + 'models'][model]['media']['mediaMetadata'][item['mediaId']]['s']['u']} + count = count + 1 + except BaseException: + continue + except BaseException: + continue + + self.downloadAlbum(images, count) + + @staticmethod + def getData(link): + + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8", + } + res = requests.get(link, headers=headers) + if res.status_code != 200: + raise ImageNotFound( + f"Server responded with {res.status_code} to {link}") + pageSource = res.text + + STARTING_STRING = "_r = {" + ENDING_STRING = "" + + STARTING_STRING_LENGHT = len(STARTING_STRING) + try: + startIndex = pageSource.index( + STARTING_STRING) + STARTING_STRING_LENGHT + endIndex = pageSource.index(ENDING_STRING, startIndex) + except ValueError: + raise NotADownloadableLinkError( + f"Could not read the page source on {link}") + + data = json.loads(pageSource[startIndex - 1:endIndex + 1].strip()[:-1]) + return data + + def downloadAlbum(self, images, count): + folderName = GLOBAL.config['filename'].format(**self.post) + folderDir = self.directory / folderName + + howManyDownloaded = 0 + duplicates = 0 + + try: + if not os.path.exists(folderDir): + os.makedirs(folderDir) + except FileNotFoundError: + folderDir = self.directory / self.post['POSTID'] + os.makedirs(folderDir) + + print(folderName) + + for i in range(count): + path = urllib.parse.urlparse(images[i]['url']).path + extension = os.path.splitext(path)[1] + + filename = "_".join([ + str(i + 1), images[i]['id'] + ]) + extension + shortFilename = str(i + 1) + "_" + images[i]['id'] + + print("\n ({}/{})".format(i + 1, count)) + + try: + getFile(filename, shortFilename, folderDir, + images[i]['url'], indent=2) + howManyDownloaded += 1 + print() + + except FileAlreadyExistsError: + print(" The file already exists" + " " * 10, end="\n\n") + duplicates += 1 + + except TypeInSkip: + print(" Skipping...") + howManyDownloaded += 1 + + except Exception as exception: + print("\n Could not get the file") + print( + " " + + "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format( + class_name=exception.__class__.__name__, + info=str(exception)) + + "\n") + print(GLOBAL.log_stream.getvalue(), noPrint=True) + + if duplicates == count: + raise FileAlreadyExistsError + if howManyDownloaded + duplicates < count: + raise AlbumNotDownloadedCompletely( + "Album Not Downloaded Completely" + ) diff --git a/src/downloaders/gallery.py b/src/downloaders/gallery.py deleted file mode 100644 index ae32646..0000000 --- a/src/downloaders/gallery.py +++ /dev/null @@ -1,121 +0,0 @@ -import os -import json -import urllib -import requests - -from src.utils import GLOBAL -from src.utils import printToFile as print -from src.downloaders.downloaderUtils import getFile -from src.errors import FileNotFoundError, FileAlreadyExistsError, AlbumNotDownloadedCompletely, ImageNotFound, NotADownloadableLinkError, TypeInSkip - - -class gallery: - def __init__(self, directory, post): - - link = post['CONTENTURL'] - self.rawData = self.getData(link) - - self.directory = directory - self.post = post - - images = {} - count = 0 - for model in self.rawData['posts']['models']: - try: - for item in self.rawData['posts']['models'][model]['media']['gallery']['items']: - try: - images[count] = {'id': item['mediaId'], 'url': self.rawData['posts'][ - 'models'][model]['media']['mediaMetadata'][item['mediaId']]['s']['u']} - count = count + 1 - except BaseException: - continue - except BaseException: - continue - - self.downloadAlbum(images, count) - - @staticmethod - def getData(link): - - headers = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64", - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8", - } - res = requests.get(link, headers=headers) - if res.status_code != 200: - raise ImageNotFound( - f"Server responded with {res.status_code} to {link}") - pageSource = res.text - - STARTING_STRING = "_r = {" - ENDING_STRING = "" - - STARTING_STRING_LENGHT = len(STARTING_STRING) - try: - startIndex = pageSource.index( - STARTING_STRING) + STARTING_STRING_LENGHT - endIndex = pageSource.index(ENDING_STRING, startIndex) - except ValueError: - raise NotADownloadableLinkError( - f"Could not read the page source on {link}") - - data = json.loads(pageSource[startIndex - 1:endIndex + 1].strip()[:-1]) - return data - - def downloadAlbum(self, images, count): - folderName = GLOBAL.config['filename'].format(**self.post) - folderDir = self.directory / folderName - - howManyDownloaded = 0 - duplicates = 0 - - try: - if not os.path.exists(folderDir): - os.makedirs(folderDir) - except FileNotFoundError: - folderDir = self.directory / self.post['POSTID'] - os.makedirs(folderDir) - - print(folderName) - - for i in range(count): - path = urllib.parse.urlparse(images[i]['url']).path - extension = os.path.splitext(path)[1] - - filename = "_".join([ - str(i + 1), images[i]['id'] - ]) + extension - shortFilename = str(i + 1) + "_" + images[i]['id'] - - print("\n ({}/{})".format(i + 1, count)) - - try: - getFile(filename, shortFilename, folderDir, - images[i]['url'], indent=2) - howManyDownloaded += 1 - print() - - except FileAlreadyExistsError: - print(" The file already exists" + " " * 10, end="\n\n") - duplicates += 1 - - except TypeInSkip: - print(" Skipping...") - howManyDownloaded += 1 - - except Exception as exception: - print("\n Could not get the file") - print( - " " + - "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format( - class_name=exception.__class__.__name__, - info=str(exception)) + - "\n") - print(GLOBAL.log_stream.getvalue(), noPrint=True) - - if duplicates == count: - raise FileAlreadyExistsError - if howManyDownloaded + duplicates < count: - raise AlbumNotDownloadedCompletely( - "Album Not Downloaded Completely" - )