import os import urllib.request from html.parser import HTMLParser from src.downloaders.downloaderUtils import getFile from src.downloaders.downloaderUtils import getExtension from src.errors import (AlbumNotDownloadedCompletely, NotADownloadableLinkError, FileAlreadyExistsError) from src.utils import GLOBAL from src.utils import printToFile as print class Erome: def __init__(self, directory, post): try: IMAGES = self.getLinks(post['CONTENTURL']) except urllib.error.HTTPError: raise NotADownloadableLinkError("Not a downloadable link") imagesLenght = len(IMAGES) howManyDownloaded = imagesLenght duplicates = 0 if imagesLenght == 1: extension = getExtension(IMAGES[0]) """Filenames are declared here""" filename = GLOBAL.config['filename'].format( **post) + post["EXTENSION"] shortFilename = post['POSTID'] + extension imageURL = IMAGES[0] if 'https://' not in imageURL or 'http://' not in imageURL: imageURL = "https://" + imageURL getFile(filename, shortFilename, directory, imageURL) else: filename = GLOBAL.config['filename'].format(**post) print(filename) folderDir = directory / filename try: if not os.path.exists(folderDir): os.makedirs(folderDir) except FileNotFoundError: folderDir = directory / post['POSTID'] os.makedirs(folderDir) for i in range(imagesLenght): extension = getExtension(IMAGES[i]) filename = str(i + 1) + extension imageURL = IMAGES[i] if 'https://' not in imageURL and 'http://' not in imageURL: imageURL = "https://" + imageURL print(" ({}/{})".format(i + 1, imagesLenght)) print(" {}".format(filename)) try: getFile(filename, filename, folderDir, imageURL, indent=2) print() except FileAlreadyExistsError: print(" The file already exists" + " " * 10, end="\n\n") duplicates += 1 howManyDownloaded -= 1 except Exception as exception: # raise exception print("\n Could not get the file") print( " " + "{class_name}: {info}".format( class_name=exception.__class__.__name__, info=str(exception) ) + "\n" ) howManyDownloaded -= 1 if duplicates == imagesLenght: raise FileAlreadyExistsError if howManyDownloaded + duplicates < imagesLenght: raise AlbumNotDownloadedCompletely( "Album Not Downloaded Completely" ) def getLinks(self, url, lineNumber=129): content = [] lineNumber = None class EromeParser(HTMLParser): tag = None def handle_starttag(self, tag, attrs): self.tag = {tag: {attr[0]: attr[1] for attr in attrs}} pageSource = (urllib.request.urlopen(url).read().decode().split('\n')) """ FIND WHERE ALBUM STARTS IN ORDER NOT TO GET WRONG LINKS""" for i in range(len(pageSource)): obj = EromeParser() obj.feed(pageSource[i]) tag = obj.tag if tag is not None: if "div" in tag: if "id" in tag["div"]: if tag["div"]["id"] == "album": lineNumber = i break for line in pageSource[lineNumber:]: obj = EromeParser() obj.feed(line) tag = obj.tag if tag is not None: if "img" in tag: if "class" in tag["img"]: if tag["img"]["class"] == "img-front": content.append(tag["img"]["src"]) elif "source" in tag: content.append(tag["source"]["src"]) return [ link for link in content if link.endswith("_480p.mp4") or not link.endswith(".mp4") ]