Added erome support

2018-07-23 23:16:56 +03:00
parent 2d334d56bf
commit 7314e17125
3 changed files with 158 additions and 19 deletions
--- a/src/downloader.py
+++ b/src/downloader.py
@@ -2,6 +2,7 @@ import io
 import os
 import sys
 import urllib.request
+from html.parser import HTMLParser
 from pathlib import Path

 import imgurpython
@@ -69,6 +70,134 @@ def getFile(fileDir,tempDir,imageURL,indent=0):
    else:
        raise FileAlreadyExistsError

+class Erome:
+    def __init__(self,directory,post):
+        # try:
+        #     IMAGES = self.getLinks(post['postURL'])
+        # except IndexError:
+        #     # raise NotADownloadableLinkError("Could not read the page source")
+        #     pass
+        # except Exception as exception:
+        #     pass
+        #     # raise NotADownloadableLinkError("Could not read the page source")
+        IMAGES = self.getLinks(post['postURL'])
+
+        imagesLenght = len(IMAGES)
+        howManyDownloaded = imagesLenght
+        duplicates = 0
+
+        if imagesLenght == 1:
+            
+            extension = getExtension(IMAGES[0])
+
+            title = nameCorrector(post['postTitle'])
+            print(title+"_" +post['postId']+extension)
+
+            fileDir = title + "_" + post['postId'] + extension
+            fileDir = directory / fileDir
+
+            tempDir = title + "_" + post['postId'] + '.tmp'
+            tempDir = directory / tempDir
+
+            imageURL = "https:" + IMAGES[0]
+
+            try:
+                getFile(fileDir,tempDir,imageURL)
+            except FileNameTooLong:
+                fileDir = directory / (post['postId'] + extension)
+                tempDir = directory / (post['postId'] + '.tmp')
+                getFile(fileDir,tempDir,imageURL)
+
+        else:
+            title = nameCorrector(post['postTitle'])
+            print(title+"_"+post['postId'],end="\n\n")
+
+            folderDir = directory / (title+"_"+post['postId'])
+
+            try:
+                if not os.path.exists(folderDir):
+                    os.makedirs(folderDir)
+            except FileNotFoundError:
+                folderDir = directory / post['postId']
+                os.makedirs(folderDir)
+
+            for i in range(imagesLenght):
+                
+                extension = getExtension(IMAGES[i])
+
+                fileName = str(i+1)
+                imageURL = "https:" + IMAGES[i]
+
+                fileDir = folderDir / (fileName + extension)
+                tempDir = folderDir / (fileName + ".tmp")
+
+                print("  ({}/{})".format(i+1,imagesLenght))
+                print("  {}".format(fileName+extension))
+
+                try:
+                    getFile(fileDir,tempDir,imageURL,indent=2)
+                    print()
+                except FileAlreadyExistsError:
+                    print("  The file already exists" + " "*10,end="\n\n")
+                    duplicates += 1
+                    howManyDownloaded -= 1
+
+                except Exception as exception:
+                    raise exception
+                    print("\n  Could not get the file")
+                    print("  " + str(exception) + "\n")
+                    exceptionType = exception
+                    howManyDownloaded -= 1
+
+            if duplicates == imagesLenght:
+                raise FileAlreadyExistsError
+            elif howManyDownloaded + duplicates < imagesLenght:
+                raise AlbumNotDownloadedCompletely(
+                    "Album Not Downloaded Completely"
+                )
+
+    def getLinks(self,url,lineNumber=129):
+ 
+        content = []
+        lineNumber = None
+
+        class EromeParser(HTMLParser):
+            tag = None
+            def handle_starttag(self, tag, attrs):
+                self.tag = {tag:{attr[0]: attr[1] for attr in attrs}}
+
+        pageSource = (urllib.request.urlopen(url).read().decode().split('\n'))
+
+        """ FIND WHERE ALBUM STARTS IN ORDER NOT TO GET WRONG LINKS"""
+        for i in range(len(pageSource)):
+            obj = EromeParser()
+            obj.feed(pageSource[i])
+            tag = obj.tag
+            
+            if tag is not None:
+                if "div" in tag:
+                    if "id" in tag["div"]:
+                        if tag["div"]["id"] == "album":
+                            lineNumber = i
+                            break
+
+        for line in pageSource[lineNumber:]:
+            obj = EromeParser()
+            obj.feed(line)
+            tag = obj.tag
+            if tag is not None:
+                if "img" in tag:
+                    if "class" in tag["img"]:
+                        if tag["img"]["class"]=="img-front":
+                            content.append(tag["img"]["src"])
+                elif "source" in tag:
+                    content.append(tag["source"]["src"])
+                    
+        return [
+            link for link in content \
+            if link.endswith("_480p.mp4") or not link.endswith(".mp4")
+        ]
+
 class Imgur:
    def __init__(self,directory,post):
        self.imgurClient = self.initImgur()
@@ -171,7 +300,7 @@ class Imgur:

            if duplicates == imagesLenght:
                raise FileAlreadyExistsError
-            elif howManyDownloaded < imagesLenght:
+            elif howManyDownloaded + duplicates < imagesLenght:
                raise AlbumNotDownloadedCompletely(
                    "Album Not Downloaded Completely"
                )