Files
BDFR_Web/src/downloaders/Erome.py
vlad doster fc42afbabe (maint) code clean up (#187)
## bdfr

- Add the bound instance as method parameter
- Change methods not using its bound instance to staticmethods
- Fix dangerous default argument
- Refactor the comparison involving `not`
- Refactor unnecessary `else` / `elif` when `if` block has a `raise` statement
- Refactor unnecessary `else` / `elif` when `if` block has a `return` statement
- Refactor useless `else` block in the loop
- Remove implicit `object` from the base class
- Remove reimported module
- Remove unnecessary generator
- Remove unnecessary return statement
- Remove unnecessary use of comprehension
- Remove unused imports
- Use `is` to compare type of objects
- Using `not x` can cause unwanted results

## Dockerfile

- use a pinned Python version tag instead of latest
- leverage cached requirements

Signed-off-by: Vladislav Doster <mvdoster@gmail.com>

Co-authored-by: Ali Parlakçı <parlakciali@gmail.com>
2021-02-25 12:32:06 +03:00

137 lines
4.4 KiB
Python

import os
import urllib.request
from html.parser import HTMLParser
from src.downloaders.downloaderUtils import getFile
from src.downloaders.downloaderUtils import getExtension
from src.errors import (AlbumNotDownloadedCompletely,
NotADownloadableLinkError, FileAlreadyExistsError)
from src.utils import GLOBAL
from src.utils import printToFile as print
class Erome:
    """Download the media linked from a single Erome post.

    Instantiating the class performs the whole download: the page at
    ``post['CONTENTURL']`` is scraped for media links, then either the
    single file is saved into ``directory`` or, for several links, every
    file is saved into a sub-folder of ``directory``.
    """

    class _LastStartTagParser(HTMLParser):
        """Remember only the most recent start tag fed to the parser."""

        # ``(tag_name, {attribute: value})`` of the last start tag, or None.
        found = None

        def handle_starttag(self, tag, attrs):
            self.found = (tag, dict(attrs))

    def __init__(self, directory, post):
        """Scrape the post's page and download everything found on it.

        Args:
            directory: Destination folder; joined with ``/``, so it is
                expected to be a ``pathlib.Path``-like object.
            post: Mapping describing the post. ``CONTENTURL`` and
                ``POSTID`` are read here, and the whole mapping is passed
                to the configured filename template.

        Raises:
            NotADownloadableLinkError: The page could not be fetched
                (HTTP error) or contains no media links.
            FileAlreadyExistsError: Every file of an album already exists.
            AlbumNotDownloadedCompletely: At least one album file failed.
        """
        try:
            links = self.getLinks(post['CONTENTURL'])
        except urllib.error.HTTPError as err:
            raise NotADownloadableLinkError("Not a downloadable link") from err

        if not links:
            # Without this guard an empty page fell into the album branch,
            # created an empty folder and reported "file already exists".
            raise NotADownloadableLinkError("Not a downloadable link")

        if len(links) == 1:
            self._downloadSingle(directory, post, links[0])
        else:
            self._downloadAlbum(directory, post, links)

    @staticmethod
    def _withScheme(url):
        """Return *url* unchanged if it names a scheme, else prefix https."""
        if 'https://' not in url and 'http://' not in url:
            return "https://" + url
        return url

    @staticmethod
    def _lastStartTag(line):
        """Return ``(name, attrs)`` of the last start tag in *line*, or None."""
        # A fresh parser per line keeps tags from leaking between lines.
        parser = Erome._LastStartTagParser()
        parser.feed(line)
        return parser.found

    def _downloadSingle(self, directory, post, link):
        """Save the only media file of a post directly into *directory*."""
        extension = getExtension(link)
        # NOTE(review): the long filename takes its suffix from
        # post["EXTENSION"], the short one from the media URL - confirm the
        # caller populates post["EXTENSION"].
        filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
        shortFilename = post['POSTID'] + extension
        getFile(filename, shortFilename, directory, Erome._withScheme(link))

    def _downloadAlbum(self, directory, post, links):
        """Save every link of an album into its own folder, numbered from 1."""
        total = len(links)
        albumName = GLOBAL.config['filename'].format(**post)
        print(albumName)

        folderDir = directory / albumName
        try:
            os.makedirs(folderDir, exist_ok=True)
        except FileNotFoundError:
            # Fall back to a folder named after the post id; exist_ok lets
            # a re-run reuse a fallback folder created earlier.
            folderDir = directory / post['POSTID']
            os.makedirs(folderDir, exist_ok=True)

        duplicates = 0
        failures = 0
        for position, link in enumerate(links, start=1):
            filename = str(position) + getExtension(link)
            imageURL = Erome._withScheme(link)
            print(" ({}/{})".format(position, total))
            print(" {}".format(filename))
            try:
                getFile(filename, filename, folderDir, imageURL, indent=2)
                print()
            except FileAlreadyExistsError:
                print(" The file already exists" + " " * 10, end="\n\n")
                duplicates += 1
            except Exception as exception:
                # Deliberately broad: one broken file must not abort the
                # rest of the album. The failure is reported after the loop.
                print("\n Could not get the file")
                print(
                    " "
                    + "{class_name}: {info}".format(
                        class_name=exception.__class__.__name__,
                        info=str(exception)
                    )
                    + "\n"
                )
                failures += 1

        if duplicates == total:
            raise FileAlreadyExistsError
        if failures:
            raise AlbumNotDownloadedCompletely(
                "Album Not Downloaded Completely"
            )

    def getLinks(self, url, lineNumber=129):
        """Return the media links found in the album section of *url*.

        The page is processed line by line and only the last start tag of
        each line is inspected. Scanning starts at the first line whose tag
        is ``<div id="album">`` (or at the top of the page when there is
        none) so that links outside the album are not collected.

        Args:
            url: Address of the album page.
            lineNumber: Unused; kept only so existing callers that pass it
                keep working.

        Returns:
            The ``src`` values of ``<img class="img-front">`` and
            ``<source>`` tags, in page order. Links ending in ``.mp4`` are
            kept only when they end in ``_480p.mp4``.
        """
        with urllib.request.urlopen(url) as response:
            pageSource = response.read().decode().split('\n')

        # Find where the album starts in order not to get wrong links.
        albumStart = 0
        for index, line in enumerate(pageSource):
            tag = Erome._lastStartTag(line)
            if (tag is not None and tag[0] == "div"
                    and tag[1].get("id") == "album"):
                albumStart = index
                break

        content = []
        for line in pageSource[albumStart:]:
            tag = Erome._lastStartTag(line)
            if tag is None:
                continue
            name, attrs = tag
            if name == "img":
                if attrs.get("class") != "img-front":
                    continue
            elif name != "source":
                continue
            # Tags without a usable src are skipped instead of raising.
            src = attrs.get("src")
            if src:
                content.append(src)

        return [
            link for link in content
            if link.endswith("_480p.mp4") or not link.endswith(".mp4")
        ]