BDFR_Web/bulkredditdownloader/downloaders/Erome.py

import os
import pathlib
import urllib.error
import urllib.request
from html.parser import HTMLParser

from bulkredditdownloader.downloaders.downloaderUtils import getExtension, getFile
from bulkredditdownloader.errors import AlbumNotDownloadedCompletely, FileAlreadyExistsError, NotADownloadableLinkError
from bulkredditdownloader.utils import GLOBAL
from bulkredditdownloader.utils import printToFile as print


class Erome:
    def __init__(self, directory: pathlib.Path, post: dict):
        try:
            images = self.getLinks(post['CONTENTURL'])
        except urllib.error.HTTPError:
            raise NotADownloadableLinkError("Not a downloadable link")

        images_length = len(images)
        how_many_downloaded = images_length
        duplicates = 0

        if images_length == 1:
            extension = getExtension(images[0])

            # Filenames are declared here
            filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
            short_filename = post['POSTID'] + extension

            image_url = images[0]
            if 'https://' not in image_url and 'http://' not in image_url:
                image_url = "https://" + image_url

            getFile(filename, short_filename, directory, image_url)

        else:
            filename = GLOBAL.config['filename'].format(**post)
            print(filename)

            folder_dir = directory / filename

            try:
                if not os.path.exists(folder_dir):
                    os.makedirs(folder_dir)
            except FileNotFoundError:
                # Fall back to the post ID when the formatted name is not a valid path
                folder_dir = directory / post['POSTID']
                os.makedirs(folder_dir)

            for i in range(images_length):
                extension = getExtension(images[i])

                filename = str(i + 1) + extension
                image_url = images[i]
                if 'https://' not in image_url and 'http://' not in image_url:
                    image_url = "https://" + image_url

                print("  ({}/{})".format(i + 1, images_length))
                print("  {}".format(filename))

                try:
                    getFile(filename, filename, folder_dir, image_url, indent=2)
                    print()
                except FileAlreadyExistsError:
                    print("  The file already exists" + " " * 10, end="\n\n")
                    duplicates += 1
                    how_many_downloaded -= 1
                except Exception as exception:
                    # raise exception
                    print("\n  Could not get the file")
                    print(
                        "  "
                        + "{class_name}: {info}".format(class_name=exception.__class__.__name__,
                                                        info=str(exception))
                        + "\n"
                    )
                    how_many_downloaded -= 1

            if duplicates == images_length:
                raise FileAlreadyExistsError
            elif how_many_downloaded + duplicates < images_length:
                raise AlbumNotDownloadedCompletely("Album Not Downloaded Completely")

    def getLinks(self, url: str) -> list[str]:
        content = []
        line_number = None

        class EromeParser(HTMLParser):
            tag = None

            def handle_starttag(self, tag, attrs):
                self.tag = {tag: {attr[0]: attr[1] for attr in attrs}}

        page_source = urllib.request.urlopen(url).read().decode().split('\n')

        # Find where the album starts in order not to pick up unrelated links
        for i in range(len(page_source)):
            obj = EromeParser()
            obj.feed(page_source[i])
            tag = obj.tag

            if tag is not None and "div" in tag:
                if tag["div"].get("id") == "album":
                    line_number = i
                    break

        # If no album marker was found, line_number stays None and the slice
        # below scans the page from the beginning.
        for line in page_source[line_number:]:
            obj = EromeParser()
            obj.feed(line)
            tag = obj.tag

            if tag is not None:
                if "img" in tag and tag["img"].get("class") == "img-front":
                    content.append(tag["img"]["src"])
                elif "source" in tag:
                    content.append(tag["source"]["src"])

        # Keep image links as they are; for videos, keep only the 480p rendition
        return [link for link in content if link.endswith("_480p.mp4") or not link.endswith(".mp4")]
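

# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal, hypothetical example of how this downloader might be invoked
# directly. It assumes the main program has already initialised GLOBAL
# (only the 'filename' format string is set here) and that the post dict
# carries the keys read above: CONTENTURL, POSTID and EXTENSION. The album
# URL and the values below are placeholders, not real data.
if __name__ == "__main__":
    GLOBAL.config = {'filename': '{POSTID}'}

    example_post = {
        'CONTENTURL': 'https://www.erome.com/a/example',  # hypothetical album URL
        'POSTID': 'abc123',
        'EXTENSION': '.jpg',
    }

    try:
        Erome(pathlib.Path('downloads'), example_post)
    except NotADownloadableLinkError as error:
        print(error)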