Rename folder

2021-02-07 17:08:24 +10:00
parent 5ef58f147f
commit f2415b6bd0
14 changed files with 22 additions and 22 deletions
--- a/bulkredditdownloader/site_downloaders/erome.py
+++ b/bulkredditdownloader/site_downloaders/erome.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python3
+
+import logging
+import pathlib
+import re
+import urllib.error
+import urllib.request
+from html.parser import HTMLParser
+
+from bulkredditdownloader.site_downloaders.base_downloader import BaseDownloader
+from bulkredditdownloader.errors import AlbumNotDownloadedCompletely, FileAlreadyExistsError, NotADownloadableLinkError
+from bulkredditdownloader.utils import GLOBAL
+
+logger = logging.getLogger(__name__)
+
+
+class Erome(BaseDownloader):
+    def __init__(self, directory: pathlib.Path, post: dict):
+        super().__init__(directory, post)
+        self.download()
+
+    def download(self):
+        try:
+            images = self._get_links(self.post['CONTENTURL'])
+        except urllib.error.HTTPError:
+            raise NotADownloadableLinkError("Not a downloadable link")
+
+        images_length = len(images)
+        how_many_downloaded = len(images)
+        duplicates = 0
+
+        if images_length == 1:
+            """Filenames are declared here"""
+            filename = GLOBAL.config['filename'].format(**self.post) + self.post["EXTENSION"]
+
+            image = images[0]
+            if not re.match(r'https?://.*', image):
+                image = "https://" + image
+
+            self._download_resource(filename, self.directory, image)
+
+        else:
+            filename = GLOBAL.config['filename'].format(**self.post)
+            logger.info(filename)
+
+            folder_dir = self.directory / filename
+
+            folder_dir.mkdir(exist_ok=True)
+
+            for i, image in enumerate(images):
+                extension = self._get_extension(image)
+                filename = str(i + 1) + extension
+
+                if not re.match(r'https?://.*', image):
+                    image = "https://" + image
+
+                logger.info("  ({}/{})".format(i + 1, images_length))
+                logger.info("  {}".format(filename))
+
+                try:
+                    self._download_resource(pathlib.Path(filename), folder_dir, image, indent=2)
+                except FileAlreadyExistsError:
+                    logger.info("  The file already exists" + " " * 10, end="\n\n")
+                    duplicates += 1
+                    how_many_downloaded -= 1
+
+                except Exception as exception:
+                    # raise exception
+                    logger.error("\n  Could not get the file")
+                    logger.error(
+                        "  "
+                        + "{class_name}: {info}".format(class_name=exception.__class__.__name__, info=str(exception))
+                        + "\n"
+                    )
+                    how_many_downloaded -= 1
+
+            if duplicates == images_length:
+                raise FileAlreadyExistsError
+            elif how_many_downloaded + duplicates < images_length:
+                raise AlbumNotDownloadedCompletely("Album Not Downloaded Completely")
+
+    @staticmethod
+    def _get_links(url: str) -> list[str]:
+        content = []
+        line_number = None
+
+        # TODO: move to bs4 and requests
+        class EromeParser(HTMLParser):
+            tag = None
+
+            def handle_starttag(self, tag, attrs):
+                self.tag = {tag: {attr[0]: attr[1] for attr in attrs}}
+
+        page_source = (urllib.request.urlopen(url).read().decode().split('\n'))
+
+        """ FIND WHERE ALBUM STARTS IN ORDER NOT TO GET WRONG LINKS"""
+        for i in range(len(page_source)):
+            obj = EromeParser()
+            obj.feed(page_source[i])
+            tag = obj.tag
+
+            if tag is not None:
+                if "div" in tag:
+                    if "id" in tag["div"]:
+                        if tag["div"]["id"] == "album":
+                            line_number = i
+                            break
+
+        for line in page_source[line_number:]:
+            obj = EromeParser()
+            obj.feed(line)
+            tag = obj.tag
+            if tag is not None:
+                if "img" in tag:
+                    if "class" in tag["img"]:
+                        if tag["img"]["class"] == "img-front":
+                            content.append(tag["img"]["src"])
+                elif "source" in tag:
+                    content.append(tag["source"]["src"])
+
+        return [link for link in content if link.endswith("_480p.mp4") or not link.endswith(".mp4")]