Fix Erome downloader

Serene-Arc
2021-03-18 19:10:27 +10:00
committed by Ali Parlakci
parent 0929469bef
commit 540b237da6
2 changed files with 26 additions and 17 deletions


@@ -21,20 +21,20 @@ class Erome(BaseDownloader):
         super().__init__(post)
 
     def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
-        images = self._get_links(self.post.url)
-        if not images:
+        links = self._get_links(self.post.url)
+        if not links:
             raise NotADownloadableLinkError('Erome parser could not find any links')
-        if len(images) == 1:
-            image = images.pop()
-            image = self._validate_url(image)
-            return [Resource(self.post, image)]
+        if len(links) == 1:
+            link = links.pop()
+            link = self._validate_url(link)
+            return [Resource(self.post, link)]
         else:
             out = []
-            for i, image in enumerate(images):
-                image = self._validate_url(image)
-                out.append(Resource(self.post, image))
+            for i, link in enumerate(links):
+                link = self._validate_url(link)
+                out.append(Resource(self.post, link))
             return out
 
     @staticmethod
@@ -47,8 +47,8 @@ class Erome(BaseDownloader):
     def _get_links(url: str) -> set[str]:
         page = requests.get(url)
         soup = bs4.BeautifulSoup(page.text)
-        front_images = soup.find_all('img', attrs={'class': 'img-front'})
-        out = [im.get('src') for im in front_images]
+        front_images = soup.find_all('img', attrs={'class': 'lasyload'})
+        out = [im.get('data-src') for im in front_images]
         videos = soup.find_all('source')
         out.extend([vid.get('src') for vid in videos])
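
For context on this hunk: Erome lazy-loads album images, so the full-size URL lives in a "data-src" attribute on img tags carrying the "lasyload" class, while video source tags still expose a plain "src". Below is a minimal, self-contained sketch of that scraping step; the function name and the 'html.parser' backend are illustrative choices, not the project's actual code.

import bs4
import requests


def erome_media_links(album_url: str) -> set[str]:
    # Fetch the album page and parse the HTML.
    page = requests.get(album_url)
    soup = bs4.BeautifulSoup(page.text, 'html.parser')
    # Lazy-loaded images keep their full-size URL in data-src, not src.
    images = soup.find_all('img', attrs={'class': 'lasyload'})
    links = [image.get('data-src') for image in images]
    # Video sources still expose a plain src attribute.
    links.extend(source.get('src') for source in soup.find_all('source'))
    # Drop any tags that lacked the expected attribute.
    return {link for link in links if link}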


@@ -39,15 +39,24 @@ def test_get_link(test_url: str, expected_urls: tuple[str]):
 @pytest.mark.online
 @pytest.mark.slow
-@pytest.mark.parametrize(('test_url', 'expected_number_of_resources', 'expected_hashes'), (
-    ('https://www.erome.com/a/vqtPuLXh', 2, ('5da2a8d60d87bed279431fdec8e7d72f', '243d17b52a728911b022829badbc524e')),
+@pytest.mark.parametrize(('test_url', 'expected_hashes'), (
+    ('https://www.erome.com/a/vqtPuLXh', {'5da2a8d60d87bed279431fdec8e7d72f'}),
+    ('https://www.erome.com/i/ItASD33e', {'b0d73fedc9ce6995c2f2c4fdb6f11eff'}),
+    ('https://www.erome.com/a/lGrcFxmb', {
+        '0e98f9f527a911dcedde4f846bb5b69f',
+        '25696ae364750a5303fc7d7dc78b35c1',
+        '63775689f438bd393cde7db6d46187de',
+        'a1abf398cfd4ef9cfaf093ceb10c746a',
+        'bd9e1a4ea5ef0d6ba47fb90e337c2d14'
+    }),
 ))
-def test_download_resource(test_url: str, expected_number_of_resources: int, expected_hashes: tuple[str]):
+def test_download_resource(test_url: str, expected_hashes: tuple[str]):
+    # Can't compare hashes for this test, Erome doesn't return the exact same file from request to request so the hash
+    # will change back and forth randomly
     mock_submission = MagicMock()
     mock_submission.url = test_url
     test_site = Erome(mock_submission)
     resources = test_site.find_resources()
-    assert len(resources) == expected_number_of_resources
     [res.download() for res in resources]
     resource_hashes = [res.hash.hexdigest() for res in resources]
-    assert set(resource_hashes) == set(expected_hashes)
+    assert len(resource_hashes) == len(expected_hashes)
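
The rewritten assertion only compares how many hashes come back, since, as the new comment notes, Erome does not serve byte-identical files across requests. These cases are also marked 'online' and 'slow', which lets a runner include or exclude them with pytest's -m option. A small sketch for selecting them programmatically is below; the '-k erome' filter assumes the test ids contain 'erome' and may need adjusting to the real module name.

import pytest

# Select only tests marked both online and slow whose ids mention 'erome'.
# pytest.main returns an exit code: 0 means every selected test passed.
exit_code = pytest.main(['-m', 'online and slow', '-k', 'erome', '-v'])
raise SystemExit(exit_code)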