From e0a36f4eab2a4ba23b98f19bf4bdef2b7287301d Mon Sep 17 00:00:00 2001 From: SoulSuck24 <79275800+Soulsuck24@users.noreply.github.com> Date: Mon, 12 Sep 2022 22:26:02 -0400 Subject: [PATCH 1/7] Re-fix Redgifs API seems to return incorrect signature value when sending header. Other fixes seem to have worked temporarily but have stopped working so they're removed. --- bdfr/site_downloaders/redgifs.py | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/bdfr/site_downloaders/redgifs.py b/bdfr/site_downloaders/redgifs.py index 26b9dfc..e2d27e7 100644 --- a/bdfr/site_downloaders/redgifs.py +++ b/bdfr/site_downloaders/redgifs.py @@ -28,12 +28,7 @@ class Redgifs(BaseDownloader): except AttributeError: raise SiteDownloaderError(f'Could not extract Redgifs ID from {url}') - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) ' - 'Chrome/90.0.4430.93 Safari/537.36', - } - - content = Redgifs.retrieve_url(f'https://api.redgifs.com/v2/gifs/{redgif_id}', headers=headers) + content = Redgifs.retrieve_url(f'https://api.redgifs.com/v2/gifs/{redgif_id}') if content is None: raise SiteDownloaderError('Could not read the page source') @@ -62,14 +57,4 @@ class Redgifs(BaseDownloader): except (KeyError, AttributeError): raise SiteDownloaderError('Failed to find JSON data in page') - # returned domain seems to be being phased out - out = {re.sub('thumbs2', 'thumbs3', link) for link in out} - out = {Redgifs._clean_thumbs4_link(link) for link in out} - return out - - @staticmethod - def _clean_thumbs4_link(url: str) -> str: - split_url = urllib.parse.urlsplit(url) - out = split_url.scheme + '://' + split_url.netloc + split_url.path - out = re.sub('thumbs4', 'thumbs3', out) return out From 0a9ecac41093e1029317a964c3265373e030cb03 Mon Sep 17 00:00:00 2001 From: SoulSuck24 <79275800+Soulsuck24@users.noreply.github.com> Date: Fri, 16 Sep 2022 14:47:55 -0400 Subject: [PATCH 2/7] Redgif image fixes ---
bdfr/site_downloaders/redgifs.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bdfr/site_downloaders/redgifs.py b/bdfr/site_downloaders/redgifs.py index e2d27e7..9929493 100644 --- a/bdfr/site_downloaders/redgifs.py +++ b/bdfr/site_downloaders/redgifs.py @@ -45,9 +45,7 @@ class Redgifs(BaseDownloader): elif response_json['gif']['type'] == 2: # type 2 is an image if response_json['gif']['gallery']: content = Redgifs.retrieve_url( - f'https://api.redgifs.com/v2/gallery/{response_json["gif"]["gallery"]}', - headers=headers, - ) + f'https://api.redgifs.com/v2/gallery/{response_json["gif"]["gallery"]}') response_json = json.loads(content.text) out = {p['urls']['hd'] for p in response_json['gifs']} else: From 95749584ecad58e0db6d31220aaeae70e246a992 Mon Sep 17 00:00:00 2001 From: SoulSuck24 <79275800+Soulsuck24@users.noreply.github.com> Date: Fri, 16 Sep 2022 20:41:17 -0400 Subject: [PATCH 3/7] Redgifs fixed? If this doesn't work then I give up... --- bdfr/site_downloaders/download_factory.py | 4 ++-- bdfr/site_downloaders/redgifs.py | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/bdfr/site_downloaders/download_factory.py b/bdfr/site_downloaders/download_factory.py index 96e9a42..b0bf96a 100644 --- a/bdfr/site_downloaders/download_factory.py +++ b/bdfr/site_downloaders/download_factory.py @@ -27,6 +27,8 @@ class DownloadFactory: sanitised_url = DownloadFactory.sanitise_url(url) if re.match(r'(i\.)?imgur.*\.gif.+$', sanitised_url): return Imgur + elif re.match(r'(i\.)?(redgifs|gifdeliverynetwork)', sanitised_url): + return Redgifs elif re.match(r'.*/.*\.\w{3,4}(\?[\w;&=]*)?$', sanitised_url) and \ not DownloadFactory.is_web_resource(sanitised_url): return Direct @@ -40,8 +42,6 @@ class DownloadFactory: return Gfycat elif re.match(r'(m\.)?imgur.*', sanitised_url): return Imgur - elif re.match(r'(redgifs|gifdeliverynetwork)', sanitised_url): - return Redgifs elif re.match(r'reddit\.com/r/', sanitised_url): return SelfPost elif 
re.match(r'(m\.)?youtu\.?be', sanitised_url): diff --git a/bdfr/site_downloaders/redgifs.py b/bdfr/site_downloaders/redgifs.py index 9929493..8d6ab21 100644 --- a/bdfr/site_downloaders/redgifs.py +++ b/bdfr/site_downloaders/redgifs.py @@ -24,7 +24,7 @@ class Redgifs(BaseDownloader): @staticmethod def _get_link(url: str) -> set[str]: try: - redgif_id = re.match(r'.*/(.*?)/?$', url).group(1) + redgif_id = re.match(r'.*/(.*?)(\..{3,})?$', url).group(1) except AttributeError: raise SiteDownloaderError(f'Could not extract Redgifs ID from {url}') @@ -55,4 +55,7 @@ class Redgifs(BaseDownloader): except (KeyError, AttributeError): raise SiteDownloaderError('Failed to find JSON data in page') + # Update subdomain if old one is returned + out = {re.sub('thumbs2', 'thumbs3', link) for link in out} + out = {re.sub('thumbs3', 'thumbs4', link) for link in out} return out From 2f2b5b749c7348be09babc31e26fac6c2c243716 Mon Sep 17 00:00:00 2001 From: SoulSuck24 <79275800+Soulsuck24@users.noreply.github.com> Date: Sun, 18 Sep 2022 13:24:42 -0400 Subject: [PATCH 4/7] Edge case coverage Cover edge cases that shouldn't ever happen but probably will sometime. Also included Imgur changes to cover similar situations of malformed/redirected links. 
--- bdfr/site_downloaders/download_factory.py | 4 +--- bdfr/site_downloaders/imgur.py | 9 +++++---- bdfr/site_downloaders/redgifs.py | 2 +- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/bdfr/site_downloaders/download_factory.py b/bdfr/site_downloaders/download_factory.py index b0bf96a..5f1d9b1 100644 --- a/bdfr/site_downloaders/download_factory.py +++ b/bdfr/site_downloaders/download_factory.py @@ -25,7 +25,7 @@ class DownloadFactory: @staticmethod def pull_lever(url: str) -> Type[BaseDownloader]: sanitised_url = DownloadFactory.sanitise_url(url) - if re.match(r'(i\.)?imgur.*\.gif.+$', sanitised_url): + if re.match(r'imgur\.com', sanitised_url): return Imgur elif re.match(r'(i\.)?(redgifs|gifdeliverynetwork)', sanitised_url): return Redgifs @@ -40,8 +40,6 @@ class DownloadFactory: return Gallery elif re.match(r'gfycat\.', sanitised_url): return Gfycat - elif re.match(r'(m\.)?imgur.*', sanitised_url): - return Imgur elif re.match(r'reddit\.com/r/', sanitised_url): return SelfPost elif re.match(r'(m\.)?youtu\.?be', sanitised_url): diff --git a/bdfr/site_downloaders/imgur.py b/bdfr/site_downloaders/imgur.py index 1f669d0..2c0ac04 100644 --- a/bdfr/site_downloaders/imgur.py +++ b/bdfr/site_downloaders/imgur.py @@ -41,10 +41,11 @@ class Imgur(BaseDownloader): @staticmethod def _get_data(link: str) -> dict: - link = link.rstrip('?') - if re.match(r'(?i).*\.gif.+$', link): - link = link.replace('i.imgur', 'imgur') - link = re.sub('(?i)\\.gif.+$', '', link) + try: + imgur_id = re.match(r'.*/(.*?)(\..{0,})?$', link).group(1) + link = f'https://imgur.com/a/{imgur_id}' + except AttributeError: + raise SiteDownloaderError(f'Could not extract Imgur ID from {link}') res = Imgur.retrieve_url(link, cookies={'over18': '1', 'postpagebeta': '0'}) diff --git a/bdfr/site_downloaders/redgifs.py b/bdfr/site_downloaders/redgifs.py index 8d6ab21..2134aa3 100644 --- a/bdfr/site_downloaders/redgifs.py +++ b/bdfr/site_downloaders/redgifs.py @@ -24,7 +24,7 @@ class 
Redgifs(BaseDownloader): @staticmethod def _get_link(url: str) -> set[str]: try: - redgif_id = re.match(r'.*/(.*?)(\..{3,})?$', url).group(1) + redgif_id = re.match(r'.*/(.*?)(\..{0,})?$', url).group(1) except AttributeError: raise SiteDownloaderError(f'Could not extract Redgifs ID from {url}') From d4f7deaa6872ac35f37ccb5f17c350ab12424e9a Mon Sep 17 00:00:00 2001 From: SoulSuck24 <79275800+Soulsuck24@users.noreply.github.com> Date: Sun, 18 Sep 2022 14:30:43 -0400 Subject: [PATCH 5/7] Revert "Edge case coverage" This reverts commit 2f2b5b749c7348be09babc31e26fac6c2c243716. --- bdfr/site_downloaders/download_factory.py | 4 +++- bdfr/site_downloaders/imgur.py | 9 ++++----- bdfr/site_downloaders/redgifs.py | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/bdfr/site_downloaders/download_factory.py b/bdfr/site_downloaders/download_factory.py index 5f1d9b1..b0bf96a 100644 --- a/bdfr/site_downloaders/download_factory.py +++ b/bdfr/site_downloaders/download_factory.py @@ -25,7 +25,7 @@ class DownloadFactory: @staticmethod def pull_lever(url: str) -> Type[BaseDownloader]: sanitised_url = DownloadFactory.sanitise_url(url) - if re.match(r'imgur\.com', sanitised_url): + if re.match(r'(i\.)?imgur.*\.gif.+$', sanitised_url): return Imgur elif re.match(r'(i\.)?(redgifs|gifdeliverynetwork)', sanitised_url): return Redgifs @@ -40,6 +40,8 @@ class DownloadFactory: return Gallery elif re.match(r'gfycat\.', sanitised_url): return Gfycat + elif re.match(r'(m\.)?imgur.*', sanitised_url): + return Imgur elif re.match(r'reddit\.com/r/', sanitised_url): return SelfPost elif re.match(r'(m\.)?youtu\.?be', sanitised_url): diff --git a/bdfr/site_downloaders/imgur.py b/bdfr/site_downloaders/imgur.py index 2c0ac04..1f669d0 100644 --- a/bdfr/site_downloaders/imgur.py +++ b/bdfr/site_downloaders/imgur.py @@ -41,11 +41,10 @@ class Imgur(BaseDownloader): @staticmethod def _get_data(link: str) -> dict: - try: - imgur_id = re.match(r'.*/(.*?)(\..{0,})?$', link).group(1) - link = 
f'https://imgur.com/a/{imgur_id}' - except AttributeError: - raise SiteDownloaderError(f'Could not extract Imgur ID from {link}') + link = link.rstrip('?') + if re.match(r'(?i).*\.gif.+$', link): + link = link.replace('i.imgur', 'imgur') + link = re.sub('(?i)\\.gif.+$', '', link) res = Imgur.retrieve_url(link, cookies={'over18': '1', 'postpagebeta': '0'}) diff --git a/bdfr/site_downloaders/redgifs.py b/bdfr/site_downloaders/redgifs.py index 2134aa3..8d6ab21 100644 --- a/bdfr/site_downloaders/redgifs.py +++ b/bdfr/site_downloaders/redgifs.py @@ -24,7 +24,7 @@ class Redgifs(BaseDownloader): @staticmethod def _get_link(url: str) -> set[str]: try: - redgif_id = re.match(r'.*/(.*?)(\..{0,})?$', url).group(1) + redgif_id = re.match(r'.*/(.*?)(\..{3,})?$', url).group(1) except AttributeError: raise SiteDownloaderError(f'Could not extract Redgifs ID from {url}') From 7bd957aafa9b50538951afc87cb516a76b855fb0 Mon Sep 17 00:00:00 2001 From: SoulSuck24 <79275800+Soulsuck24@users.noreply.github.com> Date: Sun, 18 Sep 2022 14:32:12 -0400 Subject: [PATCH 6/7] Redo edge case coverage for Redgifs Cover edge cases that shouldn't ever happen but probably will sometime. 
--- bdfr/site_downloaders/redgifs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bdfr/site_downloaders/redgifs.py b/bdfr/site_downloaders/redgifs.py index 8d6ab21..2134aa3 100644 --- a/bdfr/site_downloaders/redgifs.py +++ b/bdfr/site_downloaders/redgifs.py @@ -24,7 +24,7 @@ class Redgifs(BaseDownloader): @staticmethod def _get_link(url: str) -> set[str]: try: - redgif_id = re.match(r'.*/(.*?)(\..{3,})?$', url).group(1) + redgif_id = re.match(r'.*/(.*?)(\..{0,})?$', url).group(1) except AttributeError: raise SiteDownloaderError(f'Could not extract Redgifs ID from {url}') From 5c343ef79067abfb9479b11a7287a8078ceb31aa Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Tue, 20 Sep 2022 11:09:39 +1000 Subject: [PATCH 7/7] Fix Redgifs tests --- tests/site_downloaders/test_redgifs.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/tests/site_downloaders/test_redgifs.py b/tests/site_downloaders/test_redgifs.py index b7ae3b3..b73ee95 100644 --- a/tests/site_downloaders/test_redgifs.py +++ b/tests/site_downloaders/test_redgifs.py @@ -2,6 +2,7 @@ # coding=utf-8 from unittest.mock import Mock +import re import pytest @@ -12,24 +13,26 @@ from bdfr.site_downloaders.redgifs import Redgifs @pytest.mark.online @pytest.mark.parametrize(('test_url', 'expected'), ( ('https://redgifs.com/watch/frighteningvictorioussalamander', - {'https://thumbs3.redgifs.com/FrighteningVictoriousSalamander.mp4'}), + {'FrighteningVictoriousSalamander.mp4'}), ('https://redgifs.com/watch/springgreendecisivetaruca', - {'https://thumbs3.redgifs.com/SpringgreenDecisiveTaruca.mp4'}), + {'SpringgreenDecisiveTaruca.mp4'}), ('https://www.redgifs.com/watch/palegoldenrodrawhalibut', - {'https://thumbs3.redgifs.com/PalegoldenrodRawHalibut.mp4'}), + {'PalegoldenrodRawHalibut.mp4'}), ('https://redgifs.com/watch/hollowintentsnowyowl', - {'https://thumbs3.redgifs.com/HollowIntentSnowyowl-large.jpg'}), + {'HollowIntentSnowyowl-large.jpg'}), 
('https://www.redgifs.com/watch/lustrousstickywaxwing', - {'https://thumbs3.redgifs.com/EntireEnchantingHypsilophodon-large.jpg', - 'https://thumbs3.redgifs.com/FancyMagnificentAdamsstaghornedbeetle-large.jpg', - 'https://thumbs3.redgifs.com/LustrousStickyWaxwing-large.jpg', - 'https://thumbs3.redgifs.com/ParchedWindyArmyworm-large.jpg', - 'https://thumbs3.redgifs.com/ThunderousColorlessErmine-large.jpg', - 'https://thumbs3.redgifs.com/UnripeUnkemptWoodpecker-large.jpg'}), + {'EntireEnchantingHypsilophodon-large.jpg', + 'FancyMagnificentAdamsstaghornedbeetle-large.jpg', + 'LustrousStickyWaxwing-large.jpg', + 'ParchedWindyArmyworm-large.jpg', + 'ThunderousColorlessErmine-large.jpg', + 'UnripeUnkemptWoodpecker-large.jpg'}), )) def test_get_link(test_url: str, expected: set[str]): result = Redgifs._get_link(test_url) - assert result == expected + result = list(result) + patterns = [r'https://thumbs\d\.redgifs\.com/' + e + r'.*' for e in expected] + assert all([re.match(p, r) for p in patterns] for r in result) @pytest.mark.online