Merge pull request #427 from Serene-Arc/bug_fix_386
Add a file blacklist to the direct downloader
This commit is contained in:
@@ -21,10 +21,11 @@ from bdfr.site_downloaders.youtube import Youtube
|
|||||||
class DownloadFactory:
|
class DownloadFactory:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def pull_lever(url: str) -> Type[BaseDownloader]:
|
def pull_lever(url: str) -> Type[BaseDownloader]:
|
||||||
sanitised_url = DownloadFactory._sanitise_url(url)
|
sanitised_url = DownloadFactory.sanitise_url(url)
|
||||||
if re.match(r'(i\.)?imgur.*\.gifv$', sanitised_url):
|
if re.match(r'(i\.)?imgur.*\.gifv$', sanitised_url):
|
||||||
return Imgur
|
return Imgur
|
||||||
elif re.match(r'.*/.*\.\w{3,4}(\?[\w;&=]*)?$', sanitised_url):
|
elif re.match(r'.*/.*\.\w{3,4}(\?[\w;&=]*)?$', sanitised_url) and \
|
||||||
|
not DownloadFactory.is_web_resource(sanitised_url):
|
||||||
return Direct
|
return Direct
|
||||||
elif re.match(r'erome\.com.*', sanitised_url):
|
elif re.match(r'erome\.com.*', sanitised_url):
|
||||||
return Erome
|
return Erome
|
||||||
@@ -49,9 +50,29 @@ class DownloadFactory:
|
|||||||
f'No downloader module exists for url {url}')
|
f'No downloader module exists for url {url}')
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _sanitise_url(url: str) -> str:
|
def sanitise_url(url: str) -> str:
|
||||||
beginning_regex = re.compile(r'\s*(www\.?)?')
|
beginning_regex = re.compile(r'\s*(www\.?)?')
|
||||||
split_url = urllib.parse.urlsplit(url)
|
split_url = urllib.parse.urlsplit(url)
|
||||||
split_url = split_url.netloc + split_url.path
|
split_url = split_url.netloc + split_url.path
|
||||||
split_url = re.sub(beginning_regex, '', split_url)
|
split_url = re.sub(beginning_regex, '', split_url)
|
||||||
return split_url
|
return split_url
|
||||||
|
|
||||||
|
@staticmethod
def is_web_resource(url: str) -> bool:
    """Return whether ``url`` ends in a web-page or script file extension.

    The match is case-insensitive. The pattern requires a ``/`` somewhere
    before the final ``.<extension>`` and is anchored at the end of the
    string, so anything following the extension (for example a query
    string) prevents a match.

    Args:
        url: The URL text to inspect.

    Returns:
        True if ``url`` ends in one of the listed extensions, else False.
    """
    web_extensions = (
        'asp',
        'aspx',
        'cfm',
        'cfml',
        'css',
        'htm',
        'html',
        'js',
        'php',
        'php3',
        'xhtml',
    )
    # re.match yields a match object or None; bool() turns that into the
    # declared return type without an explicit if/else.
    return bool(re.match(rf'(?i).*/.*\.({"|".join(web_extensions)})$', url))
|
||||||
|
|||||||
@@ -69,6 +69,19 @@ def test_factory_lever_bad(test_url: str):
|
|||||||
('https://youtube.com/watch?v=Gv8Wz74FjVA', 'youtube.com/watch'),
|
('https://youtube.com/watch?v=Gv8Wz74FjVA', 'youtube.com/watch'),
|
||||||
('https://i.imgur.com/BuzvZwb.gifv', 'i.imgur.com/BuzvZwb.gifv'),
|
('https://i.imgur.com/BuzvZwb.gifv', 'i.imgur.com/BuzvZwb.gifv'),
|
||||||
))
|
))
|
||||||
def test_sanitise_urll(test_url: str, expected: str):
|
def test_sanitise_url(test_url: str, expected: str):
|
||||||
result = DownloadFactory._sanitise_url(test_url)
|
result = DownloadFactory.sanitise_url(test_url)
|
||||||
|
assert result == expected
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(('test_url', 'expected'), (
|
||||||
|
('www.example.com/test.asp', True),
|
||||||
|
('www.example.com/test.html', True),
|
||||||
|
('www.example.com/test.js', True),
|
||||||
|
('www.example.com/test.xhtml', True),
|
||||||
|
('www.example.com/test.mp4', False),
|
||||||
|
('www.example.com/test.png', False),
|
||||||
|
))
|
||||||
|
def test_is_web_resource(test_url: str, expected: bool):
|
||||||
|
result = DownloadFactory.is_web_resource(test_url)
|
||||||
assert result == expected
|
assert result == expected
|
||||||
|
|||||||
Reference in New Issue
Block a user