Add blacklist for web filetypes
This commit is contained in:
@@ -24,7 +24,8 @@ class DownloadFactory:
|
||||
sanitised_url = DownloadFactory.sanitise_url(url)
|
||||
if re.match(r'(i\.)?imgur.*\.gifv$', sanitised_url):
|
||||
return Imgur
|
||||
elif re.match(r'.*/.*\.\w{3,4}(\?[\w;&=]*)?$', sanitised_url):
|
||||
elif re.match(r'.*/.*\.\w{3,4}(\?[\w;&=]*)?$', sanitised_url) and \
|
||||
not DownloadFactory.is_web_resource(sanitised_url):
|
||||
return Direct
|
||||
elif re.match(r'erome\.com.*', sanitised_url):
|
||||
return Erome
|
||||
@@ -55,3 +56,20 @@ class DownloadFactory:
|
||||
split_url = split_url.netloc + split_url.path
|
||||
split_url = re.sub(beginning_regex, '', split_url)
|
||||
return split_url
|
||||
|
||||
@staticmethod
def is_web_resource(url: str) -> bool:
    """Return True if *url* ends in a web-page or web-asset file extension.

    The comparison is case-insensitive. The extension must be the very end
    of the string (a query string or fragment is not stripped here) and
    must come after a ``/``, e.g. ``www.example.com/index.html``.

    Args:
        url: The URL to inspect.

    Returns:
        True when the URL ends with one of the blacklisted extensions,
        otherwise False.
    """
    web_extensions = (
        'asp',
        'aspx',
        'cfm',
        'cfml',
        'css',
        'htm',
        'html',
        'js',
        'jsp',
        'php',
        'php3',
        'xhtml',
    )
    # Escape each entry so an extension can never be read as regex syntax.
    alternatives = '|'.join(re.escape(extension) for extension in web_extensions)
    pattern = rf'.*/.*\.({alternatives})$'
    return re.match(pattern, url, flags=re.IGNORECASE) is not None
|
||||
|
||||
@@ -72,3 +72,16 @@ def test_factory_lever_bad(test_url: str):
|
||||
def test_sanitise_url(test_url: str, expected: str):
|
||||
result = DownloadFactory.sanitise_url(test_url)
|
||||
assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ('test_url', 'expected'),
    (
        ('www.example.com/test.asp', True),
        ('www.example.com/test.html', True),
        ('www.example.com/test.js', True),
        ('www.example.com/test.xhtml', True),
        ('www.example.com/test.mp4', False),
        ('www.example.com/test.png', False),
    ),
)
def test_is_web_resource(test_url: str, expected: bool):
    """Check that is_web_resource gives the expected verdict for each sample URL."""
    assert DownloadFactory.is_web_resource(test_url) == expected
|
||||
|
||||
Reference in New Issue
Block a user