Make downloadfilter apply itself to Resources
This commit is contained in:
@@ -4,6 +4,8 @@
|
|||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
from bdfr.resource import Resource
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@@ -21,13 +23,20 @@ class DownloadFilter:
|
|||||||
else:
|
else:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def _check_extension(self, url: str) -> bool:
|
def check_resource(self, res: Resource) -> bool:
|
||||||
|
if not self._check_extension(res.extension):
|
||||||
|
return False
|
||||||
|
elif not self._check_domain(res.url):
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
def _check_extension(self, resource_extension: str) -> bool:
|
||||||
if not self.excluded_extensions:
|
if not self.excluded_extensions:
|
||||||
return True
|
return True
|
||||||
combined_extensions = '|'.join(self.excluded_extensions)
|
combined_extensions = '|'.join(self.excluded_extensions)
|
||||||
pattern = re.compile(r'.*({})$'.format(combined_extensions))
|
pattern = re.compile(r'.*({})$'.format(combined_extensions))
|
||||||
if re.match(pattern, url):
|
if re.match(pattern, resource_extension):
|
||||||
logger.log(9, f'Url "{url}" matched with "{str(pattern)}"')
|
logger.log(9, f'Url "{resource_extension}" matched with "{str(pattern)}"')
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
return True
|
return True
|
||||||
|
|||||||
@@ -394,9 +394,6 @@ class RedditDownloader:
|
|||||||
if not isinstance(submission, praw.models.Submission):
|
if not isinstance(submission, praw.models.Submission):
|
||||||
logger.warning(f'{submission.id} is not a submission')
|
logger.warning(f'{submission.id} is not a submission')
|
||||||
return
|
return
|
||||||
if not self.download_filter.check_url(submission.url):
|
|
||||||
logger.debug(f'Download filter removed submission {submission.id} with URL {submission.url}')
|
|
||||||
return
|
|
||||||
try:
|
try:
|
||||||
downloader_class = DownloadFactory.pull_lever(submission.url)
|
downloader_class = DownloadFactory.pull_lever(submission.url)
|
||||||
downloader = downloader_class(submission)
|
downloader = downloader_class(submission)
|
||||||
@@ -413,6 +410,8 @@ class RedditDownloader:
|
|||||||
for destination, res in self.file_name_formatter.format_resource_paths(content, self.download_directory):
|
for destination, res in self.file_name_formatter.format_resource_paths(content, self.download_directory):
|
||||||
if destination.exists():
|
if destination.exists():
|
||||||
logger.debug(f'File {destination} already exists, continuing')
|
logger.debug(f'File {destination} already exists, continuing')
|
||||||
|
elif not self.download_filter.check_resource(res):
|
||||||
|
logger.debug(f'Download filter removed {submission.id} with URL {submission.url}')
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
res.download(self.args.max_wait_time)
|
res.download(self.args.max_wait_time)
|
||||||
|
|||||||
@@ -1,9 +1,12 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# coding=utf-8
|
||||||
|
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from bdfr.download_filter import DownloadFilter
|
from bdfr.download_filter import DownloadFilter
|
||||||
|
from bdfr.resource import Resource
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture()
|
@pytest.fixture()
|
||||||
@@ -11,13 +14,14 @@ def download_filter() -> DownloadFilter:
|
|||||||
return DownloadFilter(['mp4', 'mp3'], ['test.com', 'reddit.com'])
|
return DownloadFilter(['mp4', 'mp3'], ['test.com', 'reddit.com'])
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(('test_url', 'expected'), (
|
@pytest.mark.parametrize(('test_extension', 'expected'), (
|
||||||
('test.mp4', False),
|
('.mp4', False),
|
||||||
('test.avi', True),
|
('.avi', True),
|
||||||
('test.random.mp3', False),
|
('.random.mp3', False),
|
||||||
|
('mp4', False),
|
||||||
))
|
))
|
||||||
def test_filter_extension(test_url: str, expected: bool, download_filter: DownloadFilter):
|
def test_filter_extension(test_extension: str, expected: bool, download_filter: DownloadFilter):
|
||||||
result = download_filter._check_extension(test_url)
|
result = download_filter._check_extension(test_extension)
|
||||||
assert result == expected
|
assert result == expected
|
||||||
|
|
||||||
|
|
||||||
@@ -42,7 +46,8 @@ def test_filter_domain(test_url: str, expected: bool, download_filter: DownloadF
|
|||||||
('http://reddit.com/test.gif', False),
|
('http://reddit.com/test.gif', False),
|
||||||
))
|
))
|
||||||
def test_filter_all(test_url: str, expected: bool, download_filter: DownloadFilter):
|
def test_filter_all(test_url: str, expected: bool, download_filter: DownloadFilter):
|
||||||
result = download_filter.check_url(test_url)
|
test_resource = Resource(MagicMock(), test_url)
|
||||||
|
result = download_filter.check_resource(test_resource)
|
||||||
assert result == expected
|
assert result == expected
|
||||||
|
|
||||||
|
|
||||||
@@ -54,5 +59,6 @@ def test_filter_all(test_url: str, expected: bool, download_filter: DownloadFilt
|
|||||||
))
|
))
|
||||||
def test_filter_empty_filter(test_url: str):
|
def test_filter_empty_filter(test_url: str):
|
||||||
download_filter = DownloadFilter()
|
download_filter = DownloadFilter()
|
||||||
result = download_filter.check_url(test_url)
|
test_resource = Resource(MagicMock(), test_url)
|
||||||
|
result = download_filter.check_resource(test_resource)
|
||||||
assert result is True
|
assert result is True
|
||||||
|
|||||||
@@ -161,13 +161,14 @@ def test_cli_download_search_existing(test_args: list[str], tmp_path: Path):
|
|||||||
@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')
|
@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')
|
||||||
@pytest.mark.parametrize('test_args', (
|
@pytest.mark.parametrize('test_args', (
|
||||||
['--subreddit', 'tumblr', '-L', '25', '--skip', 'png', '--skip', 'jpg'],
|
['--subreddit', 'tumblr', '-L', '25', '--skip', 'png', '--skip', 'jpg'],
|
||||||
|
['--subreddit', 'MaliciousCompliance', '-L', '25', '--skip', 'txt'],
|
||||||
))
|
))
|
||||||
def test_cli_download_download_filters(test_args: list[str], tmp_path: Path):
|
def test_cli_download_download_filters(test_args: list[str], tmp_path: Path):
|
||||||
runner = CliRunner()
|
runner = CliRunner()
|
||||||
test_args = create_basic_args_for_download_runner(test_args, tmp_path)
|
test_args = create_basic_args_for_download_runner(test_args, tmp_path)
|
||||||
result = runner.invoke(cli, test_args)
|
result = runner.invoke(cli, test_args)
|
||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
assert 'Download filter removed submission' in result.output
|
assert 'Download filter removed ' in result.output
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.online
|
@pytest.mark.online
|
||||||
|
|||||||
Reference in New Issue
Block a user