Add option to search for files pre-emptively
This commit is contained in:
@@ -74,6 +74,8 @@ class RedditDownloader:
|
|||||||
self._resolve_user_name()
|
self._resolve_user_name()
|
||||||
|
|
||||||
self.master_hash_list = []
|
self.master_hash_list = []
|
||||||
|
if self.args.search_existing:
|
||||||
|
self.master_hash_list.extend(self.scan_existing_files(self.download_directory))
|
||||||
self.authenticator = self._create_authenticator()
|
self.authenticator = self._create_authenticator()
|
||||||
logger.log(9, 'Created site authenticator')
|
logger.log(9, 'Created site authenticator')
|
||||||
|
|
||||||
@@ -302,8 +304,9 @@ class RedditDownloader:
|
|||||||
self._download_submission(submission)
|
self._download_submission(submission)
|
||||||
|
|
||||||
def _download_submission(self, submission: praw.models.Submission):
|
def _download_submission(self, submission: praw.models.Submission):
|
||||||
if self.download_filter.check_url(submission.url):
|
if not self.download_filter.check_url(submission.url):
|
||||||
|
logger.debug(f'Download filter remove submission {submission.id} with URL {submission.url}')
|
||||||
|
return
|
||||||
try:
|
try:
|
||||||
downloader_class = DownloadFactory.pull_lever(submission.url)
|
downloader_class = DownloadFactory.pull_lever(submission.url)
|
||||||
downloader = downloader_class(submission)
|
downloader = downloader_class(submission)
|
||||||
@@ -330,9 +333,10 @@ class RedditDownloader:
|
|||||||
logger.debug(f'Hash added to master list: {res.hash.hexdigest()}')
|
logger.debug(f'Hash added to master list: {res.hash.hexdigest()}')
|
||||||
logger.info(f'Downloaded submission {submission.name}')
|
logger.info(f'Downloaded submission {submission.name}')
|
||||||
|
|
||||||
def scan_existing_files(self) -> list[str]:
|
@staticmethod
|
||||||
|
def scan_existing_files(directory: Path) -> list[str]:
|
||||||
files = []
|
files = []
|
||||||
for (dirpath, dirnames, filenames) in os.walk(self.download_directory):
|
for (dirpath, dirnames, filenames) in os.walk(directory):
|
||||||
files.extend([Path(dirpath, file) for file in filenames])
|
files.extend([Path(dirpath, file) for file in filenames])
|
||||||
logger.info(f'Calculating hashes for {len(files)} files')
|
logger.info(f'Calculating hashes for {len(files)} files')
|
||||||
hash_list = []
|
hash_list = []
|
||||||
|
|||||||
@@ -388,8 +388,7 @@ def test_sanitise_subreddit_name(test_name: str, expected: str):
|
|||||||
assert result == expected
|
assert result == expected
|
||||||
|
|
||||||
|
|
||||||
def test_search_existing_files(downloader_mock: MagicMock):
|
def test_search_existing_files():
|
||||||
downloader_mock.download_directory = Path('.').resolve().expanduser()
|
results = RedditDownloader.scan_existing_files(Path('.'))
|
||||||
results = RedditDownloader.scan_existing_files(downloader_mock)
|
|
||||||
assert all([isinstance(result, str) for result in results])
|
assert all([isinstance(result, str) for result in results])
|
||||||
assert len(results) >= 40
|
assert len(results) >= 40
|
||||||
|
|||||||
@@ -73,14 +73,15 @@ def test_cli_download_multireddit(test_args: list[str], tmp_path: Path):
|
|||||||
@pytest.mark.reddit
|
@pytest.mark.reddit
|
||||||
@pytest.mark.skipif(Path('test_config.cfg') is False, reason='A test config file is required for integration tests')
|
@pytest.mark.skipif(Path('test_config.cfg') is False, reason='A test config file is required for integration tests')
|
||||||
@pytest.mark.parametrize('test_args', (
|
@pytest.mark.parametrize('test_args', (
|
||||||
['--user', 'helen_darten', '-m', 'xxyyzzqwertty', '-L', 10],
|
['--user', 'helen_darten', '-m', 'xxyyzzqwerty', '-L', 10],
|
||||||
))
|
))
|
||||||
def test_cli_download_multireddit_nonexistent(test_args: list[str], tmp_path: Path):
|
def test_cli_download_multireddit_nonexistent(test_args: list[str], tmp_path: Path):
|
||||||
runner = CliRunner()
|
runner = CliRunner()
|
||||||
test_args = ['download', str(tmp_path), '-v', '--config', 'test_config.cfg'] + test_args
|
test_args = ['download', str(tmp_path), '-v', '--config', 'test_config.cfg'] + test_args
|
||||||
result = runner.invoke(cli, test_args)
|
result = runner.invoke(cli, test_args)
|
||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
assert 'Failed to get submissions for multireddit xxyyzzqwerty' in result.output
|
assert 'Failed to get submissions for multireddit' in result.output
|
||||||
|
assert 'received 404 HTTP response' in result.output
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.online
|
@pytest.mark.online
|
||||||
@@ -117,3 +118,19 @@ def test_cli_download_user_data_bad_me_unauthenticated(test_args: list[str], tmp
|
|||||||
result = runner.invoke(cli, test_args)
|
result = runner.invoke(cli, test_args)
|
||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
assert 'To use "me" as a user, an authenticated Reddit instance must be used' in result.output
|
assert 'To use "me" as a user, an authenticated Reddit instance must be used' in result.output
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.online
|
||||||
|
@pytest.mark.reddit
|
||||||
|
@pytest.mark.authenticated
|
||||||
|
@pytest.mark.skipif(Path('test_config.cfg') is False, reason='A test config file is required for integration tests')
|
||||||
|
@pytest.mark.parametrize('test_args', (
|
||||||
|
['--subreddit', 'python', '-L', 10, '--search-existing'],
|
||||||
|
))
|
||||||
|
def test_cli_download_search_existing(test_args: list[str], tmp_path: Path):
|
||||||
|
Path(tmp_path, 'test.txt').touch()
|
||||||
|
runner = CliRunner()
|
||||||
|
test_args = ['download', str(tmp_path), '-v', '--config', 'test_config.cfg'] + test_args
|
||||||
|
result = runner.invoke(cli, test_args)
|
||||||
|
assert result.exit_code == 0
|
||||||
|
assert 'Calculating hashes for' in result.output
|
||||||
|
|||||||
Reference in New Issue
Block a user