diff --git a/bulkredditdownloader/file_name_formatter.py b/bulkredditdownloader/file_name_formatter.py
index 6eea2d0..aac4b48 100644
--- a/bulkredditdownloader/file_name_formatter.py
+++ b/bulkredditdownloader/file_name_formatter.py
@@ -16,13 +16,21 @@ logger = logging.getLogger(__name__)
 
 
 class FileNameFormatter:
-    key_terms = ('title', 'subreddit', 'redditor', 'postid', 'upvotes', 'flair', 'date')
+    key_terms = (
+        'date',
+        'flair',
+        'postid',
+        'redditor',
+        'subreddit',
+        'title',
+        'upvotes',
+    )
 
     def __init__(self, file_format_string: str, directory_format_string: str):
         if not self.validate_string(file_format_string):
             raise BulkDownloaderException(f'"{file_format_string}" is not a valid format string')
         self.file_format_string = file_format_string
-        self.directory_format_string = directory_format_string
+        self.directory_format_string: list[str] = directory_format_string.split('/')
 
     @staticmethod
     def _format_name(submission: (Comment, Submission), format_string: str) -> str:
@@ -34,8 +42,8 @@ class FileNameFormatter:
             raise BulkDownloaderException(f'Cannot name object {type(submission).__name__}')
         result = format_string
         for key in attributes.keys():
-            if re.search(r'(?i).*{{{}}}.*'.format(key), result):
-                result = re.sub(r'(?i){{{}}}'.format(key), str(attributes.get(key, 'unknown')), result)
+            if re.search(fr'(?i).*{{{key}}}.*', result):
+                result = re.sub(fr'(?i){{{key}}}', str(attributes.get(key, 'unknown')), result)
                 logger.log(9, f'Found key string {key} in name')
 
         result = result.replace('/', '')
@@ -67,7 +75,7 @@ class FileNameFormatter:
             'postid': comment.id,
             'upvotes': comment.score,
             'flair': '',
-            'date': comment.created_utc
+            'date': comment.created_utc,
         }
         return comment_attributes
 
@@ -75,8 +83,12 @@ class FileNameFormatter:
             self,
             resource: Resource,
             destination_directory: Path,
-            index: Optional[int] = None) -> Path:
-        subfolder = destination_directory / self._format_name(resource.source_submission, self.directory_format_string)
+            index: Optional[int] = None,
+    ) -> Path:
+        subfolder = Path(
+            destination_directory,
+            *[self._format_name(resource.source_submission, part) for part in self.directory_format_string]
+        )
         index = f'_{str(index)}' if index else ''
         if not resource.extension:
             raise BulkDownloaderException(f'Resource from {resource.url} has no extension')
@@ -102,8 +114,11 @@ class FileNameFormatter:
             filename = filename[:-1]
         return filename + ending
 
-    def format_resource_paths(self, resources: list[Resource],
-                              destination_directory: Path) -> list[tuple[Path, Resource]]:
+    def format_resource_paths(
+            self,
+            resources: list[Resource],
+            destination_directory: Path,
+    ) -> list[tuple[Path, Resource]]:
         out = []
         if len(resources) == 1:
             out.append((self.format_path(resources[0], destination_directory, None), resources[0]))
@@ -121,7 +136,8 @@ class FileNameFormatter:
         if result:
             if 'POSTID' not in test_string:
                 logger.warning(
-                    f'Post ID not included in this file scheme, so file names are not guaranteed to be unique')
+                    'Some files might not be downloaded due to name conflicts as filenames are'
+                    ' not guaranteed to be unique without {POSTID}')
             return True
         else:
             return False
diff --git a/bulkredditdownloader/tests/test_file_name_formatter.py b/bulkredditdownloader/tests/test_file_name_formatter.py
index 2f6e9c6..bf8cee1 100644
--- a/bulkredditdownloader/tests/test_file_name_formatter.py
+++ b/bulkredditdownloader/tests/test_file_name_formatter.py
@@ -26,7 +26,7 @@ def submission() -> MagicMock:
     return test
 
 
-@pytest.fixture()
+@pytest.fixture(scope='session')
 def reddit_submission(reddit_instance: praw.Reddit) -> praw.models.Submission:
     return reddit_instance.submission(id='lgilgt')
 
@@ -267,9 +267,30 @@ def test_format_archive_entry_comment(
         test_comment_id: str,
         expected_name: str,
         tmp_path: Path,
-        reddit_instance: praw.Reddit):
+        reddit_instance: praw.Reddit,
+):
     test_comment = reddit_instance.comment(id=test_comment_id)
     test_formatter = FileNameFormatter(test_file_scheme, test_folder_scheme)
     test_entry = Resource(test_comment, '', '.json')
     result = test_formatter.format_path(test_entry, tmp_path)
     assert result.name == expected_name
+
+
+@pytest.mark.parametrize(('test_folder_scheme', 'expected'), (
+    ('{REDDITOR}/{SUBREDDIT}', 'person/randomreddit'),
+    ('{POSTID}/{SUBREDDIT}/{REDDITOR}', '12345/randomreddit/person'),
+))
+def test_multilevel_folder_scheme(
+        test_folder_scheme: str,
+        expected: str,
+        tmp_path: Path,
+        submission: MagicMock,
+):
+    test_formatter = FileNameFormatter('{POSTID}', test_folder_scheme)
+    test_resource = MagicMock()
+    test_resource.source_submission = submission
+    test_resource.extension = '.png'
+    result = test_formatter.format_path(test_resource, tmp_path)
+    result = result.relative_to(tmp_path)
+    assert str(result.parent) == expected
+    assert len(result.parents) == (len(expected.split('/')) + 1)