Allow multilevel folder schemes (#251)
* Improve some formatting
* Allow multilevel folder names
This commit is contained in:
@@ -16,13 +16,21 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
class FileNameFormatter:
|
class FileNameFormatter:
|
||||||
key_terms = ('title', 'subreddit', 'redditor', 'postid', 'upvotes', 'flair', 'date')
|
key_terms = (
|
||||||
|
'date',
|
||||||
|
'flair',
|
||||||
|
'postid',
|
||||||
|
'redditor',
|
||||||
|
'subreddit',
|
||||||
|
'title',
|
||||||
|
'upvotes',
|
||||||
|
)
|
||||||
|
|
||||||
def __init__(self, file_format_string: str, directory_format_string: str):
|
def __init__(self, file_format_string: str, directory_format_string: str):
|
||||||
if not self.validate_string(file_format_string):
|
if not self.validate_string(file_format_string):
|
||||||
raise BulkDownloaderException(f'"{file_format_string}" is not a valid format string')
|
raise BulkDownloaderException(f'"{file_format_string}" is not a valid format string')
|
||||||
self.file_format_string = file_format_string
|
self.file_format_string = file_format_string
|
||||||
self.directory_format_string = directory_format_string
|
self.directory_format_string: list[str] = directory_format_string.split('/')
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _format_name(submission: (Comment, Submission), format_string: str) -> str:
|
def _format_name(submission: (Comment, Submission), format_string: str) -> str:
|
||||||
@@ -34,8 +42,8 @@ class FileNameFormatter:
|
|||||||
raise BulkDownloaderException(f'Cannot name object {type(submission).__name__}')
|
raise BulkDownloaderException(f'Cannot name object {type(submission).__name__}')
|
||||||
result = format_string
|
result = format_string
|
||||||
for key in attributes.keys():
|
for key in attributes.keys():
|
||||||
if re.search(r'(?i).*{{{}}}.*'.format(key), result):
|
if re.search(fr'(?i).*{{{key}}}.*', result):
|
||||||
result = re.sub(r'(?i){{{}}}'.format(key), str(attributes.get(key, 'unknown')), result)
|
result = re.sub(fr'(?i){{{key}}}', str(attributes.get(key, 'unknown')), result)
|
||||||
logger.log(9, f'Found key string {key} in name')
|
logger.log(9, f'Found key string {key} in name')
|
||||||
|
|
||||||
result = result.replace('/', '')
|
result = result.replace('/', '')
|
||||||
@@ -67,7 +75,7 @@ class FileNameFormatter:
|
|||||||
'postid': comment.id,
|
'postid': comment.id,
|
||||||
'upvotes': comment.score,
|
'upvotes': comment.score,
|
||||||
'flair': '',
|
'flair': '',
|
||||||
'date': comment.created_utc
|
'date': comment.created_utc,
|
||||||
}
|
}
|
||||||
return comment_attributes
|
return comment_attributes
|
||||||
|
|
||||||
@@ -75,8 +83,12 @@ class FileNameFormatter:
|
|||||||
self,
|
self,
|
||||||
resource: Resource,
|
resource: Resource,
|
||||||
destination_directory: Path,
|
destination_directory: Path,
|
||||||
index: Optional[int] = None) -> Path:
|
index: Optional[int] = None,
|
||||||
subfolder = destination_directory / self._format_name(resource.source_submission, self.directory_format_string)
|
) -> Path:
|
||||||
|
subfolder = Path(
|
||||||
|
destination_directory,
|
||||||
|
*[self._format_name(resource.source_submission, part) for part in self.directory_format_string]
|
||||||
|
)
|
||||||
index = f'_{str(index)}' if index else ''
|
index = f'_{str(index)}' if index else ''
|
||||||
if not resource.extension:
|
if not resource.extension:
|
||||||
raise BulkDownloaderException(f'Resource from {resource.url} has no extension')
|
raise BulkDownloaderException(f'Resource from {resource.url} has no extension')
|
||||||
@@ -102,8 +114,11 @@ class FileNameFormatter:
|
|||||||
filename = filename[:-1]
|
filename = filename[:-1]
|
||||||
return filename + ending
|
return filename + ending
|
||||||
|
|
||||||
def format_resource_paths(self, resources: list[Resource],
|
def format_resource_paths(
|
||||||
destination_directory: Path) -> list[tuple[Path, Resource]]:
|
self,
|
||||||
|
resources: list[Resource],
|
||||||
|
destination_directory: Path,
|
||||||
|
) -> list[tuple[Path, Resource]]:
|
||||||
out = []
|
out = []
|
||||||
if len(resources) == 1:
|
if len(resources) == 1:
|
||||||
out.append((self.format_path(resources[0], destination_directory, None), resources[0]))
|
out.append((self.format_path(resources[0], destination_directory, None), resources[0]))
|
||||||
@@ -121,7 +136,8 @@ class FileNameFormatter:
|
|||||||
if result:
|
if result:
|
||||||
if 'POSTID' not in test_string:
|
if 'POSTID' not in test_string:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f'Post ID not included in this file scheme, so file names are not guaranteed to be unique')
|
'Some files might not be downloaded due to name conflicts as filenames are'
|
||||||
|
' not guaranteed to be be unique without {POSTID}')
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ def submission() -> MagicMock:
|
|||||||
return test
|
return test
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture()
|
@pytest.fixture(scope='session')
|
||||||
def reddit_submission(reddit_instance: praw.Reddit) -> praw.models.Submission:
|
def reddit_submission(reddit_instance: praw.Reddit) -> praw.models.Submission:
|
||||||
return reddit_instance.submission(id='lgilgt')
|
return reddit_instance.submission(id='lgilgt')
|
||||||
|
|
||||||
@@ -267,9 +267,30 @@ def test_format_archive_entry_comment(
|
|||||||
test_comment_id: str,
|
test_comment_id: str,
|
||||||
expected_name: str,
|
expected_name: str,
|
||||||
tmp_path: Path,
|
tmp_path: Path,
|
||||||
reddit_instance: praw.Reddit):
|
reddit_instance: praw.Reddit,
|
||||||
|
):
|
||||||
test_comment = reddit_instance.comment(id=test_comment_id)
|
test_comment = reddit_instance.comment(id=test_comment_id)
|
||||||
test_formatter = FileNameFormatter(test_file_scheme, test_folder_scheme)
|
test_formatter = FileNameFormatter(test_file_scheme, test_folder_scheme)
|
||||||
test_entry = Resource(test_comment, '', '.json')
|
test_entry = Resource(test_comment, '', '.json')
|
||||||
result = test_formatter.format_path(test_entry, tmp_path)
|
result = test_formatter.format_path(test_entry, tmp_path)
|
||||||
assert result.name == expected_name
|
assert result.name == expected_name
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(('test_folder_scheme', 'expected'), (
|
||||||
|
('{REDDITOR}/{SUBREDDIT}', 'person/randomreddit'),
|
||||||
|
('{POSTID}/{SUBREDDIT}/{REDDITOR}', '12345/randomreddit/person'),
|
||||||
|
))
|
||||||
|
def test_multilevel_folder_scheme(
|
||||||
|
test_folder_scheme: str,
|
||||||
|
expected: str,
|
||||||
|
tmp_path: Path,
|
||||||
|
submission: MagicMock,
|
||||||
|
):
|
||||||
|
test_formatter = FileNameFormatter('{POSTID}', test_folder_scheme)
|
||||||
|
test_resource = MagicMock()
|
||||||
|
test_resource.source_submission = submission
|
||||||
|
test_resource.extension = '.png'
|
||||||
|
result = test_formatter.format_path(test_resource, tmp_path)
|
||||||
|
result = result.relative_to(tmp_path)
|
||||||
|
assert str(result.parent) == expected
|
||||||
|
assert len(result.parents) == (len(expected.split('/')) + 1)
|
||||||
|
|||||||
Reference in New Issue
Block a user