Format according to the black standard
This commit is contained in:
@@ -18,20 +18,20 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
class FileNameFormatter:
|
||||
key_terms = (
|
||||
'date',
|
||||
'flair',
|
||||
'postid',
|
||||
'redditor',
|
||||
'subreddit',
|
||||
'title',
|
||||
'upvotes',
|
||||
"date",
|
||||
"flair",
|
||||
"postid",
|
||||
"redditor",
|
||||
"subreddit",
|
||||
"title",
|
||||
"upvotes",
|
||||
)
|
||||
|
||||
def __init__(self, file_format_string: str, directory_format_string: str, time_format_string: str):
|
||||
if not self.validate_string(file_format_string):
|
||||
raise BulkDownloaderException(f'"{file_format_string}" is not a valid format string')
|
||||
self.file_format_string = file_format_string
|
||||
self.directory_format_string: list[str] = directory_format_string.split('/')
|
||||
self.directory_format_string: list[str] = directory_format_string.split("/")
|
||||
self.time_format_string = time_format_string
|
||||
|
||||
def _format_name(self, submission: Union[Comment, Submission], format_string: str) -> str:
|
||||
@@ -40,108 +40,111 @@ class FileNameFormatter:
|
||||
elif isinstance(submission, Comment):
|
||||
attributes = self._generate_name_dict_from_comment(submission)
|
||||
else:
|
||||
raise BulkDownloaderException(f'Cannot name object {type(submission).__name__}')
|
||||
raise BulkDownloaderException(f"Cannot name object {type(submission).__name__}")
|
||||
result = format_string
|
||||
for key in attributes.keys():
|
||||
if re.search(fr'(?i).*{{{key}}}.*', result):
|
||||
key_value = str(attributes.get(key, 'unknown'))
|
||||
if re.search(rf"(?i).*{{{key}}}.*", result):
|
||||
key_value = str(attributes.get(key, "unknown"))
|
||||
key_value = FileNameFormatter._convert_unicode_escapes(key_value)
|
||||
key_value = key_value.replace('\\', '\\\\')
|
||||
result = re.sub(fr'(?i){{{key}}}', key_value, result)
|
||||
key_value = key_value.replace("\\", "\\\\")
|
||||
result = re.sub(rf"(?i){{{key}}}", key_value, result)
|
||||
|
||||
result = result.replace('/', '')
|
||||
result = result.replace("/", "")
|
||||
|
||||
if platform.system() == 'Windows':
|
||||
if platform.system() == "Windows":
|
||||
result = FileNameFormatter._format_for_windows(result)
|
||||
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def _convert_unicode_escapes(in_string: str) -> str:
|
||||
pattern = re.compile(r'(\\u\d{4})')
|
||||
pattern = re.compile(r"(\\u\d{4})")
|
||||
matches = re.search(pattern, in_string)
|
||||
if matches:
|
||||
for match in matches.groups():
|
||||
converted_match = bytes(match, 'utf-8').decode('unicode-escape')
|
||||
converted_match = bytes(match, "utf-8").decode("unicode-escape")
|
||||
in_string = in_string.replace(match, converted_match)
|
||||
return in_string
|
||||
|
||||
def _generate_name_dict_from_submission(self, submission: Submission) -> dict:
|
||||
submission_attributes = {
|
||||
'title': submission.title,
|
||||
'subreddit': submission.subreddit.display_name,
|
||||
'redditor': submission.author.name if submission.author else 'DELETED',
|
||||
'postid': submission.id,
|
||||
'upvotes': submission.score,
|
||||
'flair': submission.link_flair_text,
|
||||
'date': self._convert_timestamp(submission.created_utc),
|
||||
"title": submission.title,
|
||||
"subreddit": submission.subreddit.display_name,
|
||||
"redditor": submission.author.name if submission.author else "DELETED",
|
||||
"postid": submission.id,
|
||||
"upvotes": submission.score,
|
||||
"flair": submission.link_flair_text,
|
||||
"date": self._convert_timestamp(submission.created_utc),
|
||||
}
|
||||
return submission_attributes
|
||||
|
||||
def _convert_timestamp(self, timestamp: float) -> str:
|
||||
input_time = datetime.datetime.fromtimestamp(timestamp)
|
||||
if self.time_format_string.upper().strip() == 'ISO':
|
||||
if self.time_format_string.upper().strip() == "ISO":
|
||||
return input_time.isoformat()
|
||||
else:
|
||||
return input_time.strftime(self.time_format_string)
|
||||
|
||||
def _generate_name_dict_from_comment(self, comment: Comment) -> dict:
|
||||
comment_attributes = {
|
||||
'title': comment.submission.title,
|
||||
'subreddit': comment.subreddit.display_name,
|
||||
'redditor': comment.author.name if comment.author else 'DELETED',
|
||||
'postid': comment.id,
|
||||
'upvotes': comment.score,
|
||||
'flair': '',
|
||||
'date': self._convert_timestamp(comment.created_utc),
|
||||
"title": comment.submission.title,
|
||||
"subreddit": comment.subreddit.display_name,
|
||||
"redditor": comment.author.name if comment.author else "DELETED",
|
||||
"postid": comment.id,
|
||||
"upvotes": comment.score,
|
||||
"flair": "",
|
||||
"date": self._convert_timestamp(comment.created_utc),
|
||||
}
|
||||
return comment_attributes
|
||||
|
||||
def format_path(
|
||||
self,
|
||||
resource: Resource,
|
||||
destination_directory: Path,
|
||||
index: Optional[int] = None,
|
||||
self,
|
||||
resource: Resource,
|
||||
destination_directory: Path,
|
||||
index: Optional[int] = None,
|
||||
) -> Path:
|
||||
subfolder = Path(
|
||||
destination_directory,
|
||||
*[self._format_name(resource.source_submission, part) for part in self.directory_format_string],
|
||||
)
|
||||
index = f'_{index}' if index else ''
|
||||
index = f"_{index}" if index else ""
|
||||
if not resource.extension:
|
||||
raise BulkDownloaderException(f'Resource from {resource.url} has no extension')
|
||||
raise BulkDownloaderException(f"Resource from {resource.url} has no extension")
|
||||
file_name = str(self._format_name(resource.source_submission, self.file_format_string))
|
||||
|
||||
file_name = re.sub(r'\n', ' ', file_name)
|
||||
file_name = re.sub(r"\n", " ", file_name)
|
||||
|
||||
if not re.match(r'.*\.$', file_name) and not re.match(r'^\..*', resource.extension):
|
||||
ending = index + '.' + resource.extension
|
||||
if not re.match(r".*\.$", file_name) and not re.match(r"^\..*", resource.extension):
|
||||
ending = index + "." + resource.extension
|
||||
else:
|
||||
ending = index + resource.extension
|
||||
|
||||
try:
|
||||
file_path = self.limit_file_name_length(file_name, ending, subfolder)
|
||||
except TypeError:
|
||||
raise BulkDownloaderException(f'Could not determine path name: {subfolder}, {index}, {resource.extension}')
|
||||
raise BulkDownloaderException(f"Could not determine path name: {subfolder}, {index}, {resource.extension}")
|
||||
return file_path
|
||||
|
||||
@staticmethod
|
||||
def limit_file_name_length(filename: str, ending: str, root: Path) -> Path:
|
||||
root = root.resolve().expanduser()
|
||||
possible_id = re.search(r'((?:_\w{6})?$)', filename)
|
||||
possible_id = re.search(r"((?:_\w{6})?$)", filename)
|
||||
if possible_id:
|
||||
ending = possible_id.group(1) + ending
|
||||
filename = filename[:possible_id.start()]
|
||||
filename = filename[: possible_id.start()]
|
||||
max_path = FileNameFormatter.find_max_path_length()
|
||||
max_file_part_length_chars = 255 - len(ending)
|
||||
max_file_part_length_bytes = 255 - len(ending.encode('utf-8'))
|
||||
max_file_part_length_bytes = 255 - len(ending.encode("utf-8"))
|
||||
max_path_length = max_path - len(ending) - len(str(root)) - 1
|
||||
|
||||
out = Path(root, filename + ending)
|
||||
while any([len(filename) > max_file_part_length_chars,
|
||||
len(filename.encode('utf-8')) > max_file_part_length_bytes,
|
||||
len(str(out)) > max_path_length,
|
||||
]):
|
||||
while any(
|
||||
[
|
||||
len(filename) > max_file_part_length_chars,
|
||||
len(filename.encode("utf-8")) > max_file_part_length_bytes,
|
||||
len(str(out)) > max_path_length,
|
||||
]
|
||||
):
|
||||
filename = filename[:-1]
|
||||
out = Path(root, filename + ending)
|
||||
|
||||
@@ -150,44 +153,46 @@ class FileNameFormatter:
|
||||
@staticmethod
|
||||
def find_max_path_length() -> int:
|
||||
try:
|
||||
return int(subprocess.check_output(['getconf', 'PATH_MAX', '/']))
|
||||
return int(subprocess.check_output(["getconf", "PATH_MAX", "/"]))
|
||||
except (ValueError, subprocess.CalledProcessError, OSError):
|
||||
if platform.system() == 'Windows':
|
||||
if platform.system() == "Windows":
|
||||
return 260
|
||||
else:
|
||||
return 4096
|
||||
|
||||
def format_resource_paths(
|
||||
self,
|
||||
resources: list[Resource],
|
||||
destination_directory: Path,
|
||||
self,
|
||||
resources: list[Resource],
|
||||
destination_directory: Path,
|
||||
) -> list[tuple[Path, Resource]]:
|
||||
out = []
|
||||
if len(resources) == 1:
|
||||
try:
|
||||
out.append((self.format_path(resources[0], destination_directory, None), resources[0]))
|
||||
except BulkDownloaderException as e:
|
||||
logger.error(f'Could not generate file path for resource {resources[0].url}: {e}')
|
||||
logger.exception('Could not generate file path')
|
||||
logger.error(f"Could not generate file path for resource {resources[0].url}: {e}")
|
||||
logger.exception("Could not generate file path")
|
||||
else:
|
||||
for i, res in enumerate(resources, start=1):
|
||||
logger.log(9, f'Formatting filename with index {i}')
|
||||
logger.log(9, f"Formatting filename with index {i}")
|
||||
try:
|
||||
out.append((self.format_path(res, destination_directory, i), res))
|
||||
except BulkDownloaderException as e:
|
||||
logger.error(f'Could not generate file path for resource {res.url}: {e}')
|
||||
logger.exception('Could not generate file path')
|
||||
logger.error(f"Could not generate file path for resource {res.url}: {e}")
|
||||
logger.exception("Could not generate file path")
|
||||
return out
|
||||
|
||||
@staticmethod
|
||||
def validate_string(test_string: str) -> bool:
|
||||
if not test_string:
|
||||
return False
|
||||
result = any([f'{{{key}}}' in test_string.lower() for key in FileNameFormatter.key_terms])
|
||||
result = any([f"{{{key}}}" in test_string.lower() for key in FileNameFormatter.key_terms])
|
||||
if result:
|
||||
if 'POSTID' not in test_string:
|
||||
logger.warning('Some files might not be downloaded due to name conflicts as filenames are'
|
||||
' not guaranteed to be be unique without {POSTID}')
|
||||
if "POSTID" not in test_string:
|
||||
logger.warning(
|
||||
"Some files might not be downloaded due to name conflicts as filenames are"
|
||||
" not guaranteed to be be unique without {POSTID}"
|
||||
)
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
@@ -196,11 +201,11 @@ class FileNameFormatter:
|
||||
def _format_for_windows(input_string: str) -> str:
|
||||
invalid_characters = r'<>:"\/|?*'
|
||||
for char in invalid_characters:
|
||||
input_string = input_string.replace(char, '')
|
||||
input_string = input_string.replace(char, "")
|
||||
input_string = FileNameFormatter._strip_emojis(input_string)
|
||||
return input_string
|
||||
|
||||
@staticmethod
|
||||
def _strip_emojis(input_string: str) -> str:
|
||||
result = input_string.encode('ascii', errors='ignore').decode('utf-8')
|
||||
result = input_string.encode("ascii", errors="ignore").decode("utf-8")
|
||||
return result
|
||||
|
||||
Reference in New Issue
Block a user