From 9a1e1ebea1750923a5e558326d5f192ae8adbe6f Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Tue, 18 May 2021 12:39:08 +1000 Subject: [PATCH] Add path limit fix --- bdfr/file_name_formatter.py | 27 ++++++++++++++---- tests/test_file_name_formatter.py | 47 +++++++++++++++++++++---------- 2 files changed, 53 insertions(+), 21 deletions(-) diff --git a/bdfr/file_name_formatter.py b/bdfr/file_name_formatter.py index c6c13c2..2fbf95f 100644 --- a/bdfr/file_name_formatter.py +++ b/bdfr/file_name_formatter.py @@ -4,6 +4,7 @@ import datetime import logging import platform import re +import subprocess from pathlib import Path from typing import Optional @@ -104,32 +105,46 @@ class FileNameFormatter: ) -> Path: subfolder = Path( destination_directory, - *[self._format_name(resource.source_submission, part) for part in self.directory_format_string] + *[self._format_name(resource.source_submission, part) for part in self.directory_format_string], ) index = f'_{str(index)}' if index else '' if not resource.extension: raise BulkDownloaderException(f'Resource from {resource.url} has no extension') ending = index + resource.extension file_name = str(self._format_name(resource.source_submission, self.file_format_string)) - file_name = self._limit_file_name_length(file_name, ending) try: - file_path = Path(subfolder, file_name) + file_path = self._limit_file_name_length(file_name, ending, subfolder) except TypeError: raise BulkDownloaderException(f'Could not determine path name: {subfolder}, {index}, {resource.extension}') return file_path @staticmethod - def _limit_file_name_length(filename: str, ending: str) -> str: + def _limit_file_name_length(filename: str, ending: str, root: Path) -> Path: + root = root.resolve().expanduser() possible_id = re.search(r'((?:_\w{6})?$)', filename) if possible_id: ending = possible_id.group(1) + ending filename = filename[:possible_id.start()] + max_path = FileNameFormatter.find_max_path_length() max_length_chars = 255 - len(ending) max_length_bytes = 255 - len(ending.encode('utf-8')) - while len(filename) > max_length_chars or len(filename.encode('utf-8')) > max_length_bytes: + max_path_length = max_path - len(ending) - len(str(root)) - 1 + while len(filename) > max_length_chars or \ + len(filename.encode('utf-8')) > max_length_bytes or \ + len(filename) > max_path_length: filename = filename[:-1] - return filename + ending + return Path(root, filename + ending) + + @staticmethod + def find_max_path_length() -> int: + try: + return int(subprocess.check_output(['getconf', 'PATH_MAX', '/'])) + except (ValueError, subprocess.CalledProcessError, OSError): + if platform.system() == 'Windows': + return 260 + else: + return 4096 def format_resource_paths( self, diff --git a/tests/test_file_name_formatter.py b/tests/test_file_name_formatter.py index b1faf86..cc13e0c 100644 --- a/tests/test_file_name_formatter.py +++ b/tests/test_file_name_formatter.py @@ -1,11 +1,12 @@ #!/usr/bin/env python3 # coding=utf-8 +import platform +import unittest.mock from datetime import datetime from pathlib import Path from typing import Optional from unittest.mock import MagicMock -import platform import praw.models import pytest @@ -28,10 +29,10 @@ def submission() -> MagicMock: return test -def do_test_string_equality(result: str, expected: str) -> bool: +def do_test_string_equality(result: [Path, str], expected: str) -> bool: if platform.system() == 'Windows': expected = FileNameFormatter._format_for_windows(expected) - return expected == result + return str(result).endswith(expected) def do_test_path_equality(result: Path, expected: str) -> bool: @@ -41,7 +42,7 @@ def do_test_path_equality(result: Path, expected: str) -> bool: expected = Path(*expected) else: expected = Path(expected) - return result == expected + return str(result).endswith(str(expected)) @pytest.fixture(scope='session') @@ -173,7 +174,9 @@ def test_format_multiple_resources(): test_formatter = FileNameFormatter('{TITLE}', '', 'ISO') results = test_formatter.format_resource_paths(mocks, Path('.')) results = set([str(res[0]) for res in results]) - assert results == {'test_1.png', 'test_2.png', 'test_3.png', 'test_4.png'} + expected = set([str(Path(Path('.'), name).resolve()) + for name in ('test_1.png', 'test_2.png', 'test_3.png', 'test_4.png')]) + assert results == expected @pytest.mark.parametrize(('test_filename', 'test_ending'), ( @@ -183,10 +186,11 @@ def test_format_multiple_resources(): ('😍💕✨' * 100, '_1.png'), )) def test_limit_filename_length(test_filename: str, test_ending: str): - result = FileNameFormatter._limit_file_name_length(test_filename, test_ending) - assert len(result) <= 255 - assert len(result.encode('utf-8')) <= 255 - assert isinstance(result, str) + result = FileNameFormatter._limit_file_name_length(test_filename, test_ending, Path('.')) + assert len(result.name) <= 255 + assert len(result.name.encode('utf-8')) <= 255 + assert len(str(result)) <= FileNameFormatter.find_max_path_length() + assert isinstance(result, Path) @pytest.mark.parametrize(('test_filename', 'test_ending', 'expected_end'), ( @@ -201,11 +205,11 @@ def test_limit_filename_length(test_filename: str, test_ending: str): ('😍💕✨' * 100 + '_aaa1aa', '_1.png', '_aaa1aa_1.png'), )) def test_preserve_id_append_when_shortening(test_filename: str, test_ending: str, expected_end: str): - result = FileNameFormatter._limit_file_name_length(test_filename, test_ending) - assert len(result) <= 255 - assert len(result.encode('utf-8')) <= 255 - assert isinstance(result, str) - assert result.endswith(expected_end) + result = FileNameFormatter._limit_file_name_length(test_filename, test_ending, Path('.')) + assert len(result.name) <= 255 + assert len(result.name.encode('utf-8')) <= 255 + assert result.name.endswith(expected_end) + assert len(str(result)) <= FileNameFormatter.find_max_path_length() def test_shorten_filenames(submission: MagicMock, tmp_path: Path): @@ -295,7 +299,7 @@ def test_format_archive_entry_comment( test_formatter = FileNameFormatter(test_file_scheme, test_folder_scheme, 'ISO') test_entry = Resource(test_comment, '', '.json') result = test_formatter.format_path(test_entry, tmp_path) - assert do_test_string_equality(result.name, expected_name) + assert do_test_string_equality(result, expected_name) @pytest.mark.parametrize(('test_folder_scheme', 'expected'), ( @@ -364,3 +368,16 @@ def test_time_string_formats(test_time_format: str, expected: str): test_formatter = FileNameFormatter('{TITLE}', '', test_time_format) result = test_formatter._convert_timestamp(test_time.timestamp()) assert result == expected + + +def test_get_max_path_length(): + result = FileNameFormatter.find_max_path_length() + assert result in (4096, 260) + + +def test_windows_max_path(): + with unittest.mock.patch('platform.system', return_value='Windows'): + with unittest.mock.patch('bdfr.file_name_formatter.FileNameFormatter.find_max_path_length', return_value=260): + result = FileNameFormatter._limit_file_name_length('test' * 50, '_1.png', Path('test' * 25)) + assert len(str(result)) <= 260 + assert len(result.name) <= 75