Archiver is smarter for comments (#242)

* Add comment name generation to file name formatter
* Refactor to reduce duplication
* Refactor archive entry classes
* Refactor archiver class a bit
* Refactor method
* Fix comment retrieval
* Add comment-downloading to archiver
* Update test
* Update test
2021-04-01 18:37:20 +10:00
parent 75d74a5362
commit 32c9d6184c
14 changed files with 329 additions and 138 deletions
--- a/bulkredditdownloader/tests/archive_entry/__init__.py
+++ b/bulkredditdownloader/tests/archive_entry/__init__.py
@@ -0,0 +1,2 @@
+#!/usr/bin/env python3
+# coding=utf-8
--- a/bulkredditdownloader/tests/archive_entry/test_comment_archive_entry.py
+++ b/bulkredditdownloader/tests/archive_entry/test_comment_archive_entry.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+# coding=utf-8
+
+import praw
+import pytest
+
+from bulkredditdownloader.archive_entry.comment_archive_entry import CommentArchiveEntry
+
+
+@pytest.mark.online
+@pytest.mark.reddit
+@pytest.mark.parametrize(('test_comment_id', 'expected_dict'), (
+    ('gstd4hk', {
+        'author': 'james_pic',
+        'subreddit': 'Python',
+        'submission': 'mgi4op',
+        'submission_title': '76% Faster CPython',
+    }),
+))
+def test_get_comment_details(test_comment_id: str, expected_dict: dict, reddit_instance: praw.Reddit):
+    """Compile a CommentArchiveEntry for the comment and check every expected field in the result."""
+    compiled = CommentArchiveEntry(reddit_instance.comment(id=test_comment_id)).compile()
+    for field, expected_value in expected_dict.items():
+        assert compiled.get(field) == expected_value
+
+
+@pytest.mark.online
+@pytest.mark.reddit
+@pytest.mark.parametrize(('test_comment_id', 'expected_min_comments'), (
+    ('gstd4hk', 4),
+    ('gsvyste', 3),
+    ('gsxnvvb', 5),
+))
+def test_get_comment_replies(test_comment_id: str, expected_min_comments: int, reddit_instance: praw.Reddit):
+    """Compile a CommentArchiveEntry and check that 'replies' holds at least the expected number of items."""
+    compiled = CommentArchiveEntry(reddit_instance.comment(id=test_comment_id)).compile()
+    reply_count = len(compiled.get('replies'))
+    assert reply_count >= expected_min_comments
--- a/bulkredditdownloader/tests/archive_entry/test_submission_archive_entry.py
+++ b/bulkredditdownloader/tests/archive_entry/test_submission_archive_entry.py
@@ -4,7 +4,7 @@
 import praw
 import pytest

-from bulkredditdownloader.archive_entry import ArchiveEntry
+from bulkredditdownloader.archive_entry.submission_archive_entry import SubmissionArchiveEntry


@pytest.mark.online
@@ -14,9 +14,9 @@ from bulkredditdownloader.archive_entry import ArchiveEntry
 ))
 def test_get_comments(test_submission_id: str, min_comments: int, reddit_instance: praw.Reddit):
    test_submission = reddit_instance.submission(id=test_submission_id)
-    test_archive_entry = ArchiveEntry(test_submission)
-    test_archive_entry._get_comments()
-    assert len(test_archive_entry.comments) >= min_comments
+    test_archive_entry = SubmissionArchiveEntry(test_submission)
+    results = test_archive_entry._get_comments()
+    assert len(results) >= min_comments


@pytest.mark.online
@@ -27,6 +27,6 @@ def test_get_comments(test_submission_id: str, min_comments: int, reddit_instanc
 ))
 def test_get_post_details(test_submission_id: str, expected_dict: dict, reddit_instance: praw.Reddit):
    test_submission = reddit_instance.submission(id=test_submission_id)
-    test_archive_entry = ArchiveEntry(test_submission)
+    test_archive_entry = SubmissionArchiveEntry(test_submission)
    test_archive_entry._get_post_details()
-    assert all([test_archive_entry.post_details[key] == expected_dict[key] for key in expected_dict.keys()])
+    assert all([test_archive_entry.post_details.get(key) == expected_dict[key] for key in expected_dict.keys()])
--- a/bulkredditdownloader/tests/downloaders/test_erome.py
+++ b/bulkredditdownloader/tests/downloaders/test_erome.py
@@ -10,7 +10,7 @@ from bulkredditdownloader.site_downloaders.erome import Erome

@pytest.mark.online
@pytest.mark.parametrize(('test_url', 'expected_urls'), (
-    ('https://www.erome.com/a/vqtPuLXh', ('https://s6.erome.com/365/vqtPuLXh/KH2qBT99_480p.mp4',)),
+    ('https://www.erome.com/a/vqtPuLXh', ('https://s11.erome.com/365/vqtPuLXh/KH2qBT99_480p.mp4',)),
    ('https://www.erome.com/a/ORhX0FZz',
     ('https://s4.erome.com/355/ORhX0FZz/9IYQocM9_480p.mp4',
      'https://s4.erome.com/355/ORhX0FZz/9eEDc8xm_480p.mp4',
--- a/bulkredditdownloader/tests/test_archiver.py
+++ b/bulkredditdownloader/tests/test_archiver.py
@@ -7,7 +7,7 @@ from unittest.mock import MagicMock
 import praw
 import pytest

-from bulkredditdownloader.archive_entry import ArchiveEntry
+from bulkredditdownloader.archive_entry.submission_archive_entry import SubmissionArchiveEntry
 from bulkredditdownloader.archiver import Archiver


@@ -21,9 +21,9 @@ def test_write_submission_json(test_submission_id: str, tmp_path: Path, reddit_i
    test_path = Path(tmp_path, 'test.json')
    test_submission = reddit_instance.submission(id=test_submission_id)
    archiver_mock.file_name_formatter.format_path.return_value = test_path
-    test_entry = ArchiveEntry(test_submission)
-    Archiver._write_submission_json(archiver_mock, test_entry)
-    assert test_path.exists()
+    test_entry = SubmissionArchiveEntry(test_submission)
+    Archiver._write_entry_json(archiver_mock, test_entry)
+    archiver_mock._write_content_to_disk.assert_called_once()


@pytest.mark.online
@@ -36,9 +36,9 @@ def test_write_submission_xml(test_submission_id: str, tmp_path: Path, reddit_in
    test_path = Path(tmp_path, 'test.xml')
    test_submission = reddit_instance.submission(id=test_submission_id)
    archiver_mock.file_name_formatter.format_path.return_value = test_path
-    test_entry = ArchiveEntry(test_submission)
-    Archiver._write_submission_xml(archiver_mock, test_entry)
-    assert test_path.exists()
+    test_entry = SubmissionArchiveEntry(test_submission)
+    Archiver._write_entry_xml(archiver_mock, test_entry)
+    archiver_mock._write_content_to_disk.assert_called_once()


@pytest.mark.online
@@ -48,9 +48,10 @@ def test_write_submission_xml(test_submission_id: str, tmp_path: Path, reddit_in
 ))
 def test_write_submission_yaml(test_submission_id: str, tmp_path: Path, reddit_instance: praw.Reddit):
    archiver_mock = MagicMock()
+    archiver_mock.download_directory = tmp_path
    test_path = Path(tmp_path, 'test.yaml')
    test_submission = reddit_instance.submission(id=test_submission_id)
    archiver_mock.file_name_formatter.format_path.return_value = test_path
-    test_entry = ArchiveEntry(test_submission)
-    Archiver._write_submission_yaml(archiver_mock, test_entry)
-    assert test_path.exists()
+    test_entry = SubmissionArchiveEntry(test_submission)
+    Archiver._write_entry_yaml(archiver_mock, test_entry)
+    archiver_mock._write_content_to_disk.assert_called_once()
--- a/bulkredditdownloader/tests/test_file_name_formatter.py
+++ b/bulkredditdownloader/tests/test_file_name_formatter.py
@@ -22,11 +22,12 @@ def submission() -> MagicMock:
    test.score = 1000
    test.link_flair_text = 'test_flair'
    test.created_utc = 123456789
+    test.__class__ = praw.models.Submission
    return test


@pytest.fixture()
-def reddit_submission(reddit_instance) -> praw.models.Submission:
+def reddit_submission(reddit_instance: praw.Reddit) -> praw.models.Submission:
    return reddit_instance.submission(id='lgilgt')


@@ -137,6 +138,7 @@ def test_format_multiple_resources():
        new_mock.url = 'https://example.com/test.png'
        new_mock.extension = '.png'
        new_mock.source_submission.title = 'test'
+        new_mock.source_submission.__class__ = praw.models.Submission
        mocks.append(new_mock)
    test_formatter = FileNameFormatter('{TITLE}', '')
    results = test_formatter.format_resource_paths(mocks, Path('.'))
@@ -176,13 +178,12 @@ def test_preserve_id_append_when_shortening(test_filename: str, test_ending: str
    assert result.endswith(expected_end)


-def test_shorten_filenames(tmp_path: Path):
-    test_submission = MagicMock()
-    test_submission.title = 'A' * 300
-    test_submission.author.name = 'test'
-    test_submission.subreddit.display_name = 'test'
-    test_submission.id = 'BBBBBB'
-    test_resource = Resource(test_submission, 'www.example.com/empty', '.jpeg')
+def test_shorten_filenames(submission: MagicMock, tmp_path: Path):
+    submission.title = 'A' * 300
+    submission.author.name = 'test'
+    submission.subreddit.display_name = 'test'
+    submission.id = 'BBBBBB'
+    test_resource = Resource(submission, 'www.example.com/empty', '.jpeg')
    test_formatter = FileNameFormatter('{REDDITOR}_{TITLE}_{POSTID}', '{SUBREDDIT}')
    result = test_formatter.format_path(test_resource, tmp_path)
    result.parent.mkdir(parents=True)
@@ -212,3 +213,50 @@ def test_format_file_name_for_windows(test_string: str, expected: str):
 def test_strip_emojies(test_string: str, expected: str):
    result = FileNameFormatter._strip_emojis(test_string)
    assert result == expected
+
+
+@pytest.mark.online
+@pytest.mark.reddit
+@pytest.mark.parametrize(('test_submission_id', 'expected'), (
+    ('mfuteh', {'title': 'Why Do Interviewers Ask Linked List Questions?', 'redditor': 'mjgardner'}),
+))
+def test_generate_dict_for_submission(test_submission_id: str, expected: dict, reddit_instance: praw.Reddit):
+    """Check that the name dict generated from a submission contains every expected key/value pair."""
+    source_submission = reddit_instance.submission(id=test_submission_id)
+    name_dict = FileNameFormatter._generate_name_dict_from_submission(source_submission)
+    for field, expected_value in expected.items():
+        assert name_dict.get(field) == expected_value
+
+
+@pytest.mark.online
+@pytest.mark.reddit
+@pytest.mark.parametrize(('test_comment_id', 'expected'), (
+    ('gsq0yuw', {
+        'title': 'Why Do Interviewers Ask Linked List Questions?',
+        'redditor': 'Doctor-Dapper',
+        'postid': 'gsq0yuw',
+        'flair': '',
+    }),
+))
+def test_generate_dict_for_comment(test_comment_id: str, expected: dict, reddit_instance: praw.Reddit):
+    """Check that the name dict generated from a comment contains every expected key/value pair."""
+    source_comment = reddit_instance.comment(id=test_comment_id)
+    name_dict = FileNameFormatter._generate_name_dict_from_comment(source_comment)
+    for field, expected_value in expected.items():
+        assert name_dict.get(field) == expected_value
+
+
+@pytest.mark.online
+@pytest.mark.reddit
+@pytest.mark.parametrize(('test_file_scheme', 'test_folder_scheme', 'test_comment_id', 'expected_name'), (
+    ('{POSTID}', '', 'gsoubde', 'gsoubde.json'),
+    ('{REDDITOR}_{POSTID}', '', 'gsoubde', 'DELETED_gsoubde.json'),
+))
+def test_format_archive_entry_comment(
+        test_file_scheme: str,
+        test_folder_scheme: str,
+        test_comment_id: str,
+        expected_name: str,
+        tmp_path: Path,
+        reddit_instance: praw.Reddit):
+    """Format a path for a comment wrapped in a '.json' Resource and check the resulting file name."""
+    formatter = FileNameFormatter(test_file_scheme, test_folder_scheme)
+    comment_resource = Resource(reddit_instance.comment(id=test_comment_id), '', '.json')
+    formatted_path = formatter.format_path(comment_resource, tmp_path)
+    assert formatted_path.name == expected_name
--- a/bulkredditdownloader/tests/test_integration.py
+++ b/bulkredditdownloader/tests/test_integration.py
@@ -168,6 +168,21 @@ def test_cli_download_long(test_args: list[str], tmp_path: Path):
    assert result.exit_code == 0


+@pytest.mark.online
+@pytest.mark.reddit
+@pytest.mark.skipif(
+    not Path('test_config.cfg').exists(),
+    reason='A test config file is required for integration tests',
+)
+@pytest.mark.parametrize('test_args', (
+    ['-l', 'gstd4hk'],
+    ['-l', 'm2601g'],
+))
+def test_cli_archive_single(test_args: list[str], tmp_path: Path):
+    """Invoke the ``archive`` command with a single ``-l`` argument and check its exit code and output.
+
+    The test is skipped when ``test_config.cfg`` does not exist relative to the working directory,
+    because the command is invoked with ``--config test_config.cfg``.
+    """
+    runner = CliRunner()
+    cli_args = ['archive', str(tmp_path), '-v', '--config', 'test_config.cfg'] + test_args
+    result = runner.invoke(cli, cli_args)
+    assert result.exit_code == 0
+    # Verbose output is expected to contain the archiver's "Writing entry" log line.
+    assert re.search(r'Writing entry .*? to file in .*? format', result.output)
+
+
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.skipif(Path('test_config.cfg') is False, reason='A test config file is required for integration tests')
@@ -184,7 +199,7 @@ def test_cli_archive_subreddit(test_args: list[str], tmp_path: Path):
    test_args = ['archive', str(tmp_path), '-v', '--config', 'test_config.cfg'] + test_args
    result = runner.invoke(cli, test_args)
    assert result.exit_code == 0
-    assert re.search(r'Writing submission .*? to file in .*? format', result.output)
+    assert re.search(r'Writing entry .*? to file in .*? format', result.output)


@pytest.mark.online
@@ -200,7 +215,7 @@ def test_cli_archive_long(test_args: list[str], tmp_path: Path):
    test_args = ['archive', str(tmp_path), '-v', '--config', 'test_config.cfg'] + test_args
    result = runner.invoke(cli, test_args)
    assert result.exit_code == 0
-    assert re.search(r'Writing submission .*? to file in .*? format', result.output)
+    assert re.search(r'Writing entry .*? to file in .*? format', result.output)


@pytest.mark.online