Archiver is smarter for comments (#242)
* Add comment name generation to file name formatter
* Refactor to reduce duplication
* Refactor archive entry classes
* Refactor archiver class a bit
* Refactor method
* Fix comment retrieval
* Add comment-downloading to archiver
* Update test
* Update test
This commit is contained in:
2
bulkredditdownloader/tests/archive_entry/__init__.py
Normal file
2
bulkredditdownloader/tests/archive_entry/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
#!/usr/bin/env python3
|
||||
# coding=utf-8
|
||||
@@ -0,0 +1,38 @@
|
||||
#!/usr/bin/env python3
|
||||
# coding=utf-8
|
||||
|
||||
import praw
|
||||
import pytest
|
||||
|
||||
from bulkredditdownloader.archive_entry.comment_archive_entry import CommentArchiveEntry
|
||||
|
||||
|
||||
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(('test_comment_id', 'expected_dict'), (
    ('gstd4hk', {
        'author': 'james_pic',
        'subreddit': 'Python',
        'submission': 'mgi4op',
        'submission_title': '76% Faster CPython',
    }),
))
def test_get_comment_details(test_comment_id: str, expected_dict: dict, reddit_instance: praw.Reddit):
    """Compile a CommentArchiveEntry and compare it against the expected details.

    Only the keys listed in ``expected_dict`` are checked; any other keys in
    the compiled result are ignored.
    """
    compiled = CommentArchiveEntry(reddit_instance.comment(id=test_comment_id)).compile()
    # One assertion per key so a failure points at the mismatching field.
    for key, expected_value in expected_dict.items():
        assert compiled.get(key) == expected_value
|
||||
|
||||
|
||||
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(('test_comment_id', 'expected_min_comments'), (
    ('gstd4hk', 4),
    ('gsvyste', 3),
    ('gsxnvvb', 5),
))
def test_get_comment_replies(test_comment_id: str, expected_min_comments: int, reddit_instance: praw.Reddit):
    """Check that a compiled comment entry carries at least the expected number of replies.

    The count is a lower bound (``>=``) rather than an exact match.
    """
    entry = CommentArchiveEntry(reddit_instance.comment(id=test_comment_id))
    compiled = entry.compile()
    reply_count = len(compiled.get('replies'))
    assert reply_count >= expected_min_comments
|
||||
@@ -4,7 +4,7 @@
|
||||
import praw
|
||||
import pytest
|
||||
|
||||
from bulkredditdownloader.archive_entry import ArchiveEntry
|
||||
from bulkredditdownloader.archive_entry.submission_archive_entry import SubmissionArchiveEntry
|
||||
|
||||
|
||||
@pytest.mark.online
|
||||
@@ -14,9 +14,9 @@ from bulkredditdownloader.archive_entry import ArchiveEntry
|
||||
))
|
||||
def test_get_comments(test_submission_id: str, min_comments: int, reddit_instance: praw.Reddit):
|
||||
test_submission = reddit_instance.submission(id=test_submission_id)
|
||||
test_archive_entry = ArchiveEntry(test_submission)
|
||||
test_archive_entry._get_comments()
|
||||
assert len(test_archive_entry.comments) >= min_comments
|
||||
test_archive_entry = SubmissionArchiveEntry(test_submission)
|
||||
results = test_archive_entry._get_comments()
|
||||
assert len(results) >= min_comments
|
||||
|
||||
|
||||
@pytest.mark.online
|
||||
@@ -27,6 +27,6 @@ def test_get_comments(test_submission_id: str, min_comments: int, reddit_instanc
|
||||
))
|
||||
def test_get_post_details(test_submission_id: str, expected_dict: dict, reddit_instance: praw.Reddit):
|
||||
test_submission = reddit_instance.submission(id=test_submission_id)
|
||||
test_archive_entry = ArchiveEntry(test_submission)
|
||||
test_archive_entry = SubmissionArchiveEntry(test_submission)
|
||||
test_archive_entry._get_post_details()
|
||||
assert all([test_archive_entry.post_details[key] == expected_dict[key] for key in expected_dict.keys()])
|
||||
assert all([test_archive_entry.post_details.get(key) == expected_dict[key] for key in expected_dict.keys()])
|
||||
@@ -10,7 +10,7 @@ from bulkredditdownloader.site_downloaders.erome import Erome
|
||||
|
||||
@pytest.mark.online
|
||||
@pytest.mark.parametrize(('test_url', 'expected_urls'), (
|
||||
('https://www.erome.com/a/vqtPuLXh', ('https://s6.erome.com/365/vqtPuLXh/KH2qBT99_480p.mp4',)),
|
||||
('https://www.erome.com/a/vqtPuLXh', ('https://s11.erome.com/365/vqtPuLXh/KH2qBT99_480p.mp4',)),
|
||||
('https://www.erome.com/a/ORhX0FZz',
|
||||
('https://s4.erome.com/355/ORhX0FZz/9IYQocM9_480p.mp4',
|
||||
'https://s4.erome.com/355/ORhX0FZz/9eEDc8xm_480p.mp4',
|
||||
|
||||
@@ -7,7 +7,7 @@ from unittest.mock import MagicMock
|
||||
import praw
|
||||
import pytest
|
||||
|
||||
from bulkredditdownloader.archive_entry import ArchiveEntry
|
||||
from bulkredditdownloader.archive_entry.submission_archive_entry import SubmissionArchiveEntry
|
||||
from bulkredditdownloader.archiver import Archiver
|
||||
|
||||
|
||||
@@ -21,9 +21,9 @@ def test_write_submission_json(test_submission_id: str, tmp_path: Path, reddit_i
|
||||
test_path = Path(tmp_path, 'test.json')
|
||||
test_submission = reddit_instance.submission(id=test_submission_id)
|
||||
archiver_mock.file_name_formatter.format_path.return_value = test_path
|
||||
test_entry = ArchiveEntry(test_submission)
|
||||
Archiver._write_submission_json(archiver_mock, test_entry)
|
||||
assert test_path.exists()
|
||||
test_entry = SubmissionArchiveEntry(test_submission)
|
||||
Archiver._write_entry_json(archiver_mock, test_entry)
|
||||
archiver_mock._write_content_to_disk.assert_called_once()
|
||||
|
||||
|
||||
@pytest.mark.online
|
||||
@@ -36,9 +36,9 @@ def test_write_submission_xml(test_submission_id: str, tmp_path: Path, reddit_in
|
||||
test_path = Path(tmp_path, 'test.xml')
|
||||
test_submission = reddit_instance.submission(id=test_submission_id)
|
||||
archiver_mock.file_name_formatter.format_path.return_value = test_path
|
||||
test_entry = ArchiveEntry(test_submission)
|
||||
Archiver._write_submission_xml(archiver_mock, test_entry)
|
||||
assert test_path.exists()
|
||||
test_entry = SubmissionArchiveEntry(test_submission)
|
||||
Archiver._write_entry_xml(archiver_mock, test_entry)
|
||||
archiver_mock._write_content_to_disk.assert_called_once()
|
||||
|
||||
|
||||
@pytest.mark.online
|
||||
@@ -48,9 +48,10 @@ def test_write_submission_xml(test_submission_id: str, tmp_path: Path, reddit_in
|
||||
))
|
||||
def test_write_submission_yaml(test_submission_id: str, tmp_path: Path, reddit_instance: praw.Reddit):
|
||||
archiver_mock = MagicMock()
|
||||
archiver_mock.download_directory = tmp_path
|
||||
test_path = Path(tmp_path, 'test.yaml')
|
||||
test_submission = reddit_instance.submission(id=test_submission_id)
|
||||
archiver_mock.file_name_formatter.format_path.return_value = test_path
|
||||
test_entry = ArchiveEntry(test_submission)
|
||||
Archiver._write_submission_yaml(archiver_mock, test_entry)
|
||||
assert test_path.exists()
|
||||
test_entry = SubmissionArchiveEntry(test_submission)
|
||||
Archiver._write_entry_yaml(archiver_mock, test_entry)
|
||||
archiver_mock._write_content_to_disk.assert_called_once()
|
||||
|
||||
@@ -22,11 +22,12 @@ def submission() -> MagicMock:
|
||||
test.score = 1000
|
||||
test.link_flair_text = 'test_flair'
|
||||
test.created_utc = 123456789
|
||||
test.__class__ = praw.models.Submission
|
||||
return test
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def reddit_submission(reddit_instance) -> praw.models.Submission:
|
||||
def reddit_submission(reddit_instance: praw.Reddit) -> praw.models.Submission:
|
||||
return reddit_instance.submission(id='lgilgt')
|
||||
|
||||
|
||||
@@ -137,6 +138,7 @@ def test_format_multiple_resources():
|
||||
new_mock.url = 'https://example.com/test.png'
|
||||
new_mock.extension = '.png'
|
||||
new_mock.source_submission.title = 'test'
|
||||
new_mock.source_submission.__class__ = praw.models.Submission
|
||||
mocks.append(new_mock)
|
||||
test_formatter = FileNameFormatter('{TITLE}', '')
|
||||
results = test_formatter.format_resource_paths(mocks, Path('.'))
|
||||
@@ -176,13 +178,12 @@ def test_preserve_id_append_when_shortening(test_filename: str, test_ending: str
|
||||
assert result.endswith(expected_end)
|
||||
|
||||
|
||||
def test_shorten_filenames(tmp_path: Path):
|
||||
test_submission = MagicMock()
|
||||
test_submission.title = 'A' * 300
|
||||
test_submission.author.name = 'test'
|
||||
test_submission.subreddit.display_name = 'test'
|
||||
test_submission.id = 'BBBBBB'
|
||||
test_resource = Resource(test_submission, 'www.example.com/empty', '.jpeg')
|
||||
def test_shorten_filenames(submission: MagicMock, tmp_path: Path):
|
||||
submission.title = 'A' * 300
|
||||
submission.author.name = 'test'
|
||||
submission.subreddit.display_name = 'test'
|
||||
submission.id = 'BBBBBB'
|
||||
test_resource = Resource(submission, 'www.example.com/empty', '.jpeg')
|
||||
test_formatter = FileNameFormatter('{REDDITOR}_{TITLE}_{POSTID}', '{SUBREDDIT}')
|
||||
result = test_formatter.format_path(test_resource, tmp_path)
|
||||
result.parent.mkdir(parents=True)
|
||||
@@ -212,3 +213,50 @@ def test_format_file_name_for_windows(test_string: str, expected: str):
|
||||
def test_strip_emojies(test_string: str, expected: str):
|
||||
result = FileNameFormatter._strip_emojis(test_string)
|
||||
assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(('test_submission_id', 'expected'), (
    ('mfuteh', {'title': 'Why Do Interviewers Ask Linked List Questions?', 'redditor': 'mjgardner'}),
))
def test_generate_dict_for_submission(test_submission_id: str, expected: dict, reddit_instance: praw.Reddit):
    """Check the name dictionary that FileNameFormatter builds from a submission.

    Only the keys present in ``expected`` are compared.
    """
    submission = reddit_instance.submission(id=test_submission_id)
    name_dict = FileNameFormatter._generate_name_dict_from_submission(submission)
    # One assertion per key so a failure points at the mismatching field.
    for key, expected_value in expected.items():
        assert name_dict.get(key) == expected_value
|
||||
|
||||
|
||||
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(('test_comment_id', 'expected'), (
    ('gsq0yuw', {
        'title': 'Why Do Interviewers Ask Linked List Questions?',
        'redditor': 'Doctor-Dapper',
        'postid': 'gsq0yuw',
        'flair': '',
    }),
))
def test_generate_dict_for_comment(test_comment_id: str, expected: dict, reddit_instance: praw.Reddit):
    """Check the name dictionary that FileNameFormatter builds from a comment.

    Only the keys present in ``expected`` are compared.
    """
    comment = reddit_instance.comment(id=test_comment_id)
    name_dict = FileNameFormatter._generate_name_dict_from_comment(comment)
    # One assertion per key so a failure points at the mismatching field.
    for key, expected_value in expected.items():
        assert name_dict.get(key) == expected_value
|
||||
|
||||
|
||||
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(('test_file_scheme', 'test_folder_scheme', 'test_comment_id', 'expected_name'), (
    ('{POSTID}', '', 'gsoubde', 'gsoubde.json'),
    ('{REDDITOR}_{POSTID}', '', 'gsoubde', 'DELETED_gsoubde.json'),
))
def test_format_archive_entry_comment(
        test_file_scheme: str,
        test_folder_scheme: str,
        test_comment_id: str,
        expected_name: str,
        tmp_path: Path,
        reddit_instance: praw.Reddit):
    """Check the file name produced when a comment is formatted as a ``.json`` resource.

    The comment is wrapped in a Resource with an empty URL and formatted under
    ``tmp_path``; only the final path component is compared.
    """
    comment = reddit_instance.comment(id=test_comment_id)
    formatter = FileNameFormatter(test_file_scheme, test_folder_scheme)
    comment_resource = Resource(comment, '', '.json')
    formatted_path = formatter.format_path(comment_resource, tmp_path)
    assert formatted_path.name == expected_name
|
||||
|
||||
@@ -168,6 +168,21 @@ def test_cli_download_long(test_args: list[str], tmp_path: Path):
|
||||
assert result.exit_code == 0
|
||||
|
||||
|
||||
@pytest.mark.online
@pytest.mark.reddit
# The previous condition, `Path('test_config.cfg') is False`, compared a Path
# object to False by identity and so was never true: the test was never
# skipped. Check for the file on disk instead. The relative path is resolved
# against the working directory at collection time.
@pytest.mark.skipif(
    not Path('test_config.cfg').exists(),
    reason='A test config file is required for integration tests',
)
@pytest.mark.parametrize('test_args', (
    ['-l', 'gstd4hk'],
    ['-l', 'm2601g'],
))
def test_cli_archive_single(test_args: list[str], tmp_path: Path):
    """Run the ``archive`` CLI command with the parametrized ``-l`` arguments.

    The command writes into ``tmp_path`` using ``test_config.cfg``. The test
    passes when the command exits with code 0 and its output contains a
    "Writing entry ... to file in ... format" message.
    """
    runner = CliRunner()
    test_args = ['archive', str(tmp_path), '-v', '--config', 'test_config.cfg'] + test_args
    result = runner.invoke(cli, test_args)
    assert result.exit_code == 0
    assert re.search(r'Writing entry .*? to file in .*? format', result.output)
|
||||
|
||||
|
||||
@pytest.mark.online
|
||||
@pytest.mark.reddit
|
||||
@pytest.mark.skipif(Path('test_config.cfg') is False, reason='A test config file is required for integration tests')
|
||||
@@ -184,7 +199,7 @@ def test_cli_archive_subreddit(test_args: list[str], tmp_path: Path):
|
||||
test_args = ['archive', str(tmp_path), '-v', '--config', 'test_config.cfg'] + test_args
|
||||
result = runner.invoke(cli, test_args)
|
||||
assert result.exit_code == 0
|
||||
assert re.search(r'Writing submission .*? to file in .*? format', result.output)
|
||||
assert re.search(r'Writing entry .*? to file in .*? format', result.output)
|
||||
|
||||
|
||||
@pytest.mark.online
|
||||
@@ -200,7 +215,7 @@ def test_cli_archive_long(test_args: list[str], tmp_path: Path):
|
||||
test_args = ['archive', str(tmp_path), '-v', '--config', 'test_config.cfg'] + test_args
|
||||
result = runner.invoke(cli, test_args)
|
||||
assert result.exit_code == 0
|
||||
assert re.search(r'Writing submission .*? to file in .*? format', result.output)
|
||||
assert re.search(r'Writing entry .*? to file in .*? format', result.output)
|
||||
|
||||
|
||||
@pytest.mark.online
|
||||
|
||||
Reference in New Issue
Block a user