Archiver is smarter for comments (#242)

* Add comment name generation to file name formatter

* Refactor to reduce duplication

* Refactor archive entry classes

* Refactor archiver class a bit

* Refactor method

* Fix comment retrieval

* Add comment-downloading to archiver

* Update test

* Update test
This commit is contained in:
Serene
2021-04-01 18:37:20 +10:00
committed by Ali Parlakci
parent 75d74a5362
commit 32c9d6184c
14 changed files with 329 additions and 138 deletions

View File

@@ -0,0 +1,2 @@
#!/usr/bin/env python3
# coding=utf-8

View File

@@ -0,0 +1,38 @@
#!/usr/bin/env python3
# coding=utf-8
import praw
import pytest
from bulkredditdownloader.archive_entry.comment_archive_entry import CommentArchiveEntry
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(('test_comment_id', 'expected_dict'), (
('gstd4hk', {
'author': 'james_pic',
'subreddit': 'Python',
'submission': 'mgi4op',
'submission_title': '76% Faster CPython',
}),
))
def test_get_comment_details(test_comment_id: str, expected_dict: dict, reddit_instance: praw.Reddit):
comment = reddit_instance.comment(id=test_comment_id)
test_entry = CommentArchiveEntry(comment)
result = test_entry.compile()
assert all([result.get(key) == expected_dict[key] for key in expected_dict.keys()])
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(('test_comment_id', 'expected_min_comments'), (
('gstd4hk', 4),
('gsvyste', 3),
('gsxnvvb', 5),
))
def test_get_comment_replies(test_comment_id: str, expected_min_comments: int, reddit_instance: praw.Reddit):
comment = reddit_instance.comment(id=test_comment_id)
test_entry = CommentArchiveEntry(comment)
result = test_entry.compile()
assert len(result.get('replies')) >= expected_min_comments

View File

@@ -4,7 +4,7 @@
import praw
import pytest
from bulkredditdownloader.archive_entry import ArchiveEntry
from bulkredditdownloader.archive_entry.submission_archive_entry import SubmissionArchiveEntry
@pytest.mark.online
@@ -14,9 +14,9 @@ from bulkredditdownloader.archive_entry import ArchiveEntry
))
def test_get_comments(test_submission_id: str, min_comments: int, reddit_instance: praw.Reddit):
test_submission = reddit_instance.submission(id=test_submission_id)
test_archive_entry = ArchiveEntry(test_submission)
test_archive_entry._get_comments()
assert len(test_archive_entry.comments) >= min_comments
test_archive_entry = SubmissionArchiveEntry(test_submission)
results = test_archive_entry._get_comments()
assert len(results) >= min_comments
@pytest.mark.online
@@ -27,6 +27,6 @@ def test_get_comments(test_submission_id: str, min_comments: int, reddit_instanc
))
def test_get_post_details(test_submission_id: str, expected_dict: dict, reddit_instance: praw.Reddit):
test_submission = reddit_instance.submission(id=test_submission_id)
test_archive_entry = ArchiveEntry(test_submission)
test_archive_entry = SubmissionArchiveEntry(test_submission)
test_archive_entry._get_post_details()
assert all([test_archive_entry.post_details[key] == expected_dict[key] for key in expected_dict.keys()])
assert all([test_archive_entry.post_details.get(key) == expected_dict[key] for key in expected_dict.keys()])

View File

@@ -10,7 +10,7 @@ from bulkredditdownloader.site_downloaders.erome import Erome
@pytest.mark.online
@pytest.mark.parametrize(('test_url', 'expected_urls'), (
('https://www.erome.com/a/vqtPuLXh', ('https://s6.erome.com/365/vqtPuLXh/KH2qBT99_480p.mp4',)),
('https://www.erome.com/a/vqtPuLXh', ('https://s11.erome.com/365/vqtPuLXh/KH2qBT99_480p.mp4',)),
('https://www.erome.com/a/ORhX0FZz',
('https://s4.erome.com/355/ORhX0FZz/9IYQocM9_480p.mp4',
'https://s4.erome.com/355/ORhX0FZz/9eEDc8xm_480p.mp4',

View File

@@ -7,7 +7,7 @@ from unittest.mock import MagicMock
import praw
import pytest
from bulkredditdownloader.archive_entry import ArchiveEntry
from bulkredditdownloader.archive_entry.submission_archive_entry import SubmissionArchiveEntry
from bulkredditdownloader.archiver import Archiver
@@ -21,9 +21,9 @@ def test_write_submission_json(test_submission_id: str, tmp_path: Path, reddit_i
test_path = Path(tmp_path, 'test.json')
test_submission = reddit_instance.submission(id=test_submission_id)
archiver_mock.file_name_formatter.format_path.return_value = test_path
test_entry = ArchiveEntry(test_submission)
Archiver._write_submission_json(archiver_mock, test_entry)
assert test_path.exists()
test_entry = SubmissionArchiveEntry(test_submission)
Archiver._write_entry_json(archiver_mock, test_entry)
archiver_mock._write_content_to_disk.assert_called_once()
@pytest.mark.online
@@ -36,9 +36,9 @@ def test_write_submission_xml(test_submission_id: str, tmp_path: Path, reddit_in
test_path = Path(tmp_path, 'test.xml')
test_submission = reddit_instance.submission(id=test_submission_id)
archiver_mock.file_name_formatter.format_path.return_value = test_path
test_entry = ArchiveEntry(test_submission)
Archiver._write_submission_xml(archiver_mock, test_entry)
assert test_path.exists()
test_entry = SubmissionArchiveEntry(test_submission)
Archiver._write_entry_xml(archiver_mock, test_entry)
archiver_mock._write_content_to_disk.assert_called_once()
@pytest.mark.online
@@ -48,9 +48,10 @@ def test_write_submission_xml(test_submission_id: str, tmp_path: Path, reddit_in
))
def test_write_submission_yaml(test_submission_id: str, tmp_path: Path, reddit_instance: praw.Reddit):
archiver_mock = MagicMock()
archiver_mock.download_directory = tmp_path
test_path = Path(tmp_path, 'test.yaml')
test_submission = reddit_instance.submission(id=test_submission_id)
archiver_mock.file_name_formatter.format_path.return_value = test_path
test_entry = ArchiveEntry(test_submission)
Archiver._write_submission_yaml(archiver_mock, test_entry)
assert test_path.exists()
test_entry = SubmissionArchiveEntry(test_submission)
Archiver._write_entry_yaml(archiver_mock, test_entry)
archiver_mock._write_content_to_disk.assert_called_once()

View File

@@ -22,11 +22,12 @@ def submission() -> MagicMock:
test.score = 1000
test.link_flair_text = 'test_flair'
test.created_utc = 123456789
test.__class__ = praw.models.Submission
return test
@pytest.fixture()
def reddit_submission(reddit_instance) -> praw.models.Submission:
def reddit_submission(reddit_instance: praw.Reddit) -> praw.models.Submission:
return reddit_instance.submission(id='lgilgt')
@@ -137,6 +138,7 @@ def test_format_multiple_resources():
new_mock.url = 'https://example.com/test.png'
new_mock.extension = '.png'
new_mock.source_submission.title = 'test'
new_mock.source_submission.__class__ = praw.models.Submission
mocks.append(new_mock)
test_formatter = FileNameFormatter('{TITLE}', '')
results = test_formatter.format_resource_paths(mocks, Path('.'))
@@ -176,13 +178,12 @@ def test_preserve_id_append_when_shortening(test_filename: str, test_ending: str
assert result.endswith(expected_end)
def test_shorten_filenames(tmp_path: Path):
test_submission = MagicMock()
test_submission.title = 'A' * 300
test_submission.author.name = 'test'
test_submission.subreddit.display_name = 'test'
test_submission.id = 'BBBBBB'
test_resource = Resource(test_submission, 'www.example.com/empty', '.jpeg')
def test_shorten_filenames(submission: MagicMock, tmp_path: Path):
submission.title = 'A' * 300
submission.author.name = 'test'
submission.subreddit.display_name = 'test'
submission.id = 'BBBBBB'
test_resource = Resource(submission, 'www.example.com/empty', '.jpeg')
test_formatter = FileNameFormatter('{REDDITOR}_{TITLE}_{POSTID}', '{SUBREDDIT}')
result = test_formatter.format_path(test_resource, tmp_path)
result.parent.mkdir(parents=True)
@@ -212,3 +213,50 @@ def test_format_file_name_for_windows(test_string: str, expected: str):
def test_strip_emojies(test_string: str, expected: str):
result = FileNameFormatter._strip_emojis(test_string)
assert result == expected
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(('test_submission_id', 'expected'), (
('mfuteh', {'title': 'Why Do Interviewers Ask Linked List Questions?', 'redditor': 'mjgardner'}),
))
def test_generate_dict_for_submission(test_submission_id: str, expected: dict, reddit_instance: praw.Reddit):
test_submission = reddit_instance.submission(id=test_submission_id)
result = FileNameFormatter._generate_name_dict_from_submission(test_submission)
assert all([result.get(key) == expected[key] for key in expected.keys()])
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(('test_comment_id', 'expected'), (
('gsq0yuw', {
'title': 'Why Do Interviewers Ask Linked List Questions?',
'redditor': 'Doctor-Dapper',
'postid': 'gsq0yuw',
'flair': '',
}),
))
def test_generate_dict_for_comment(test_comment_id: str, expected: dict, reddit_instance: praw.Reddit):
test_comment = reddit_instance.comment(id=test_comment_id)
result = FileNameFormatter._generate_name_dict_from_comment(test_comment)
assert all([result.get(key) == expected[key] for key in expected.keys()])
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(('test_file_scheme', 'test_folder_scheme', 'test_comment_id', 'expected_name'), (
('{POSTID}', '', 'gsoubde', 'gsoubde.json'),
('{REDDITOR}_{POSTID}', '', 'gsoubde', 'DELETED_gsoubde.json'),
))
def test_format_archive_entry_comment(
test_file_scheme: str,
test_folder_scheme: str,
test_comment_id: str,
expected_name: str,
tmp_path: Path,
reddit_instance: praw.Reddit):
test_comment = reddit_instance.comment(id=test_comment_id)
test_formatter = FileNameFormatter(test_file_scheme, test_folder_scheme)
test_entry = Resource(test_comment, '', '.json')
result = test_formatter.format_path(test_entry, tmp_path)
assert result.name == expected_name

View File

@@ -168,6 +168,21 @@ def test_cli_download_long(test_args: list[str], tmp_path: Path):
assert result.exit_code == 0
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.skipif(Path('test_config.cfg') is False, reason='A test config file is required for integration tests')
@pytest.mark.parametrize('test_args', (
['-l', 'gstd4hk'],
['-l', 'm2601g'],
))
def test_cli_archive_single(test_args: list[str], tmp_path: Path):
runner = CliRunner()
test_args = ['archive', str(tmp_path), '-v', '--config', 'test_config.cfg'] + test_args
result = runner.invoke(cli, test_args)
assert result.exit_code == 0
assert re.search(r'Writing entry .*? to file in .*? format', result.output)
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.skipif(Path('test_config.cfg') is False, reason='A test config file is required for integration tests')
@@ -184,7 +199,7 @@ def test_cli_archive_subreddit(test_args: list[str], tmp_path: Path):
test_args = ['archive', str(tmp_path), '-v', '--config', 'test_config.cfg'] + test_args
result = runner.invoke(cli, test_args)
assert result.exit_code == 0
assert re.search(r'Writing submission .*? to file in .*? format', result.output)
assert re.search(r'Writing entry .*? to file in .*? format', result.output)
@pytest.mark.online
@@ -200,7 +215,7 @@ def test_cli_archive_long(test_args: list[str], tmp_path: Path):
test_args = ['archive', str(tmp_path), '-v', '--config', 'test_config.cfg'] + test_args
result = runner.invoke(cli, test_args)
assert result.exit_code == 0
assert re.search(r'Writing submission .*? to file in .*? format', result.output)
assert re.search(r'Writing entry .*? to file in .*? format', result.output)
@pytest.mark.online