Add a combined command for the archiver and downloader: clone (#433)
* Simplify downloader function
* Add basic scraper class
* Add "scrape" command
* Rename "scrape" command to "clone"
* Add integration tests for clone command
* Update README
* Fix failing test
This commit is contained in:
@@ -4,11 +4,12 @@
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import praw.models
|
||||
import pytest
|
||||
|
||||
import bdfr.site_downloaders.download_factory
|
||||
from bdfr.__main__ import setup_logging
|
||||
from bdfr.configuration import Configuration
|
||||
from bdfr.connector import RedditConnector
|
||||
@@ -37,17 +38,30 @@ def downloader_mock(args: Configuration):
|
||||
(('aaaaaa',), ('aaaaaa',), 0),
|
||||
((), ('aaaaaa',), 0),
|
||||
(('aaaaaa', 'bbbbbb'), ('aaaaaa',), 1),
|
||||
(('aaaaaa', 'bbbbbb', 'cccccc'), ('aaaaaa',), 2),
|
||||
))
|
||||
def test_excluded_ids(test_ids: tuple[str], test_excluded: tuple[str], expected_len: int, downloader_mock: MagicMock):
|
||||
@patch('bdfr.site_downloaders.download_factory.DownloadFactory.pull_lever')
|
||||
def test_excluded_ids(
|
||||
mock_function: MagicMock,
|
||||
test_ids: tuple[str],
|
||||
test_excluded: tuple[str],
|
||||
expected_len: int,
|
||||
downloader_mock: MagicMock,
|
||||
):
|
||||
downloader_mock.excluded_submission_ids = test_excluded
|
||||
mock_function.return_value = MagicMock()
|
||||
mock_function.return_value.__name__ = 'test'
|
||||
test_submissions = []
|
||||
for test_id in test_ids:
|
||||
m = MagicMock()
|
||||
m.id = test_id
|
||||
m.subreddit.display_name.return_value = 'https://www.example.com/'
|
||||
m.__class__ = praw.models.Submission
|
||||
test_submissions.append(m)
|
||||
downloader_mock.reddit_lists = [test_submissions]
|
||||
RedditDownloader.download(downloader_mock)
|
||||
assert downloader_mock._download_submission.call_count == expected_len
|
||||
for submission in test_submissions:
|
||||
RedditDownloader._download_submission(downloader_mock, submission)
|
||||
assert mock_function.call_count == expected_len
|
||||
|
||||
|
||||
@pytest.mark.online
|
||||
|
||||
@@ -33,6 +33,17 @@ def create_basic_args_for_archive_runner(test_args: list[str], tmp_path: Path):
|
||||
return out
|
||||
|
||||
|
||||
def create_basic_args_for_cloner_runner(test_args: list[str], tmp_path: Path) -> list[str]:
    """Build the complete argument list for a ``clone`` command test run.

    The result always starts with the ``clone`` subcommand followed by
    ``str(tmp_path)``, the ``-v`` flag, ``--config test_config.cfg`` and a
    ``--log`` path pointing at ``test_log.txt`` inside ``tmp_path``. The
    test-specific arguments are appended after these common ones.

    Args:
        test_args: Extra command-line arguments for the individual test case.
            Any iterable is accepted (for example a tuple), not only a list.
        tmp_path: Directory passed to the command and used to hold the log file.

    Returns:
        A newly built list of arguments; ``test_args`` itself is not modified.
    """
    return [
        'clone',
        str(tmp_path),
        '-v',
        '--config', 'test_config.cfg',
        '--log', str(Path(tmp_path, 'test_log.txt')),
        # Unpack instead of ``list + test_args`` so tuples and other
        # iterables work as well as lists.
        *test_args,
    ]
|
||||
|
||||
|
||||
@pytest.mark.online
|
||||
@pytest.mark.reddit
|
||||
@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')
|
||||
@@ -343,3 +354,19 @@ def test_cli_file_scheme_warning(test_args: list[str], tmp_path: Path):
|
||||
result = runner.invoke(cli, test_args)
|
||||
assert result.exit_code == 0
|
||||
assert 'Some files might not be downloaded due to name conflicts' in result.output
|
||||
|
||||
|
||||
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')
@pytest.mark.parametrize('test_args', (
    ['-l', 'm2601g'],
    # Command-line arguments are strings, so the limit is passed as '1'
    # to match both the ``list[str]`` annotation and a real argv.
    ['-s', 'TrollXChromosomes/', '-L', '1'],
))
def test_cli_scrape_general(test_args: list[str], tmp_path: Path):
    """Invoke the ``clone`` command through the CLI runner and check its output.

    The test-specific arguments are combined with the common ``clone``
    arguments, the command is invoked, and the run must exit with code 0
    while printing both a 'Downloaded submission' message and a
    'Record for entry item' message.
    """
    runner = CliRunner()
    test_args = create_basic_args_for_cloner_runner(test_args, tmp_path)
    result = runner.invoke(cli, test_args)
    assert result.exit_code == 0
    assert 'Downloaded submission' in result.output
    assert 'Record for entry item' in result.output
|
||||
|
||||
Reference in New Issue
Block a user