Add a combined command for the archiver and downloader: clone (#433)

* Simplify downloader function

* Add basic scraper class

* Add "scrape" command

* Rename "scrape" command to "clone"

* Add integration tests for clone command

* Update README

* Fix failing test
This commit is contained in:
Serene
2021-06-06 20:29:09 +10:00
committed by GitHub
parent a2f010c40d
commit 434aeb8feb
6 changed files with 139 additions and 34 deletions

View File

@@ -4,11 +4,12 @@
import os
import re
from pathlib import Path
from unittest.mock import MagicMock
from unittest.mock import MagicMock, patch
import praw.models
import pytest
import bdfr.site_downloaders.download_factory
from bdfr.__main__ import setup_logging
from bdfr.configuration import Configuration
from bdfr.connector import RedditConnector
@@ -37,17 +38,30 @@ def downloader_mock(args: Configuration):
(('aaaaaa',), ('aaaaaa',), 0),
((), ('aaaaaa',), 0),
(('aaaaaa', 'bbbbbb'), ('aaaaaa',), 1),
(('aaaaaa', 'bbbbbb', 'cccccc'), ('aaaaaa',), 2),
))
def test_excluded_ids(test_ids: tuple[str], test_excluded: tuple[str], expected_len: int, downloader_mock: MagicMock):
@patch('bdfr.site_downloaders.download_factory.DownloadFactory.pull_lever')
def test_excluded_ids(
mock_function: MagicMock,
test_ids: tuple[str],
test_excluded: tuple[str],
expected_len: int,
downloader_mock: MagicMock,
):
downloader_mock.excluded_submission_ids = test_excluded
mock_function.return_value = MagicMock()
mock_function.return_value.__name__ = 'test'
test_submissions = []
for test_id in test_ids:
m = MagicMock()
m.id = test_id
m.subreddit.display_name.return_value = 'https://www.example.com/'
m.__class__ = praw.models.Submission
test_submissions.append(m)
downloader_mock.reddit_lists = [test_submissions]
RedditDownloader.download(downloader_mock)
assert downloader_mock._download_submission.call_count == expected_len
for submission in test_submissions:
RedditDownloader._download_submission(downloader_mock, submission)
assert mock_function.call_count == expected_len
@pytest.mark.online

View File

@@ -33,6 +33,17 @@ def create_basic_args_for_archive_runner(test_args: list[str], tmp_path: Path):
return out
def create_basic_args_for_cloner_runner(test_args: list[str], tmp_path: Path):
out = [
'clone',
str(tmp_path),
'-v',
'--config', 'test_config.cfg',
'--log', str(Path(tmp_path, 'test_log.txt')),
] + test_args
return out
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')
@@ -343,3 +354,19 @@ def test_cli_file_scheme_warning(test_args: list[str], tmp_path: Path):
result = runner.invoke(cli, test_args)
assert result.exit_code == 0
assert 'Some files might not be downloaded due to name conflicts' in result.output
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')
@pytest.mark.parametrize('test_args', (
['-l', 'm2601g'],
['-s', 'TrollXChromosomes/', '-L', 1],
))
def test_cli_scrape_general(test_args: list[str], tmp_path: Path):
runner = CliRunner()
test_args = create_basic_args_for_cloner_runner(test_args, tmp_path)
result = runner.invoke(cli, test_args)
assert result.exit_code == 0
assert 'Downloaded submission' in result.output
assert 'Record for entry item' in result.output