Add integration tests for archiver

This commit is contained in:
Serene-Arc
2021-03-14 11:11:37 +10:00
committed by Ali Parlakci
parent c2d3cfd50f
commit b08c31a1db
3 changed files with 83 additions and 21 deletions

View File

@@ -5,12 +5,37 @@ import sys
import click import click
from bulkredditdownloader.archiver import Archiver
from bulkredditdownloader.configuration import Configuration from bulkredditdownloader.configuration import Configuration
from bulkredditdownloader.downloader import RedditDownloader from bulkredditdownloader.downloader import RedditDownloader
from bulkredditdownloader.exceptions import BulkDownloaderException
logger = logging.getLogger() logger = logging.getLogger()
# Click decorators shared by every sub-command. They are kept in a list so that
# they can be applied to a command function programmatically.
_common_options = [
    # Positional destination directory.
    click.argument('directory', type=str),

    # General behaviour.
    click.option('--config', type=str, default=None),
    click.option('-v', '--verbose', default=None, count=True),

    # Sources of submissions.
    click.option('-l', '--link', multiple=True, default=None, type=str),
    click.option('-s', '--subreddit', multiple=True, default=None, type=str),
    click.option('-m', '--multireddit', multiple=True, default=None, type=str),
    click.option('-L', '--limit', default=None, type=int),

    # Account-related flags.
    click.option('--authenticate', is_flag=True, default=None),
    click.option('--submitted', is_flag=True, default=None),
    click.option('--upvoted', is_flag=True, default=None),
    click.option('--saved', is_flag=True, default=None),

    # Search, user, time filter and sort order.
    click.option('--search', default=None, type=str),
    click.option('-u', '--user', type=str, default=None),
    click.option(
        '-t', '--time',
        type=click.Choice(('all', 'hour', 'day', 'week', 'month', 'year')),
        default=None,
    ),
    click.option(
        '-S', '--sort',
        type=click.Choice(('hot', 'top', 'new', 'controversial', 'rising', 'relevance')),
        default=None,
    ),
]
def _add_common_options(func):
    """Decorate ``func`` with every entry of ``_common_options``.

    The decorators are applied one after another in list order, each wrapping
    the result of the previous one, and the fully decorated callable is
    returned.
    """
    decorated = func
    for decorator in _common_options:
        decorated = decorator(decorated)
    return decorated
@click.group() @click.group()
def cli(): def cli():
@@ -18,28 +43,13 @@ def cli():
@cli.command('download') @cli.command('download')
@click.argument('directory', type=str) @click.option('--no-dupes', is_flag=True, default=None)
@click.option('-v', '--verbose', default=None, count=True) @click.option('--search-existing', is_flag=True, default=None)
@click.option('-l', '--link', multiple=True, default=None, type=str)
@click.option('-s', '--subreddit', multiple=True, default=None, type=str)
@click.option('-m', '--multireddit', multiple=True, default=None, type=str)
@click.option('-L', '--limit', default=None, type=int)
@click.option('--authenticate', is_flag=True, default=None)
@click.option('--submitted', is_flag=True, default=None)
@click.option('--upvoted', is_flag=True, default=None)
@click.option('--saved', is_flag=True, default=None)
@click.option('--search', default=None, type=str)
@click.option('-u', '--user', type=str, default=None)
@click.option('-t', '--time', type=click.Choice(('all', 'hour', 'day', 'week', 'month', 'year')), default=None)
@click.option('-S', '--sort', type=click.Choice(('hot', 'top', 'new',
'controversial', 'rising', 'relevance')), default=None)
@click.option('--skip', default=None, multiple=True)
@click.option('--skip-domain', default=None, multiple=True)
@click.option('--set-file-scheme', default=None, type=str) @click.option('--set-file-scheme', default=None, type=str)
@click.option('--set-folder-scheme', default=None, type=str) @click.option('--set-folder-scheme', default=None, type=str)
@click.option('--no-dupes', is_flag=True, default=None) @click.option('--skip', default=None, multiple=True)
@click.option('--config', type=str, default=None) @click.option('--skip-domain', default=None, multiple=True)
@click.option('--search-existing', is_flag=True, default=None) @_add_common_options
@click.pass_context @click.pass_context
def cli_download(context: click.Context, **_): def cli_download(context: click.Context, **_):
config = Configuration() config = Configuration()
@@ -50,6 +60,19 @@ def cli_download(context: click.Context, **_):
logger.info('Program complete') logger.info('Program complete')
@cli.command('archive')
@_add_common_options
# The short flag must be exactly '-f'; a stray comma inside the string
# ('-f,') would register the literal option name '-f,' instead.
@click.option('-f', '--format', type=click.Choice(('xml', 'json', 'yaml')), default=None)
@click.pass_context
def cli_archive(context: click.Context, **_):
    # The parsed options are read from the click context rather than from the
    # keyword arguments, which is why the latter are swallowed by ``**_``.
    config = Configuration()
    config.process_click_arguments(context)
    _setup_logging(config.verbose)

    # Build the archiver from the configuration and run it.
    reddit_archiver = Archiver(config)
    reddit_archiver.download()
    logger.info('Program complete')
def _setup_logging(verbosity: int): def _setup_logging(verbosity: int):
logger.setLevel(1) logger.setLevel(1)
stream = logging.StreamHandler(sys.stdout) stream = logging.StreamHandler(sys.stdout)

View File

@@ -42,6 +42,7 @@ class Archiver(RedditDownloader):
def _write_submission_json(self, entry: ArchiveEntry): def _write_submission_json(self, entry: ArchiveEntry):
resource = Resource(entry.submission, '', '.json') resource = Resource(entry.submission, '', '.json')
file_path = self.file_name_formatter.format_path(resource, self.download_directory) file_path = self.file_name_formatter.format_path(resource, self.download_directory)
file_path.parent.mkdir(exist_ok=True, parents=True)
with open(file_path, 'w') as file: with open(file_path, 'w') as file:
logger.debug(f'Writing submission {entry.submission.id} to file in JSON format at {file_path}') logger.debug(f'Writing submission {entry.submission.id} to file in JSON format at {file_path}')
json.dump(entry.compile(), file) json.dump(entry.compile(), file)
@@ -49,6 +50,7 @@ class Archiver(RedditDownloader):
def _write_submission_xml(self, entry: ArchiveEntry): def _write_submission_xml(self, entry: ArchiveEntry):
resource = Resource(entry.submission, '', '.xml') resource = Resource(entry.submission, '', '.xml')
file_path = self.file_name_formatter.format_path(resource, self.download_directory) file_path = self.file_name_formatter.format_path(resource, self.download_directory)
file_path.parent.mkdir(exist_ok=True, parents=True)
with open(file_path, 'w') as file: with open(file_path, 'w') as file:
logger.debug(f'Writing submission {entry.submission.id} to file in XML format at {file_path}') logger.debug(f'Writing submission {entry.submission.id} to file in XML format at {file_path}')
xml_entry = dict2xml.dict2xml(entry.compile(), wrap='root') xml_entry = dict2xml.dict2xml(entry.compile(), wrap='root')
@@ -57,6 +59,7 @@ class Archiver(RedditDownloader):
def _write_submission_yaml(self, entry: ArchiveEntry): def _write_submission_yaml(self, entry: ArchiveEntry):
resource = Resource(entry.submission, '', '.yaml') resource = Resource(entry.submission, '', '.yaml')
file_path = self.file_name_formatter.format_path(resource, self.download_directory) file_path = self.file_name_formatter.format_path(resource, self.download_directory)
file_path.parent.mkdir(exist_ok=True, parents=True)
with open(file_path, 'w') as file: with open(file_path, 'w') as file:
logger.debug(f'Writing submission {entry.submission.id} to file in YAML format at {file_path}') logger.debug(f'Writing submission {entry.submission.id} to file in YAML format at {file_path}')
yaml.dump(entry.compile(), file) yaml.dump(entry.compile(), file)

View File

@@ -1,6 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# coding=utf-8 # coding=utf-8
import re
from pathlib import Path from pathlib import Path
import pytest import pytest
@@ -163,3 +164,38 @@ def test_cli_download_long(test_args: list[str], tmp_path: Path):
test_args = ['download', str(tmp_path), '-v', '--config', 'test_config.cfg'] + test_args test_args = ['download', str(tmp_path), '-v', '--config', 'test_config.cfg'] + test_args
result = runner.invoke(cli, test_args) result = runner.invoke(cli, test_args)
assert result.exit_code == 0 assert result.exit_code == 0
@pytest.mark.online
@pytest.mark.reddit
# ``Path(...) is False`` is never true (a Path object is not the False
# singleton), so the test would never be skipped; check for the file instead.
@pytest.mark.skipif(not Path('test_config.cfg').exists(),
                    reason='A test config file is required for integration tests')
@pytest.mark.parametrize('test_args', (
    # Command-line arguments are strings; click converts '-L' to int itself.
    ['--subreddit', 'Mindustry', '-L', '25'],
    ['--subreddit', 'Mindustry', '-L', '25', '--format', 'xml'],
    ['--subreddit', 'Mindustry', '-L', '25', '--format', 'yaml'],
    ['--subreddit', 'Mindustry', '-L', '25', '--sort', 'new'],
    ['--subreddit', 'Mindustry', '-L', '25', '--time', 'day'],
    ['--subreddit', 'Mindustry', '-L', '25', '--time', 'day', '--sort', 'new'],
))
def test_cli_archive_subreddit(test_args: list[str], tmp_path: Path):
    """Invoke the ``archive`` command on a subreddit and check it logs a write.

    The command is run through click's ``CliRunner`` with ``tmp_path`` as the
    target directory; it must exit with code 0 and its output must contain the
    'Writing submission ... to file in ... format' log line.
    """
    runner = CliRunner()
    test_args = ['archive', str(tmp_path), '-v', '--config', 'test_config.cfg'] + test_args
    result = runner.invoke(cli, test_args)
    assert result.exit_code == 0
    assert re.search(r'Writing submission .*? to file in .*? format', result.output)
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.slow
# ``Path(...) is False`` is never true (a Path object is not the False
# singleton), so the test would never be skipped; check for the file instead.
@pytest.mark.skipif(not Path('test_config.cfg').exists(),
                    reason='A test config file is required for integration tests')
@pytest.mark.parametrize('test_args', (
    # Command-line arguments are strings; click converts '-L' to int itself.
    ['--subreddit', 'all', '-L', '100'],
    ['--subreddit', 'all', '-L', '100', '--sort', 'new'],
))
def test_cli_archive_long(test_args: list[str], tmp_path: Path):
    """Invoke the ``archive`` command with a larger limit on r/all.

    The command is run through click's ``CliRunner`` with ``tmp_path`` as the
    target directory; it must exit with code 0 and its output must contain the
    'Writing submission ... to file in ... format' log line.
    """
    runner = CliRunner()
    test_args = ['archive', str(tmp_path), '-v', '--config', 'test_config.cfg'] + test_args
    result = runner.invoke(cli, test_args)
    assert result.exit_code == 0
    assert re.search(r'Writing submission .*? to file in .*? format', result.output)