Add a combined command for the archiver and downloader: clone (#433)

* Simplify downloader function

* Add basic scraper class

* Add "scrape" command

* Rename "scrape" command to "clone"

* Add integration tests for clone command

* Update README

* Fix failing test
This commit is contained in:
Serene
2021-06-06 20:29:09 +10:00
committed by GitHub
parent a2f010c40d
commit 434aeb8feb
6 changed files with 139 additions and 34 deletions

View File

@@ -8,6 +8,7 @@ import click
from bdfr.archiver import Archiver
from bdfr.configuration import Configuration
from bdfr.downloader import RedditDownloader
from bdfr.cloner import RedditCloner
logger = logging.getLogger()
@@ -32,11 +33,32 @@ _common_options = [
'controversial', 'rising', 'relevance')), default=None),
]
# click option decorators specific to downloading. They are kept in a list so
# that several commands can attach the same set through _add_options().
# Every option defaults to None -- presumably so an unset option can be told
# apart from an explicit value when the Configuration is built; TODO confirm.
_downloader_options = [
click.option('--file-scheme', default=None, type=str),
click.option('--folder-scheme', default=None, type=str),
click.option('--make-hard-links', is_flag=True, default=None),
click.option('--max-wait-time', type=int, default=None),
click.option('--no-dupes', is_flag=True, default=None),
click.option('--search-existing', is_flag=True, default=None),
# The options below are declared with multiple=True, so each may be repeated.
click.option('--exclude-id', default=None, multiple=True),
click.option('--exclude-id-file', default=None, multiple=True),
click.option('--skip', default=None, multiple=True),
click.option('--skip-domain', default=None, multiple=True),
click.option('--skip-subreddit', default=None, multiple=True),
]
def _add_common_options(func):
    """Decorate ``func`` with every entry of the module-level ``_common_options``.

    The option decorators are applied one after another in list order, each
    wrapping the result of the previous one, and the final callable is returned.
    """
    decorated = func
    for option in _common_options:
        decorated = option(decorated)
    return decorated
# click option decorators specific to archiving, grouped in a list so they can
# be attached to more than one command through _add_options().
_archiver_options = [
click.option('--all-comments', is_flag=True, default=None),
# Output format is restricted to one of: xml, json, yaml.
click.option('-f', '--format', type=click.Choice(('xml', 'json', 'yaml')), default=None),
]
def _add_options(opts: list):
def wrap(func):
for opt in opts:
func = opt(func)
return func
return wrap
@click.group()
@@ -45,18 +67,8 @@ def cli():
@cli.command('download')
@click.option('--file-scheme', default=None, type=str)
@click.option('--folder-scheme', default=None, type=str)
@click.option('--make-hard-links', is_flag=True, default=None)
@click.option('--max-wait-time', type=int, default=None)
@click.option('--no-dupes', is_flag=True, default=None)
@click.option('--search-existing', is_flag=True, default=None)
@click.option('--exclude-id', default=None, multiple=True)
@click.option('--exclude-id-file', default=None, multiple=True)
@click.option('--skip', default=None, multiple=True)
@click.option('--skip-domain', default=None, multiple=True)
@click.option('--skip-subreddit', default=None, multiple=True)
@_add_common_options
@_add_options(_common_options)
@_add_options(_downloader_options)
@click.pass_context
def cli_download(context: click.Context, **_):
config = Configuration()
@@ -73,9 +85,8 @@ def cli_download(context: click.Context, **_):
@cli.command('archive')
@_add_common_options
@click.option('--all-comments', is_flag=True, default=None)
@click.option('-f', '--format', type=click.Choice(('xml', 'json', 'yaml')), default=None)
@_add_options(_common_options)
@_add_options(_archiver_options)
@click.pass_context
def cli_archive(context: click.Context, **_):
config = Configuration()
@@ -85,7 +96,26 @@ def cli_archive(context: click.Context, **_):
reddit_archiver = Archiver(config)
reddit_archiver.download()
except Exception:
logger.exception('Downloader exited unexpectedly')
logger.exception('Archiver exited unexpectedly')
raise
else:
logger.info('Program complete')
@cli.command('clone')
@_add_options(_common_options)
@_add_options(_archiver_options)
@_add_options(_downloader_options)
@click.pass_context
def cli_clone(context: click.Context, **_):
    """Combined archive and download command."""
    # The parsed options are not read from **_; the Configuration pulls them
    # out of the click context instead.
    config = Configuration()
    config.process_click_arguments(context)
    setup_logging(config.verbose)
    try:
        reddit_cloner = RedditCloner(config)
        reddit_cloner.download()
    except Exception:
        # Top-level boundary: record the traceback, then re-raise so the
        # failure still propagates to the caller.
        logger.exception('Cloner exited unexpectedly')
        raise
    else:
        logger.info('Program complete')