Add a combined command for the archiver and downloader: clone (#433)
* Simplify downloader function
* Add basic scraper class
* Add "scrape" command
* Rename "scrape" command to "clone"
* Add integration tests for clone command
* Update README
* Fix failing test
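As a minimal sketch of how the new combined command can be smoke-tested with click's test runner, in the spirit of the integration tests mentioned above (assuming the click group below is exposed as `cli` in bdfr/__main__.py; the test name and assertions are illustrative, not the project's actual tests):

# Minimal sketch: invoke the new "clone" command through click's CliRunner.
# Assumes the click group patched below lives in bdfr/__main__.py; the test
# name and assertions are illustrative only.
from click.testing import CliRunner

from bdfr.__main__ import cli


def test_clone_exposes_combined_options():
    runner = CliRunner()
    # --help is eager, so no Reddit access or directory argument is needed.
    result = runner.invoke(cli, ['clone', '--help'])
    assert result.exit_code == 0
    assert '--file-scheme' in result.output    # downloader option
    assert '--all-comments' in result.output   # archiver option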
@@ -8,6 +8,7 @@ import click
 from bdfr.archiver import Archiver
 from bdfr.configuration import Configuration
 from bdfr.downloader import RedditDownloader
+from bdfr.cloner import RedditCloner
 
 logger = logging.getLogger()
 
@@ -32,11 +33,32 @@ _common_options = [
         'controversial', 'rising', 'relevance')), default=None),
 ]
 
+_downloader_options = [
+    click.option('--file-scheme', default=None, type=str),
+    click.option('--folder-scheme', default=None, type=str),
+    click.option('--make-hard-links', is_flag=True, default=None),
+    click.option('--max-wait-time', type=int, default=None),
+    click.option('--no-dupes', is_flag=True, default=None),
+    click.option('--search-existing', is_flag=True, default=None),
+    click.option('--exclude-id', default=None, multiple=True),
+    click.option('--exclude-id-file', default=None, multiple=True),
+    click.option('--skip', default=None, multiple=True),
+    click.option('--skip-domain', default=None, multiple=True),
+    click.option('--skip-subreddit', default=None, multiple=True),
+]
 
-def _add_common_options(func):
-    for opt in _common_options:
-        func = opt(func)
-    return func
+_archiver_options = [
+    click.option('--all-comments', is_flag=True, default=None),
+    click.option('-f', '--format', type=click.Choice(('xml', 'json', 'yaml')), default=None),
+]
+
+
+def _add_options(opts: list):
+    def wrap(func):
+        for opt in opts:
+            func = opt(func)
+        return func
+    return wrap
 
 
 @click.group()
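The hunk above replaces the single-purpose `_add_common_options` decorator with a small decorator factory: `_add_options(opts)` returns a decorator that applies each `click.option` in the given list, so the common, downloader, and archiver option sets can be shared across commands. A self-contained sketch of the same pattern (the demo option names are made up for illustration):

import click

# Reusable option list, analogous to _common_options / _downloader_options.
_demo_options = [
    click.option('--limit', type=int, default=None),
    click.option('--verbose', is_flag=True, default=False),
]


def _add_options(opts: list):
    # Returns a decorator that stacks every option in `opts` onto a command,
    # equivalent to writing the @click.option lines by hand.
    def wrap(func):
        for opt in opts:
            func = opt(func)
        return func
    return wrap


@click.command()
@_add_options(_demo_options)
def demo(limit, verbose):
    click.echo(f'limit={limit}, verbose={verbose}')


if __name__ == '__main__':
    demo()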
@@ -45,18 +67,8 @@ def cli():
 
 
 @cli.command('download')
-@click.option('--file-scheme', default=None, type=str)
-@click.option('--folder-scheme', default=None, type=str)
-@click.option('--make-hard-links', is_flag=True, default=None)
-@click.option('--max-wait-time', type=int, default=None)
-@click.option('--no-dupes', is_flag=True, default=None)
-@click.option('--search-existing', is_flag=True, default=None)
-@click.option('--exclude-id', default=None, multiple=True)
-@click.option('--exclude-id-file', default=None, multiple=True)
-@click.option('--skip', default=None, multiple=True)
-@click.option('--skip-domain', default=None, multiple=True)
-@click.option('--skip-subreddit', default=None, multiple=True)
-@_add_common_options
+@_add_options(_common_options)
+@_add_options(_downloader_options)
 @click.pass_context
 def cli_download(context: click.Context, **_):
     config = Configuration()
@@ -73,9 +85,8 @@ def cli_download(context: click.Context, **_):
 
 
 @cli.command('archive')
-@_add_common_options
-@click.option('--all-comments', is_flag=True, default=None)
-@click.option('-f', '--format', type=click.Choice(('xml', 'json', 'yaml')), default=None)
+@_add_options(_common_options)
+@_add_options(_archiver_options)
 @click.pass_context
 def cli_archive(context: click.Context, **_):
     config = Configuration()
@@ -85,7 +96,26 @@ def cli_archive(context: click.Context, **_):
         reddit_archiver = Archiver(config)
         reddit_archiver.download()
     except Exception:
-        logger.exception('Downloader exited unexpectedly')
+        logger.exception('Archiver exited unexpectedly')
         raise
     else:
         logger.info('Program complete')
+
+
+@cli.command('clone')
+@_add_options(_common_options)
+@_add_options(_archiver_options)
+@_add_options(_downloader_options)
+@click.pass_context
+def cli_clone(context: click.Context, **_):
+    config = Configuration()
+    config.process_click_arguments(context)
+    setup_logging(config.verbose)
+    try:
+        reddit_scraper = RedditCloner(config)
+        reddit_scraper.download()
+    except Exception:
+        logger.exception('Scraper exited unexpectedly')
+        raise
+    else:
+        logger.info('Program complete')
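The diff does not show bdfr/cloner.py itself, only that `RedditCloner(config).download()` is driven the same way as the downloader and archiver commands above. Purely as an illustration of the combined behaviour, and not the project's actual implementation, such a class could be composed from the two existing ones:

from bdfr.archiver import Archiver
from bdfr.configuration import Configuration
from bdfr.downloader import RedditDownloader


class HypotheticalCloner:
    # Hypothetical stand-in for RedditCloner: run the download pass, then the
    # archive pass, over the same configuration. The real class may share
    # state between the two rather than composing them like this.
    def __init__(self, args: Configuration):
        self._downloader = RedditDownloader(args)
        self._archiver = Archiver(args)

    def download(self):
        # Same entry point the CLI calls for the other two commands.
        self._downloader.download()
        self._archiver.download()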