From 34c8a9a5d07d285036fdc77011348688ce262ec9 Mon Sep 17 00:00:00 2001 From: Serene <33189705+Serene-Arc@users.noreply.github.com> Date: Fri, 9 Apr 2021 23:15:45 +1000 Subject: [PATCH] Add option to download user comments (#258) * Add option to download user comments * Update README --- README.md | 2 ++ bulkredditdownloader/__main__.py | 1 + bulkredditdownloader/archiver.py | 9 +++++++++ bulkredditdownloader/configuration.py | 1 + bulkredditdownloader/tests/test_integration.py | 13 +++++++++++++ 5 files changed, 26 insertions(+) diff --git a/README.md b/README.md index daff766..082da6e 100644 --- a/README.md +++ b/README.md @@ -136,6 +136,8 @@ The following options apply only to the `download` command. This command downloa The following options are for the `archive` command specifically. +- `--all-comments` + - When combined with the `--user` option, this will download all the user's comments - `-f, --format` - This specifies the format of the data file saved to disk - The following formats are available: diff --git a/bulkredditdownloader/__main__.py b/bulkredditdownloader/__main__.py index bc4e4e7..4c1158e 100644 --- a/bulkredditdownloader/__main__.py +++ b/bulkredditdownloader/__main__.py @@ -70,6 +70,7 @@ def cli_download(context: click.Context, **_): @cli.command('archive') @_add_common_options +@click.option('--all-comments', is_flag=True, default=None) @click.option('-f,', '--format', type=click.Choice(('xml', 'json', 'yaml')), default=None) @click.pass_context def cli_archive(context: click.Context, **_): diff --git a/bulkredditdownloader/archiver.py b/bulkredditdownloader/archiver.py index db4ee92..b44d4de 100644 --- a/bulkredditdownloader/archiver.py +++ b/bulkredditdownloader/archiver.py @@ -4,6 +4,7 @@ import json import logging import re +from typing import Iterator import dict2xml import praw.models @@ -41,6 +42,14 @@ class Archiver(RedditDownloader): supplied_submissions.append(self.reddit_instance.submission(url=sub_id)) return [supplied_submissions] 
+    def _get_user_data(self) -> list[Iterator]:
+        results = super(Archiver, self)._get_user_data()
+        if self.args.user and self.args.all_comments:
+            sort = self._determine_sort_function()
+            logger.debug(f'Retrieving comments of user {self.args.user}')
+            results.append(sort(self.reddit_instance.redditor(self.args.user).comments, limit=self.args.limit))
+        return results
+
     @staticmethod
     def _pull_lever_entry_factory(praw_item: (praw.models.Submission, praw.models.Comment)) -> BaseArchiveEntry:
         if isinstance(praw_item, praw.models.Submission):
diff --git a/bulkredditdownloader/configuration.py b/bulkredditdownloader/configuration.py
index 5cb23b3..aa634b6 100644
--- a/bulkredditdownloader/configuration.py
+++ b/bulkredditdownloader/configuration.py
@@ -37,6 +37,7 @@ class Configuration(Namespace):
 
         # Archiver-specific options
         self.format = 'json'
+        self.all_comments = False
 
     def process_click_arguments(self, context: click.Context):
         for arg_key in context.params.keys():
diff --git a/bulkredditdownloader/tests/test_integration.py b/bulkredditdownloader/tests/test_integration.py
index 2991b68..23cdbd4 100644
--- a/bulkredditdownloader/tests/test_integration.py
+++ b/bulkredditdownloader/tests/test_integration.py
@@ -202,6 +202,19 @@ def test_cli_archive_subreddit(test_args: list[str], tmp_path: Path):
     assert re.search(r'Writing entry .*? to file in .*? format', result.output)
 
 
+@pytest.mark.online
+@pytest.mark.reddit
+@pytest.mark.skipif(not Path('test_config.cfg').exists(), reason='A test config file is required for integration tests')
+@pytest.mark.parametrize('test_args', (
+    ['--user', 'me', '--authenticate', '--all-comments', '-L', '10'],
+))
+def test_cli_archive_all_user_comments(test_args: list[str], tmp_path: Path):
+    runner = CliRunner()
+    test_args = ['archive', str(tmp_path), '-v', '--config', 'test_config.cfg'] + test_args
+    result = runner.invoke(cli, test_args)
+    assert result.exit_code == 0
+
+
 @pytest.mark.online
 @pytest.mark.reddit
 @pytest.mark.slow