From d8752b15fab02249229a0135cb0e8cf5475f1d77 Mon Sep 17 00:00:00 2001 From: Serene <33189705+Serene-Arc@users.noreply.github.com> Date: Sat, 17 Apr 2021 19:56:43 +1000 Subject: [PATCH] Add option to skip specified subreddits (#268) * Rename variables * Add option to skip specific subreddits * Update README --- README.md | 4 ++++ bdfr/__main__.py | 1 + bdfr/configuration.py | 1 + bdfr/downloader.py | 17 +++++++++++------ bdfr/tests/test_integration.py | 16 ++++++++++++++++ 5 files changed, 33 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index ec6ad46..d83da1a 100644 --- a/README.md +++ b/README.md @@ -139,6 +139,10 @@ The following options apply only to the `download` command. This command downloa - `--skip` - This adds file types to the download filter i.e. submissions with one of the supplied file extensions will not be downloaded - Can be specified multiple times +- `--skip-subreddit` + - This skips all submissions from the specified subreddit + - Can be specified multiple times + - Also accepts CSV subreddit names #### Archiver Options diff --git a/bdfr/__main__.py b/bdfr/__main__.py index c58f7ca..26759a1 100644 --- a/bdfr/__main__.py +++ b/bdfr/__main__.py @@ -53,6 +53,7 @@ def cli(): @click.option('--search-existing', is_flag=True, default=None) @click.option('--skip', default=None, multiple=True) @click.option('--skip-domain', default=None, multiple=True) +@click.option('--skip-subreddit', default=None, multiple=True) @_add_common_options @click.pass_context def cli_download(context: click.Context, **_): diff --git a/bdfr/configuration.py b/bdfr/configuration.py index e6d0af7..1d9610c 100644 --- a/bdfr/configuration.py +++ b/bdfr/configuration.py @@ -27,6 +27,7 @@ class Configuration(Namespace): self.folder_scheme: str = '{SUBREDDIT}' self.skip: list[str] = [] self.skip_domain: list[str] = [] + self.skip_subreddit: list[str] = [] self.sort: str = 'hot' self.submitted: bool = False self.subreddit: list[str] = [] diff --git a/bdfr/downloader.py b/bdfr/downloader.py index 4197b04..4897831 100644 --- a/bdfr/downloader.py +++ b/bdfr/downloader.py @@ -93,6 +93,9 @@ class RedditDownloader: self.authenticator = self._create_authenticator() logger.log(9, 'Created site authenticator') + self.args.skip_subreddit = self._split_args_input(self.args.skip_subreddit) + self.args.skip_subreddit = set([sub.lower() for sub in self.args.skip_subreddit]) + def _read_config(self): """Read any cfg values that need to be processed""" if self.args.max_wait_time is None: @@ -210,13 +213,13 @@ class RedditDownloader: return match.group(1) @staticmethod - def _split_args_input(subreddit_entries: list[str]) -> set[str]: - all_subreddits = [] + def _split_args_input(entries: list[str]) -> set[str]: + all_entries = [] split_pattern = re.compile(r'[,;]\s?') - for entry in subreddit_entries: + for entry in entries: results = re.split(split_pattern, entry) - all_subreddits.extend([RedditDownloader._sanitise_subreddit_name(name) for name in results]) - return set(all_subreddits) + all_entries.extend([RedditDownloader._sanitise_subreddit_name(name) for name in results]) + return set(all_entries) def _get_subreddits(self) -> list[praw.models.ListingGenerator]: if self.args.subreddit: @@ -354,8 +357,10 @@ class RedditDownloader: for generator in self.reddit_lists: for submission in generator: if submission.id in self.excluded_submission_ids: - logger.debug(f'Submission {submission.id} in exclusion list, skipping') + logger.debug(f'Object {submission.id} in exclusion list, skipping') continue + elif submission.subreddit.display_name.lower() in self.args.skip_subreddit: + logger.debug(f'Submission {submission.id} in {submission.subreddit.display_name} in skip list') else: logger.debug(f'Attempting to download submission {submission.id}') self._download_submission(submission) diff --git a/bdfr/tests/test_integration.py b/bdfr/tests/test_integration.py index 9623b24..5ab0871 100644 --- a/bdfr/tests/test_integration.py +++ b/bdfr/tests/test_integration.py @@ -284,6 +284,22 @@ def test_cli_download_links_exclusion(test_args: list[str], tmp_path: Path): assert 'Downloaded submission ' not in result.output +@pytest.mark.online +@pytest.mark.reddit +@pytest.mark.skipif(Path('test_config.cfg') is False, reason='A test config file is required for integration tests') +@pytest.mark.parametrize('test_args', ( + ['-l', 'm2601g', '--skip-subreddit', 'trollxchromosomes'], + ['-s', 'trollxchromosomes', '--skip-subreddit', 'trollxchromosomes', '-L', '3'], +)) +def test_cli_download_subreddit_exclusion(test_args: list[str], tmp_path: Path): + runner = CliRunner() + test_args = ['download', str(tmp_path), '-v', '--config', 'test_config.cfg'] + test_args + result = runner.invoke(cli, test_args) + assert result.exit_code == 0 + assert 'in skip list' in result.output + assert 'Downloaded submission ' not in result.output + + @pytest.mark.online @pytest.mark.reddit @pytest.mark.skipif(Path('test_config.cfg') is False, reason='A test config file is required for integration tests')