Add option to skip specified subreddits (#268)

* Rename variables * Add option to skip specific subreddits * Update README
2021-04-17 19:56:43 +10:00
parent c85ae3fc69
commit d8752b15fa
5 changed files with 33 additions and 6 deletions
--- a/README.md
+++ b/README.md
@@ -139,6 +139,10 @@ The following options apply only to the `download` command. This command downloa
 - `--skip`
  - This adds file types to the download filter, i.e. submissions with one of the supplied file extensions will not be downloaded
  - Can be specified multiple times
 - `--skip-subreddit`
  - This skips all submissions from the specified subreddit
  - Can be specified multiple times
  - Also accepts several subreddit names in one value, separated by commas or semicolons
 #### Archiver Options
--- a/bdfr/main.py
+++ b/bdfr/main.py
@@ -53,6 +53,7 @@ def cli():
@click.option('--search-existing', is_flag=True, default=None)
@click.option('--skip', default=None, multiple=True)
@click.option('--skip-domain', default=None, multiple=True)
@click.option('--skip-subreddit', default=None, multiple=True)
@_add_common_options
@click.pass_context
 def cli_download(context: click.Context, **_):
--- a/bdfr/configuration.py
+++ b/bdfr/configuration.py
@@ -27,6 +27,7 @@ class Configuration(Namespace):
        self.folder_scheme: str = '{SUBREDDIT}'
        self.skip: list[str] = []
        self.skip_domain: list[str] = []
        self.skip_subreddit: list[str] = []
        self.sort: str = 'hot'
        self.submitted: bool = False
        self.subreddit: list[str] = []
--- a/bdfr/downloader.py
+++ b/bdfr/downloader.py
@@ -93,6 +93,9 @@ class RedditDownloader:
        self.authenticator = self._create_authenticator()
        logger.log(9, 'Created site authenticator')
        self.args.skip_subreddit = self._split_args_input(self.args.skip_subreddit)
        self.args.skip_subreddit = set([sub.lower() for sub in self.args.skip_subreddit])
    def _read_config(self):
        """Read any cfg values that need to be processed"""
        if self.args.max_wait_time is None:
@@ -210,13 +213,13 @@ class RedditDownloader:
        return match.group(1)
    @staticmethod
-    def _split_args_input(subreddit_entries: list[str]) -> set[str]:
+    def _split_args_input(entries: list[str]) -> set[str]:
-        all_subreddits = []
+        all_entries = []
        split_pattern = re.compile(r'[,;]\s?')
-        for entry in subreddit_entries:
+        for entry in entries:
            results = re.split(split_pattern, entry)
-            all_subreddits.extend([RedditDownloader._sanitise_subreddit_name(name) for name in results])
+            all_entries.extend([RedditDownloader._sanitise_subreddit_name(name) for name in results])
-        return set(all_subreddits)
+        return set(all_entries)
    def _get_subreddits(self) -> list[praw.models.ListingGenerator]:
        if self.args.subreddit:
@@ -354,8 +357,10 @@ class RedditDownloader:
        for generator in self.reddit_lists:
            for submission in generator:
                if submission.id in self.excluded_submission_ids:
-                    logger.debug(f'Submission {submission.id} in exclusion list, skipping')
+                    logger.debug(f'Object {submission.id} in exclusion list, skipping')
                    continue
                elif submission.subreddit.display_name.lower() in self.args.skip_subreddit:
                    logger.debug(f'Submission {submission.id} in {submission.subreddit.display_name} in skip list')
                else:
                    logger.debug(f'Attempting to download submission {submission.id}')
                    self._download_submission(submission)
--- a/bdfr/tests/test_integration.py
+++ b/bdfr/tests/test_integration.py
@@ -284,6 +284,22 @@ def test_cli_download_links_exclusion(test_args: list[str], tmp_path: Path):
    assert 'Downloaded submission ' not in result.output
@pytest.mark.online
@pytest.mark.reddit
# A Path object is never the singleton ``False``, so ``Path(...) is False`` could
# never trigger the skip; check whether the config file actually exists instead.
@pytest.mark.skipif(not Path('test_config.cfg').exists(), reason='A test config file is required for integration tests')
@pytest.mark.parametrize('test_args', (
    ['-l', 'm2601g', '--skip-subreddit', 'trollxchromosomes'],
    ['-s', 'trollxchromosomes', '--skip-subreddit', 'trollxchromosomes', '-L', '3'],
))
def test_cli_download_subreddit_exclusion(test_args: list[str], tmp_path: Path):
    """Check that ``--skip-subreddit`` prevents matching submissions from being downloaded.

    Each parametrised argument list is appended to a verbose ``download``
    invocation that writes into ``tmp_path`` and reads ``test_config.cfg``.
    The run must exit cleanly, report the skip in its output, and must not
    report any downloaded submission.
    """
    runner = CliRunner()
    test_args = ['download', str(tmp_path), '-v', '--config', 'test_config.cfg'] + test_args
    result = runner.invoke(cli, test_args)
    assert result.exit_code == 0
    # The skip is reported through the 'in skip list' log message.
    assert 'in skip list' in result.output
    assert 'Downloaded submission ' not in result.output
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.skipif(Path('test_config.cfg') is False, reason='A test config file is required for integration tests')