Add option to skip specified subreddits (#268)

* Rename variables

* Add option to skip specific subreddits

* Update README
This commit is contained in:
Serene
2021-04-17 19:56:43 +10:00
committed by Ali Parlakci
parent c85ae3fc69
commit d8752b15fa
5 changed files with 33 additions and 6 deletions

View File

@@ -139,6 +139,10 @@ The following options apply only to the `download` command. This command downloa
- `--skip` - `--skip`
- This adds file types to the download filter i.e. submissions with one of the supplied file extensions will not be downloaded - This adds file types to the download filter i.e. submissions with one of the supplied file extensions will not be downloaded
- Can be specified multiple times - Can be specified multiple times
- `--skip-subreddit`
- This skips all submissions from the specified subreddit
- Can be specified multiple times
- Also accepts several subreddit names in a single value, separated by commas or semicolons
#### Archiver Options #### Archiver Options

View File

@@ -53,6 +53,7 @@ def cli():
@click.option('--search-existing', is_flag=True, default=None) @click.option('--search-existing', is_flag=True, default=None)
@click.option('--skip', default=None, multiple=True) @click.option('--skip', default=None, multiple=True)
@click.option('--skip-domain', default=None, multiple=True) @click.option('--skip-domain', default=None, multiple=True)
@click.option('--skip-subreddit', default=None, multiple=True)
@_add_common_options @_add_common_options
@click.pass_context @click.pass_context
def cli_download(context: click.Context, **_): def cli_download(context: click.Context, **_):

View File

@@ -27,6 +27,7 @@ class Configuration(Namespace):
self.folder_scheme: str = '{SUBREDDIT}' self.folder_scheme: str = '{SUBREDDIT}'
self.skip: list[str] = [] self.skip: list[str] = []
self.skip_domain: list[str] = [] self.skip_domain: list[str] = []
self.skip_subreddit: list[str] = []
self.sort: str = 'hot' self.sort: str = 'hot'
self.submitted: bool = False self.submitted: bool = False
self.subreddit: list[str] = [] self.subreddit: list[str] = []

View File

@@ -93,6 +93,9 @@ class RedditDownloader:
self.authenticator = self._create_authenticator() self.authenticator = self._create_authenticator()
logger.log(9, 'Created site authenticator') logger.log(9, 'Created site authenticator')
self.args.skip_subreddit = self._split_args_input(self.args.skip_subreddit)
self.args.skip_subreddit = set([sub.lower() for sub in self.args.skip_subreddit])
def _read_config(self): def _read_config(self):
"""Read any cfg values that need to be processed""" """Read any cfg values that need to be processed"""
if self.args.max_wait_time is None: if self.args.max_wait_time is None:
@@ -210,13 +213,13 @@ class RedditDownloader:
return match.group(1) return match.group(1)
@staticmethod @staticmethod
def _split_args_input(subreddit_entries: list[str]) -> set[str]: def _split_args_input(entries: list[str]) -> set[str]:
all_subreddits = [] all_entries = []
split_pattern = re.compile(r'[,;]\s?') split_pattern = re.compile(r'[,;]\s?')
for entry in subreddit_entries: for entry in entries:
results = re.split(split_pattern, entry) results = re.split(split_pattern, entry)
all_subreddits.extend([RedditDownloader._sanitise_subreddit_name(name) for name in results]) all_entries.extend([RedditDownloader._sanitise_subreddit_name(name) for name in results])
return set(all_subreddits) return set(all_entries)
def _get_subreddits(self) -> list[praw.models.ListingGenerator]: def _get_subreddits(self) -> list[praw.models.ListingGenerator]:
if self.args.subreddit: if self.args.subreddit:
@@ -354,8 +357,10 @@ class RedditDownloader:
for generator in self.reddit_lists: for generator in self.reddit_lists:
for submission in generator: for submission in generator:
if submission.id in self.excluded_submission_ids: if submission.id in self.excluded_submission_ids:
logger.debug(f'Submission {submission.id} in exclusion list, skipping') logger.debug(f'Object {submission.id} in exclusion list, skipping')
continue continue
elif submission.subreddit.display_name.lower() in self.args.skip_subreddit:
logger.debug(f'Submission {submission.id} in {submission.subreddit.display_name} in skip list')
else: else:
logger.debug(f'Attempting to download submission {submission.id}') logger.debug(f'Attempting to download submission {submission.id}')
self._download_submission(submission) self._download_submission(submission)

View File

@@ -284,6 +284,22 @@ def test_cli_download_links_exclusion(test_args: list[str], tmp_path: Path):
assert 'Downloaded submission ' not in result.output assert 'Downloaded submission ' not in result.output
@pytest.mark.online
@pytest.mark.reddit
# `Path(...) is False` is an identity comparison against a Path object and is
# never true, so the test would never be skipped; check for the file instead.
@pytest.mark.skipif(
    not Path('test_config.cfg').exists(),
    reason='A test config file is required for integration tests',
)
@pytest.mark.parametrize('test_args', (
    ['-l', 'm2601g', '--skip-subreddit', 'trollxchromosomes'],
    ['-s', 'trollxchromosomes', '--skip-subreddit', 'trollxchromosomes', '-L', '3'],
))
def test_cli_download_subreddit_exclusion(test_args: list[str], tmp_path: Path):
    """Check that `--skip-subreddit` prevents matching submissions from downloading.

    Runs the `download` command verbosely against a temporary directory with
    the parametrized arguments, then checks that the run exits cleanly, that
    the skip message was logged, and that no download was reported.
    """
    runner = CliRunner()
    test_args = ['download', str(tmp_path), '-v', '--config', 'test_config.cfg'] + test_args
    result = runner.invoke(cli, test_args)
    assert result.exit_code == 0
    # Emitted by the downloader when a submission's subreddit is in the skip set.
    assert 'in skip list' in result.output
    assert 'Downloaded submission ' not in result.output
@pytest.mark.online @pytest.mark.online
@pytest.mark.reddit @pytest.mark.reddit
@pytest.mark.skipif(Path('test_config.cfg') is False, reason='A test config file is required for integration tests') @pytest.mark.skipif(Path('test_config.cfg') is False, reason='A test config file is required for integration tests')