Merge pull request #701 from OMEGARAZER/development
This commit is contained in:
14
.github/ISSUE_TEMPLATE/bug_report.md
vendored
14
.github/ISSUE_TEMPLATE/bug_report.md
vendored
@@ -12,18 +12,22 @@ assignees: ''
|
||||
- [ ] I have read the [Opening an issue](https://github.com/aliparlakci/bulk-downloader-for-reddit/blob/master/docs/CONTRIBUTING.md#opening-an-issue)
|
||||
|
||||
## Description
|
||||
|
||||
A clear and concise description of what the bug is.
|
||||
|
||||
## Command
|
||||
```
|
||||
|
||||
```text
|
||||
Paste here the command(s) that causes the bug
|
||||
```
|
||||
|
||||
## Environment (please complete the following information):
|
||||
- OS: [e.g. Windows 10]
|
||||
- Python version: [e.g. 3.9.4]
|
||||
## Environment (please complete the following information)
|
||||
|
||||
- OS: [e.g. Windows 10]
|
||||
- Python version: [e.g. 3.9.4]
|
||||
|
||||
## Logs
|
||||
```
|
||||
|
||||
```text
|
||||
Paste the log output here.
|
||||
```
|
||||
|
||||
1
.github/ISSUE_TEMPLATE/feature_request.md
vendored
1
.github/ISSUE_TEMPLATE/feature_request.md
vendored
@@ -12,4 +12,5 @@ assignees: ''
|
||||
- [ ] I have read the [Opening an issue](../../README.md#configuration)
|
||||
|
||||
## Description
|
||||
|
||||
Clearly state the current situation and issues you experience. Then, explain how this feature would solve these issues and make life easier. Also, explain the feature with as much detail as possible.
|
||||
|
||||
@@ -12,7 +12,9 @@ assignees: ''
|
||||
- [ ] I have read the [Opening an issue](../../README.md#configuration)
|
||||
|
||||
## Site
|
||||
|
||||
Provide a URL to the domain of the site.
|
||||
|
||||
## Example posts
|
||||
|
||||
Provide example reddit posts with the domain.
|
||||
|
||||
@@ -75,7 +75,7 @@ class Configuration(Namespace):
|
||||
if not yaml_file_loc.exists():
|
||||
logger.error(f'No YAML file found at {yaml_file_loc}')
|
||||
return
|
||||
with open(yaml_file_loc) as file:
|
||||
with yaml_file_loc.open() as file:
|
||||
try:
|
||||
opts = yaml.load(file, Loader=yaml.FullLoader)
|
||||
except yaml.YAMLError as e:
|
||||
|
||||
@@ -91,7 +91,7 @@ class RedditConnector(metaclass=ABCMeta):
|
||||
logger.log(9, 'Created site authenticator')
|
||||
|
||||
self.args.skip_subreddit = self.split_args_input(self.args.skip_subreddit)
|
||||
self.args.skip_subreddit = set([sub.lower() for sub in self.args.skip_subreddit])
|
||||
self.args.skip_subreddit = {sub.lower() for sub in self.args.skip_subreddit}
|
||||
|
||||
def read_config(self):
|
||||
"""Read any cfg values that need to be processed"""
|
||||
@@ -113,7 +113,7 @@ class RedditConnector(metaclass=ABCMeta):
|
||||
def parse_disabled_modules(self):
|
||||
disabled_modules = self.args.disable_module
|
||||
disabled_modules = self.split_args_input(disabled_modules)
|
||||
disabled_modules = set([name.strip().lower() for name in disabled_modules])
|
||||
disabled_modules = {name.strip().lower() for name in disabled_modules}
|
||||
self.args.disable_module = disabled_modules
|
||||
logger.debug(f'Disabling the following modules: {", ".join(self.args.disable_module)}')
|
||||
|
||||
@@ -249,7 +249,7 @@ class RedditConnector(metaclass=ABCMeta):
|
||||
if self.args.authenticate:
|
||||
try:
|
||||
subscribed_subreddits = list(self.reddit_instance.user.subreddits(limit=None))
|
||||
subscribed_subreddits = set([s.display_name for s in subscribed_subreddits])
|
||||
subscribed_subreddits = {s.display_name for s in subscribed_subreddits}
|
||||
except prawcore.InsufficientScope:
|
||||
logger.error('BDFR has insufficient scope to access subreddit lists')
|
||||
else:
|
||||
@@ -428,7 +428,7 @@ class RedditConnector(metaclass=ABCMeta):
|
||||
if not id_file.exists():
|
||||
logger.warning(f'ID file at {id_file} does not exist')
|
||||
continue
|
||||
with open(id_file, 'r') as file:
|
||||
with id_file.open('r') as file:
|
||||
for line in file:
|
||||
out.append(line.strip())
|
||||
return set(out)
|
||||
|
||||
@@ -36,7 +36,7 @@ class DownloadFilter:
|
||||
combined_extensions = '|'.join(self.excluded_extensions)
|
||||
pattern = re.compile(r'.*({})$'.format(combined_extensions))
|
||||
if re.match(pattern, resource_extension):
|
||||
logger.log(9, f'Url "{resource_extension}" matched with "{str(pattern)}"')
|
||||
logger.log(9, f'Url "{resource_extension}" matched with "{pattern}"')
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
@@ -47,7 +47,7 @@ class DownloadFilter:
|
||||
combined_domains = '|'.join(self.excluded_domains)
|
||||
pattern = re.compile(r'https?://.*({}).*'.format(combined_domains))
|
||||
if re.match(pattern, url):
|
||||
logger.log(9, f'Url "{url}" matched with "{str(pattern)}"')
|
||||
logger.log(9, f'Url "{url}" matched with "{pattern}"')
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
@@ -25,7 +25,7 @@ logger = logging.getLogger(__name__)
|
||||
def _calc_hash(existing_file: Path):
|
||||
chunk_size = 1024 * 1024
|
||||
md5_hash = hashlib.md5()
|
||||
with open(existing_file, 'rb') as file:
|
||||
with existing_file.open('rb') as file:
|
||||
chunk = file.read(chunk_size)
|
||||
while chunk:
|
||||
md5_hash.update(chunk)
|
||||
@@ -127,7 +127,7 @@ class RedditDownloader(RedditConnector):
|
||||
f' in submission {submission.id}')
|
||||
return
|
||||
try:
|
||||
with open(destination, 'wb') as file:
|
||||
with destination.open('wb') as file:
|
||||
file.write(res.content)
|
||||
logger.debug(f'Written file to {destination}')
|
||||
except OSError as e:
|
||||
|
||||
@@ -107,7 +107,7 @@ class FileNameFormatter:
|
||||
destination_directory,
|
||||
*[self._format_name(resource.source_submission, part) for part in self.directory_format_string],
|
||||
)
|
||||
index = f'_{str(index)}' if index else ''
|
||||
index = f'_{index}' if index else ''
|
||||
if not resource.extension:
|
||||
raise BulkDownloaderException(f'Resource from {resource.url} has no extension')
|
||||
file_name = str(self._format_name(resource.source_submission, self.file_format_string))
|
||||
|
||||
@@ -48,11 +48,11 @@ class Youtube(BaseDownloader):
|
||||
raise SiteDownloaderError(f'Youtube download failed: {e}')
|
||||
|
||||
downloaded_files = list(download_path.iterdir())
|
||||
if len(downloaded_files) > 0:
|
||||
if downloaded_files:
|
||||
downloaded_file = downloaded_files[0]
|
||||
else:
|
||||
raise NotADownloadableLinkError(f"No media exists in the URL {self.post.url}")
|
||||
with open(downloaded_file, 'rb') as file:
|
||||
with downloaded_file.open('rb') as file:
|
||||
content = file.read()
|
||||
return content
|
||||
return download
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
if (-not ([string]::IsNullOrEmpty($env:REDDIT_TOKEN)))
|
||||
{
|
||||
copy .\\bdfr\\default_config.cfg .\\test_config.cfg
|
||||
echo "`nuser_token = $env:REDDIT_TOKEN" >> ./test_config.cfg
|
||||
Copy-Item .\\bdfr\\default_config.cfg .\\test_config.cfg
|
||||
Write-Output "`nuser_token = $env:REDDIT_TOKEN" >> ./test_config.cfg
|
||||
}
|
||||
@@ -1,4 +1,6 @@
|
||||
if [ ! -z "$REDDIT_TOKEN" ]
|
||||
#!/bin/bash
|
||||
|
||||
if [ -n "$REDDIT_TOKEN" ]
|
||||
then
|
||||
cp ./bdfr/default_config.cfg ./test_config.cfg
|
||||
echo -e "\nuser_token = $REDDIT_TOKEN" >> ./test_config.cfg
|
||||
|
||||
@@ -69,8 +69,6 @@ members of the project's leadership.
|
||||
## Attribution
|
||||
|
||||
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
|
||||
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
|
||||
available at <https://www.contributor-covenant.org/version/1/4/code-of-conduct.html>
|
||||
|
||||
[homepage]: https://www.contributor-covenant.org
|
||||
|
||||
|
||||
|
||||
@@ -11,11 +11,13 @@ All communication on GitHub, Discord, email, or any other medium must conform to
|
||||
**Before opening a new issue**, be sure that no issues regarding your problem already exist. If a similar issue exists, try to contribute to the issue.
|
||||
|
||||
### Bugs
|
||||
|
||||
When opening an issue about a bug, **please provide the full log file for the run in which the bug occurred**. This log file is named `log_output.txt` in the configuration folder. Check the [README](../README.md) for information on where this is. This log file will contain all the information required for the developers to recreate the bug.
|
||||
|
||||
If you do not have or cannot find the log file, then at minimum please provide the **Reddit ID for the submission** or comment which caused the issue. Also copy in the command that you used to run the BDFR from the command line, as that will also provide helpful information when trying to find and fix the bug. If needed, more information will be asked in the thread of the bug.
|
||||
|
||||
### Feature requests
|
||||
|
||||
In the case of requesting a feature or an enhancement, there are fewer requirements. However, please be clear in what you would like the BDFR to do and also how the feature/enhancement would be used or would be useful to more people. It is crucial that the feature is justified. Any feature request without a concrete reason for it to be implemented has a very small chance to get accepted. Be aware that proposed enhancements may be rejected for multiple reasons, or no reason, at the discretion of the developers.
|
||||
|
||||
## Pull Requests
|
||||
@@ -41,15 +43,17 @@ Bulk Downloader for Reddit requires Python 3.9 at minimum. First, ensure that yo
|
||||
BDfR is built in a way that it can be packaged and installed via `pip`. This places BDfR next to other Python packages and enables you to run the program from any directory. Since it is managed by pip, you can also uninstall it.
|
||||
|
||||
To install the program, clone the repository and run pip inside the project's root directory:
|
||||
|
||||
```bash
|
||||
$ git clone https://github.com/aliparlakci/bulk-downloader-for-reddit.git
|
||||
$ cd ./bulk-downloader-for-reddit
|
||||
$ python3 -m pip install -e .
|
||||
git clone https://github.com/aliparlakci/bulk-downloader-for-reddit.git
|
||||
cd ./bulk-downloader-for-reddit
|
||||
python3 -m pip install -e .
|
||||
```
|
||||
|
||||
The **`-e`** parameter creates a link to that folder. That is, any change inside the folder affects the package immediately. So, when developing, you can be sure that the package is not stale and Python is always running your latest changes. (Due to this linking, moving/removing/renaming the folder might break it.)
|
||||
|
||||
Then, you can run the program from anywhere on your disk as follows:
|
||||
|
||||
```bash
|
||||
bdfr
|
||||
```
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[pytest]
|
||||
addopts = --strict-markers
|
||||
markers =
|
||||
online: tests require a connection to the internet
|
||||
reddit: tests require a connection to Reddit
|
||||
slow: test is slow to run
|
||||
authenticated: test requires an authenticated Reddit instance
|
||||
|
||||
|
||||
@@ -2,10 +2,10 @@
|
||||
|
||||
Due to the verboseness of the logs, a great deal of information can be gathered quite easily from the BDFR's logfiles. In this folder, there is a selection of scripts that parse these logs, scraping useful bits of information. Since the logfiles are recurring patterns of strings, it is a fairly simple matter to write scripts that utilise tools included on most Linux systems.
|
||||
|
||||
- [Script to extract all successfully downloaded IDs](#extract-all-successfully-downloaded-ids)
|
||||
- [Script to extract all failed download IDs](#extract-all-failed-ids)
|
||||
- [Timestamp conversion](#converting-bdfrv1-timestamps-to-bdfrv2-timestamps)
|
||||
- [Printing summary statistics for a run](#printing-summary-statistics)
|
||||
- [Script to extract all successfully downloaded IDs](#extract-all-successfully-downloaded-ids)
|
||||
- [Script to extract all failed download IDs](#extract-all-failed-ids)
|
||||
- [Timestamp conversion](#converting-bdfrv1-timestamps-to-bdfrv2-timestamps)
|
||||
- [Printing summary statistics for a run](#printing-summary-statistics)
|
||||
|
||||
## Extract all Successfully Downloaded IDs
|
||||
|
||||
@@ -58,7 +58,7 @@ A simple script has been included to print summary statistics for a run of the B
|
||||
|
||||
This will create an output like the following:
|
||||
|
||||
```
|
||||
```text
|
||||
Downloaded submissions: 250
|
||||
Failed downloads: 103
|
||||
Files already downloaded: 20073
|
||||
|
||||
@@ -6,7 +6,7 @@ else {
|
||||
Exit 1
|
||||
}
|
||||
|
||||
if ($args[1] -ne $null) {
|
||||
if ($null -ne $args[1]) {
|
||||
$output=$args[1]
|
||||
Write-Host "Outputting IDs to $output"
|
||||
}
|
||||
@@ -14,8 +14,8 @@ else {
|
||||
$output="./failed.txt"
|
||||
}
|
||||
|
||||
Select-String -Path $file -Pattern "Could not download submission" | ForEach-Object { -split $_.Line | Select-Object -Skip 11 | Select-Object -First 1 } | foreach { $_.substring(0,$_.Length-1) } >> $output
|
||||
Select-String -Path $file -Pattern "Could not download submission" | ForEach-Object { -split $_.Line | Select-Object -Skip 11 | Select-Object -First 1 } | ForEach-Object { $_.substring(0,$_.Length-1) } >> $output
|
||||
Select-String -Path $file -Pattern "Failed to download resource" | ForEach-Object { -split $_.Line | Select-Object -Skip 14 | Select-Object -First 1 } >> $output
|
||||
Select-String -Path $file -Pattern "failed to download submission" | ForEach-Object { -split $_.Line | Select-Object -Skip 13 | Select-Object -First 1 } | foreach { $_.substring(0,$_.Length-1) } >> $output
|
||||
Select-String -Path $file -Pattern "failed to download submission" | ForEach-Object { -split $_.Line | Select-Object -Skip 13 | Select-Object -First 1 } | ForEach-Object { $_.substring(0,$_.Length-1) } >> $output
|
||||
Select-String -Path $file -Pattern "Failed to write file" | ForEach-Object { -split $_.Line | Select-Object -Skip 13 | Select-Object -First 1 } >> $output
|
||||
Select-String -Path $file -Pattern "skipped due to disabled module" | ForEach-Object { -split $_.Line | Select-Object -Skip 8 | Select-Object -First 1 } >> $output
|
||||
|
||||
@@ -6,7 +6,7 @@ else {
|
||||
Exit 1
|
||||
}
|
||||
|
||||
if ($args[1] -ne $null) {
|
||||
if ($null -ne $args[1]) {
|
||||
$output=$args[1]
|
||||
Write-Host "Outputting IDs to $output"
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ else {
|
||||
Exit 1
|
||||
}
|
||||
|
||||
if ($args[1] -ne $null) {
|
||||
if ($null -ne $args[1]) {
|
||||
$output=$args[1]
|
||||
Write-Host "Outputting IDs to $output"
|
||||
}
|
||||
|
||||
Submodule scripts/tests/bats updated: ce5ca2802f...e8c840b58f
@@ -1,2 +1 @@
|
||||
[2021-06-12 11:18:25,794 - bdfr.downloader - ERROR] - Failed to download resource https://i.redd.it/61fniokpjq471.jpg in submission nxv3dt with downloader Direct: Unrecoverable error requesting resource: HTTP Code 404
|
||||
|
||||
|
||||
Submodule scripts/tests/test_helper/bats-assert updated: e0de84e9c0...78fa631d13
@@ -47,7 +47,7 @@ def assert_all_results_are_submissions(result_limit: int, results: list[Iterator
|
||||
|
||||
def assert_all_results_are_submissions_or_comments(result_limit: int, results: list[Iterator]) -> list:
|
||||
results = [sub for res in results for sub in res]
|
||||
assert all([isinstance(res, praw.models.Submission) or isinstance(res, praw.models.Comment) for res in results])
|
||||
assert all([isinstance(res, (praw.models.Submission, praw.models.Comment)) for res in results])
|
||||
assert not any([isinstance(m, MagicMock) for m in results])
|
||||
if result_limit is not None:
|
||||
assert len(results) == result_limit
|
||||
@@ -259,7 +259,7 @@ def test_get_subreddit_search(
|
||||
assert all([res.subreddit.display_name in test_subreddits for res in results])
|
||||
assert len(results) <= max_expected_len
|
||||
if max_expected_len != 0:
|
||||
assert len(results) > 0
|
||||
assert results
|
||||
assert not any([isinstance(m, MagicMock) for m in results])
|
||||
|
||||
|
||||
@@ -356,7 +356,7 @@ def test_get_subscribed_subreddits(downloader_mock: MagicMock, authenticated_red
|
||||
downloader_mock.sort_filter = RedditTypes.SortType.HOT
|
||||
results = RedditConnector.get_subreddits(downloader_mock)
|
||||
assert all([isinstance(s, praw.models.ListingGenerator) for s in results])
|
||||
assert len(results) > 0
|
||||
assert results
|
||||
|
||||
|
||||
@pytest.mark.parametrize(('test_name', 'expected'), (
|
||||
|
||||
@@ -152,7 +152,7 @@ def test_download_submission_hash_exists(
|
||||
RedditDownloader._download_submission(downloader_mock, submission)
|
||||
folder_contents = list(tmp_path.iterdir())
|
||||
output = capsys.readouterr()
|
||||
assert len(folder_contents) == 0
|
||||
assert not folder_contents
|
||||
assert re.search(r'Resource hash .*? downloaded elsewhere', output.out)
|
||||
|
||||
|
||||
|
||||
@@ -66,6 +66,6 @@ def test_token_manager_write(example_config: configparser.ConfigParser, tmp_path
|
||||
test_manager = OAuth2TokenManager(example_config, test_path)
|
||||
test_manager.post_refresh_callback(mock_authoriser)
|
||||
assert example_config.get('DEFAULT', 'user_token') == 'changed_token'
|
||||
with open(test_path, 'r') as file:
|
||||
with test_path.open('r') as file:
|
||||
file_contents = file.read()
|
||||
assert 'user_token = changed_token' in file_contents
|
||||
|
||||
Reference in New Issue
Block a user