.gitmodules (vendored, new file, 9 lines)
@@ -0,0 +1,9 @@
[submodule "scripts/tests/bats"]
	path = scripts/tests/bats
	url = https://github.com/bats-core/bats-core.git
[submodule "scripts/tests/test_helper/bats-assert"]
	path = scripts/tests/test_helper/bats-assert
	url = https://github.com/bats-core/bats-assert.git
[submodule "scripts/tests/test_helper/bats-support"]
	path = scripts/tests/test_helper/bats-support
	url = https://github.com/bats-core/bats-support.git

README.md
@@ -196,6 +196,9 @@ The following options are for the `archive` command specifically.
   - `json` (default)
   - `xml`
   - `yaml`
+- `--comment-context`
+  - This option will, instead of downloading an individual comment, download the submission that comment is a part of
+  - May result in a longer run time as it retrieves much more data

 ### Cloner Options

bdfr/__main__.py
@@ -50,6 +50,7 @@ _downloader_options = [

 _archiver_options = [
     click.option('--all-comments', is_flag=True, default=None),
+    click.option('--comment-context', is_flag=True, default=None),
     click.option('-f', '--format', type=click.Choice(('xml', 'json', 'yaml')), default=None),
 ]

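Each `click.option(...)` call above returns a decorator, so a shared list like `_archiver_options` can be attached to a command in one step. A minimal sketch of how such a list is typically wired up (the helper and command below are hypothetical illustrations; the project's actual wiring sits outside this hunk):

```python
def _add_options(opts: list):
    # Apply every click.option decorator in the list to the command function
    def wrapper(func):
        for opt in reversed(opts):
            func = opt(func)
        return func
    return wrapper


@_add_options(_archiver_options)  # hypothetical usage on the archive command
def cli_archive(**kwargs):
    ...
```
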
bdfr/archive_entry/base_archive_entry.py
@@ -22,6 +22,7 @@ class BaseArchiveEntry(ABC):
             'id': in_comment.id,
             'score': in_comment.score,
             'subreddit': in_comment.subreddit.display_name,
             'author_flair': in_comment.author_flair_text,
+            'submission': in_comment.submission.id,
             'stickied': in_comment.stickied,
             'body': in_comment.body,

bdfr/archiver.py
@@ -61,6 +61,9 @@ class Archiver(RedditConnector):
         raise ArchiverError(f'Factory failed to classify item of type {type(praw_item).__name__}')

     def write_entry(self, praw_item: (praw.models.Submission, praw.models.Comment)):
+        if self.args.comment_context and isinstance(praw_item, praw.models.Comment):
+            logger.debug(f'Converting comment {praw_item.id} to submission {praw_item.submission.id}')
+            praw_item = praw_item.submission
         archive_entry = self._pull_lever_entry_factory(praw_item)
         if self.args.format == 'json':
             self._write_entry_json(archive_entry)

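The new branch above is what makes `--comment-context` work: a comment is promoted to its parent submission before the entry factory runs. Condensed from the integration test added later in this diff, a quick way to exercise it (assumes a valid test config and output directory, as those tests do):

```python
from click.testing import CliRunner
from bdfr.__main__ import cli

runner = CliRunner()
result = runner.invoke(cli, ['archive', './output', '-v', '--comment-context', '--link', 'gxqapql'])
assert 'Converting comment' in result.output  # the debug line logged above
```
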
bdfr/configuration.py
@@ -41,8 +41,9 @@ class Configuration(Namespace):
         self.verbose: int = 0

         # Archiver-specific options
-        self.format = 'json'
         self.all_comments = False
+        self.format = 'json'
+        self.comment_context: bool = False

     def process_click_arguments(self, context: click.Context):
         for arg_key in context.params.keys():

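The hunk cuts off inside `process_click_arguments`; a plausible continuation (an assumption for illustration, not necessarily the project's exact code) copies only the Click parameters the user actually supplied onto the namespace, so defaults such as `format = 'json'` set in `__init__` survive:

```python
def process_click_arguments(self, context: click.Context):
    for arg_key in context.params.keys():
        # Skip unset options so the defaults assigned above are preserved
        if context.params[arg_key] is not None and hasattr(self, arg_key):
            setattr(self, arg_key, context.params[arg_key])
```
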
bdfr/connector.py
@@ -90,14 +90,11 @@ class RedditConnector(metaclass=ABCMeta):
     def read_config(self):
         """Read any cfg values that need to be processed"""
         if self.args.max_wait_time is None:
-            if not self.cfg_parser.has_option('DEFAULT', 'max_wait_time'):
-                self.cfg_parser.set('DEFAULT', 'max_wait_time', '120')
-                logger.log(9, 'Wrote default download wait time download to config file')
-            self.args.max_wait_time = self.cfg_parser.getint('DEFAULT', 'max_wait_time')
+            self.args.max_wait_time = self.cfg_parser.getint('DEFAULT', 'max_wait_time', fallback=120)
             logger.debug(f'Setting maximum download wait time to {self.args.max_wait_time} seconds')
         if self.args.time_format is None:
             option = self.cfg_parser.get('DEFAULT', 'time_format', fallback='ISO')
-            if re.match(r'^[ \'\"]*$', option):
+            if re.match(r'^[\s\'\"]*$', option):
                 option = 'ISO'
             logger.debug(f'Setting datetime format string to {option}')
             self.args.time_format = option

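The `fallback` keyword collapses the old has_option/set/getint dance into a single call. A standalone illustration using only the standard library:

```python
from configparser import ConfigParser

parser = ConfigParser()
parser.read_string('[DEFAULT]\n')  # no max_wait_time present
print(parser.getint('DEFAULT', 'max_wait_time', fallback=120))  # -> 120
```

One behavioural difference worth noting: the removed branch wrote the default back into the user's config file, whereas the fallback form leaves the file untouched.
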
@@ -119,7 +116,7 @@ class RedditConnector(metaclass=ABCMeta):
             logger.debug('Using authenticated Reddit instance')
             if not self.cfg_parser.has_option('DEFAULT', 'user_token'):
                 logger.log(9, 'Commencing OAuth2 authentication')
-                scopes = self.cfg_parser.get('DEFAULT', 'scopes')
+                scopes = self.cfg_parser.get('DEFAULT', 'scopes', fallback='identity, history, read, save')
                 scopes = OAuth2Authenticator.split_scopes(scopes)
                 oauth2_authenticator = OAuth2Authenticator(
                     scopes,

@@ -210,7 +207,7 @@ class RedditConnector(metaclass=ABCMeta):
         if log_path.exists():
             try:
                 file_handler.doRollover()
-            except PermissionError as e:
+            except PermissionError:
                 logger.critical(
                     'Cannot rollover logfile, make sure this is the only '
                     'BDFR process or specify alternate logfile location')

@@ -242,6 +239,9 @@ class RedditConnector(metaclass=ABCMeta):
         if self.args.subreddit:
             out = []
             for reddit in self.split_args_input(self.args.subreddit):
+                if reddit == 'friends' and self.authenticated is False:
+                    logger.error('Cannot read friends subreddit without an authenticated instance')
+                    continue
                 try:
                     reddit = self.reddit_instance.subreddit(reddit)
                     try:

@@ -394,7 +394,7 @@ class RedditConnector(metaclass=ABCMeta):

     @staticmethod
     def check_subreddit_status(subreddit: praw.models.Subreddit):
-        if subreddit.display_name == 'all':
+        if subreddit.display_name in ('all', 'friends'):
             return
         try:
             assert subreddit.id

bdfr/downloader.py
@@ -54,6 +54,9 @@ class RedditDownloader(RedditConnector):
         elif not isinstance(submission, praw.models.Submission):
             logger.warning(f'{submission.id} is not a submission')
             return
+        elif not self.download_filter.check_url(submission.url):
+            logger.debug(f'Submission {submission.id} filtered due to URL {submission.url}')
+            return

         logger.debug(f'Attempting to download submission {submission.id}')
         try:

@@ -76,7 +79,7 @@ class RedditDownloader(RedditConnector):
                     logger.debug(f'File {destination} from submission {submission.id} already exists, continuing')
                     continue
                 elif not self.download_filter.check_resource(res):
-                    logger.debug(f'Download filter removed {submission.id} with URL {submission.url}')
+                    logger.debug(f'Download filter removed {submission.id} file with URL {submission.url}')
                     continue
                 try:
                     res.download(self.args.max_wait_time)

@@ -103,7 +106,8 @@ class RedditDownloader(RedditConnector):
                     logger.debug(f'Written file to {destination}')
                 except OSError as e:
                     logger.exception(e)
-                    logger.error(f'Failed to write file to {destination} in submission {submission.id}: {e}')
+                    logger.error(f'Failed to write file in submission {submission.id} to {destination}: {e}')
                     return
                 creation_time = time.mktime(datetime.fromtimestamp(submission.created_utc).timetuple())
                 os.utime(destination, (creation_time, creation_time))
+                self.master_hash_list[resource_hash] = destination

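For reference, the timestamp handling in this hunk relies only on the standard library: the downloaded file's access and modification times are reset to the submission's creation time.

```python
import os
import time
from datetime import datetime

created_utc = 1623496758  # hypothetical Reddit creation timestamp
creation_time = time.mktime(datetime.fromtimestamp(created_utc).timetuple())
os.utime('downloaded_file.jpg', (creation_time, creation_time))  # (atime, mtime)
```
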
bdfr/site_downloaders/download_factory.py
@@ -13,6 +13,7 @@ from bdfr.site_downloaders.fallback_downloaders.youtubedl_fallback import Youtub
 from bdfr.site_downloaders.gallery import Gallery
 from bdfr.site_downloaders.gfycat import Gfycat
 from bdfr.site_downloaders.imgur import Imgur
+from bdfr.site_downloaders.pornhub import PornHub
 from bdfr.site_downloaders.redgifs import Redgifs
 from bdfr.site_downloaders.self_post import SelfPost
 from bdfr.site_downloaders.youtube import Youtube

@@ -43,6 +44,8 @@ class DownloadFactory:
             return Youtube
         elif re.match(r'i\.redd\.it.*', sanitised_url):
             return Direct
+        elif re.match(r'pornhub\.com.*', sanitised_url):
+            return PornHub
         elif YoutubeDlFallback.can_handle_link(sanitised_url):
             return YoutubeDlFallback
         else:

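The two new lines route recognised PornHub links to the new downloader class before the generic fallback is tried. Mirroring the factory test added later in this diff:

```python
from bdfr.site_downloaders.download_factory import DownloadFactory
from bdfr.site_downloaders.pornhub import PornHub

# pull_lever sanitises the URL and walks the elif chain above
assert DownloadFactory.pull_lever(
    'https://www.pornhub.com/view_video.php?viewkey=ph5a2ee0461a8d0') is PornHub
```
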
bdfr/site_downloaders/gallery.py
@@ -1,10 +1,9 @@
 #!/usr/bin/env python3

 import logging
-import re
 from typing import Optional

-import bs4
+import requests
 from praw.models import Submission

 from bdfr.exceptions import SiteDownloaderError

@@ -20,21 +19,30 @@ class Gallery(BaseDownloader):
         super().__init__(post)

     def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
-        image_urls = self._get_links(self.post.url)
+        try:
+            image_urls = self._get_links(self.post.gallery_data['items'])
+        except AttributeError:
+            try:
+                image_urls = self._get_links(self.post.crosspost_parent_list[0]['gallery_data']['items'])
+            except (AttributeError, IndexError, TypeError):
+                logger.error(f'Could not find gallery data in submission {self.post.id}')
+                logger.exception('Gallery image find failure')
+                raise SiteDownloaderError('No images found in Reddit gallery')
+
         if not image_urls:
             raise SiteDownloaderError('No images found in Reddit gallery')
         return [Resource(self.post, url) for url in image_urls]

     @staticmethod
-    def _get_links(url: str) -> list[str]:
-        resource_headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
-                          ' Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64',
-            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
-        }
-        page = Gallery.retrieve_url(url, headers=resource_headers)
-        soup = bs4.BeautifulSoup(page.text, 'html.parser')
-
-        links = soup.findAll('a', attrs={'target': '_blank', 'href': re.compile(r'https://preview\.redd\.it.*')})
-        links = [link.get('href') for link in links]
-        return links
+    def _get_links(id_dict: list[dict]) -> list[str]:
+        out = []
+        for item in id_dict:
+            image_id = item['media_id']
+            possible_extensions = ('.jpg', '.png', '.gif', '.gifv', '.jpeg')
+            for extension in possible_extensions:
+                test_url = f'https://i.redd.it/{image_id}{extension}'
+                response = requests.head(test_url)
+                if response.status_code == 200:
+                    out.append(test_url)
+                    break
+        return out

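The rewritten `_get_links` no longer scrapes the gallery page with BeautifulSoup; it derives candidate `i.redd.it` URLs from the gallery's `media_id` entries and keeps the first extension that answers an HTTP 200 to a `HEAD` probe, which transfers headers only and keeps the check cheap. Usage mirrors the updated tests below (network access required):

```python
from bdfr.site_downloaders.gallery import Gallery

urls = Gallery._get_links([{'media_id': '18nzv9ch0hn61'}])
# -> ['https://i.redd.it/18nzv9ch0hn61.jpg'] while the image is live
```
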
bdfr/site_downloaders/imgur.py
@@ -37,9 +37,10 @@ class Imgur(BaseDownloader):

     @staticmethod
     def _get_data(link: str) -> dict:
-        if re.match(r'.*\.gifv$', link):
+        link = link.rstrip('?')
+        if re.match(r'(?i).*\.gifv$', link):
             link = link.replace('i.imgur', 'imgur')
-            link = link.rstrip('.gifv')
+            link = re.sub('(?i)\\.gifv$', '', link)

         res = Imgur.retrieve_url(link, cookies={'over18': '1', 'postpagebeta': '0'})

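The switch from `str.rstrip` to `re.sub` fixes a real bug: `rstrip` treats its argument as a set of characters rather than a suffix, so it can eat legitimate trailing characters of the link. A quick demonstration, using a URL from the new test cases:

```python
import re

url = 'https://imgur.com/lFJai6i.gifv'
print(url.rstrip('.gifv'))              # 'https://imgur.com/lFJai6' -- the trailing 'i' is lost
print(re.sub('(?i)\\.gifv$', '', url))  # 'https://imgur.com/lFJai6i' -- suffix only
print(re.sub('(?i)\\.gifv$', '', 'https://imgur.com/ubYwpbk.GIFV'))  # case-insensitive
```
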
bdfr/site_downloaders/pornhub.py (new file, 26 lines)
@@ -0,0 +1,26 @@
#!/usr/bin/env python3
# coding=utf-8

import logging
from typing import Optional

from praw.models import Submission

from bdfr.resource import Resource
from bdfr.site_authenticator import SiteAuthenticator
from bdfr.site_downloaders.youtube import Youtube

logger = logging.getLogger(__name__)


class PornHub(Youtube):
    def __init__(self, post: Submission):
        super().__init__(post)

    def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
        ytdl_options = {
            'format': 'best',
            'nooverwrites': True,
        }
        out = self._download_video(ytdl_options)
        return [out]

bdfr/site_downloaders/youtube.py
@@ -4,7 +4,6 @@ import json
 import re
 from typing import Optional

-from bs4 import BeautifulSoup
 from praw.models import Submission

 from bdfr.exceptions import SiteDownloaderError

@@ -43,7 +43,6 @@ class Youtube(BaseDownloader):
         except youtube_dl.DownloadError as e:
             raise SiteDownloaderError(f'Youtube download failed: {e}')

-        downloaded_file = None
         downloaded_files = list(download_path.iterdir())
         if len(downloaded_files) > 0:
             downloaded_file = downloaded_files[0]

scripts/extract_failed_ids.sh
@@ -11,12 +11,13 @@ if [ -n "$2" ]; then
     output="$2"
     echo "Outputting IDs to $output"
 else
-    output="failed.txt"
+    output="./failed.txt"
 fi

 {
     grep 'Could not download submission' "$file" | awk '{ print $12 }' | rev | cut -c 2- | rev ;
     grep 'Failed to download resource' "$file" | awk '{ print $15 }' ;
     grep 'failed to download submission' "$file" | awk '{ print $14 }' | rev | cut -c 2- | rev ;
-    grep 'Failed to write file' "$file" | awk '{ print $16 }' | rev | cut -c 2- | rev ;
+    grep 'Failed to write file' "$file" | awk '{ print $13 }' | rev | cut -c 2- | rev ;
+    grep 'skipped due to disabled module' "$file" | awk '{ print $9 }' ;
 } >>"$output"

scripts/extract_successful_ids.sh
@@ -11,7 +11,7 @@ if [ -n "$2" ]; then
     output="$2"
     echo "Outputting IDs to $output"
 else
-    output="successful.txt"
+    output="./successful.txt"
 fi

 {

scripts/tests/README.md (new file, 13 lines)
@@ -0,0 +1,13 @@
# Bash Scripts Testing

The `bats` framework is included and used to test the included scripts, specifically the scripts designed to parse the logging output. As this involves delicate regexes and field indexes, it is necessary to test these. The framework and its helper libraries are vendored as git submodules, so run `git submodule update --init --recursive` first if they have not yet been fetched.

## Running Tests

Running the tests is easy, and can be done with a single command. Once the working directory is this directory, run the following command.

```bash
./bats/bin/bats *.bats
```

This will run all test files that have the `.bats` suffix.

scripts/tests/bats (new submodule)
Submodule scripts/tests/bats added at ce5ca2802f

scripts/tests/example_logfiles/failed_disabled_module.txt (new file, 1 line)
@@ -0,0 +1 @@
[2021-06-12 12:49:18,452 - bdfr.downloader - DEBUG] - Submission m2601g skipped due to disabled module Direct

scripts/tests/example_logfiles/failed_no_downloader.txt (new file, 3 lines)
@@ -0,0 +1,3 @@
[2021-06-12 11:13:35,665 - bdfr.downloader - ERROR] - Could not download submission nxv3ew: No downloader module exists for url https://www.biorxiv.org/content/10.1101/2021.06.11.447961v1?rss=1
[2021-06-12 11:14:21,958 - bdfr.downloader - ERROR] - Could not download submission nxv3ek: No downloader module exists for url https://alkossegyedit.hu/termek/pluss-macko-poloval-20cm/?feed_id=34832&_unique_id=60c40a1190ccb&utm_source=Reddit&utm_medium=AEAdmin&utm_campaign=Poster
[2021-06-12 11:17:53,456 - bdfr.downloader - ERROR] - Could not download submission nxv3ea: No downloader module exists for url https://www.biorxiv.org/content/10.1101/2021.06.11.448067v1?rss=1

scripts/tests/example_logfiles/failed_resource_error.txt (new file, 2 lines)
@@ -0,0 +1,2 @@
[2021-06-12 11:18:25,794 - bdfr.downloader - ERROR] - Failed to download resource https://i.redd.it/61fniokpjq471.jpg in submission nxv3dt with downloader Direct: Unrecoverable error requesting resource: HTTP Code 404

scripts/tests/example_logfiles/failed_sitedownloader_error.txt (new file, 2 lines)
@@ -0,0 +1,2 @@
[2021-06-12 08:38:35,657 - bdfr.downloader - ERROR] - Site Gallery failed to download submission nxr7x9: No images found in Reddit gallery
[2021-06-12 08:47:22,005 - bdfr.downloader - ERROR] - Site Gallery failed to download submission nxpn0h: Server responded with 503 to https://www.reddit.com/gallery/nxpkvh

scripts/tests/example_logfiles/failed_write_error.txt (new file, 1 line)
@@ -0,0 +1 @@
[2021-06-09 22:01:04,530 - bdfr.downloader - ERROR] - Failed to write file in submission nnboza to C:\Users\Yoga 14\path\to\output\ThotNetwork\KatieCarmine_I POST A NEW VIDEO ALMOST EVERYDAY AND YOU NEVER HAVE TO PAY EXTRA FOR IT! I want to share my sex life with you! Only $6 per month and you get full access to over 400 videos of me getting fuck_nnboza.mp4: [Errno 2] No such file or directory: 'C:\\Users\\Yoga 14\\path\\to\\output\\ThotNetwork\\KatieCarmine_I POST A NEW VIDEO ALMOST EVERYDAY AND YOU NEVER HAVE TO PAY EXTRA FOR IT! I want to share my sex life with you! Only $6 per month and you get full access to over 400 videos of me getting fuck_nnboza.mp4'

scripts/tests/example_logfiles/succeed_already_exists.txt (new file, 3 lines)
@@ -0,0 +1,3 @@
[2021-06-12 08:41:51,464 - bdfr.downloader - DEBUG] - File /media/smaug/private/reddit/tumblr/nxry0l.jpg from submission nxry0l already exists, continuing
[2021-06-12 08:41:51,469 - bdfr.downloader - DEBUG] - File /media/smaug/private/reddit/tumblr/nxrlgn.gif from submission nxrlgn already exists, continuing
[2021-06-12 08:41:51,472 - bdfr.downloader - DEBUG] - File /media/smaug/private/reddit/tumblr/nxrq9g.png from submission nxrq9g already exists, continuing

scripts/tests/example_logfiles/succeed_download_filter.txt (new file, 3 lines)
@@ -0,0 +1,3 @@
[2021-06-10 20:36:48,722 - bdfr.downloader - DEBUG] - Download filter removed nwfirr with URL https://www.youtube.com/watch?v=NVSiX0Tsees
[2021-06-12 19:56:36,848 - bdfr.downloader - DEBUG] - Download filter removed nwfgcl with URL https://www.reddit.com/r/MaliciousCompliance/comments/nwfgcl/new_guy_decided_to_play_manager_alright/
[2021-06-12 19:56:28,587 - bdfr.downloader - DEBUG] - Download filter removed nxuxjy with URL https://www.reddit.com/r/MaliciousCompliance/comments/nxuxjy/you_want_an_omelette_with_nothing_inside_okay/

scripts/tests/example_logfiles/succeed_downloaded_submission.txt (new file, 7 lines)
@@ -0,0 +1,7 @@
[2021-06-12 11:58:53,864 - bdfr.downloader - INFO] - Downloaded submission nxui9y from tumblr
[2021-06-12 11:58:56,618 - bdfr.downloader - INFO] - Downloaded submission nxsr4r from tumblr
[2021-06-12 11:58:59,026 - bdfr.downloader - INFO] - Downloaded submission nxviir from tumblr
[2021-06-12 11:59:00,289 - bdfr.downloader - INFO] - Downloaded submission nxusva from tumblr
[2021-06-12 11:59:00,735 - bdfr.downloader - INFO] - Downloaded submission nxvko7 from tumblr
[2021-06-12 11:59:01,215 - bdfr.downloader - INFO] - Downloaded submission nxvd63 from tumblr
[2021-06-12 11:59:13,891 - bdfr.downloader - INFO] - Downloaded submission nn9cor from tumblr

scripts/tests/example_logfiles/succeed_hard_link.txt (new file, 1 line)
@@ -0,0 +1 @@
[2021-06-11 17:33:02,118 - bdfr.downloader - INFO] - Hard link made linking /media/smaug/private/reddit/tumblr/nwnp2n.jpg to /media/smaug/private/reddit/tumblr/nwskqb.jpg in submission nwnp2n

scripts/tests/example_logfiles/succeed_resource_hash.txt (new file, 1 line)
@@ -0,0 +1 @@
[2021-06-11 17:33:02,118 - bdfr.downloader - INFO] - Resource hash aaaaaaaaaaaaaaaaaaaaaaa from submission n86jk8 downloaded elsewhere

scripts/tests/test_extract_failed_ids.bats (new file, 43 lines)
@@ -0,0 +1,43 @@
setup() {
    load ./test_helper/bats-support/load
    load ./test_helper/bats-assert/load
}

teardown() {
    rm -f failed.txt
}

@test "fail run no logfile" {
    run ../extract_failed_ids.sh
    assert_failure
}

@test "fail no downloader module" {
    run ../extract_failed_ids.sh ./example_logfiles/failed_no_downloader.txt
    assert [ "$( wc -l 'failed.txt' | awk '{ print $1 }' )" -eq "3" ];
    assert [ "$( grep -Ecv '\w{6,7}' 'failed.txt' )" -eq "0" ];
}

@test "fail resource error" {
    run ../extract_failed_ids.sh ./example_logfiles/failed_resource_error.txt
    assert [ "$( wc -l 'failed.txt' | awk '{ print $1 }' )" -eq "1" ];
    assert [ "$( grep -Ecv '\w{6,7}' 'failed.txt' )" -eq "0" ];
}

@test "fail site downloader error" {
    run ../extract_failed_ids.sh ./example_logfiles/failed_sitedownloader_error.txt
    assert [ "$( wc -l 'failed.txt' | awk '{ print $1 }' )" -eq "2" ];
    assert [ "$( grep -Ecv '\w{6,7}' 'failed.txt' )" -eq "0" ];
}

@test "fail failed file write" {
    run ../extract_failed_ids.sh ./example_logfiles/failed_write_error.txt
    assert [ "$( wc -l 'failed.txt' | awk '{ print $1 }' )" -eq "1" ];
    assert [ "$( grep -Ecv '\w{6,7}' 'failed.txt' )" -eq "0" ];
}

@test "fail disabled module" {
    run ../extract_failed_ids.sh ./example_logfiles/failed_disabled_module.txt
    assert [ "$( wc -l 'failed.txt' | awk '{ print $1 }' )" -eq "1" ];
    assert [ "$( grep -Ecv '\w{6,7}' 'failed.txt' )" -eq "0" ];
}

scripts/tests/test_extract_successful_ids.bats (new file, 38 lines)
@@ -0,0 +1,38 @@
setup() {
    load ./test_helper/bats-support/load
    load ./test_helper/bats-assert/load
}

teardown() {
    rm -f successful.txt
}

@test "success downloaded submission" {
    run ../extract_successful_ids.sh ./example_logfiles/succeed_downloaded_submission.txt
    assert [ "$( wc -l 'successful.txt' | awk '{ print $1 }' )" -eq "7" ];
    assert [ "$( grep -Ecv '\w{6,7}' 'successful.txt' )" -eq "0" ];
}

@test "success resource hash" {
    run ../extract_successful_ids.sh ./example_logfiles/succeed_resource_hash.txt
    assert [ "$( wc -l 'successful.txt' | awk '{ print $1 }' )" -eq "1" ];
    assert [ "$( grep -Ecv '\w{6,7}' 'successful.txt' )" -eq "0" ];
}

@test "success download filter" {
    run ../extract_successful_ids.sh ./example_logfiles/succeed_download_filter.txt
    assert [ "$( wc -l 'successful.txt' | awk '{ print $1 }' )" -eq "3" ];
    assert [ "$( grep -Ecv '\w{6,7}' 'successful.txt' )" -eq "0" ];
}

@test "success already exists" {
    run ../extract_successful_ids.sh ./example_logfiles/succeed_already_exists.txt
    assert [ "$( wc -l 'successful.txt' | awk '{ print $1 }' )" -eq "3" ];
    assert [ "$( grep -Ecv '\w{6,7}' 'successful.txt' )" -eq "0" ];
}

@test "success hard link" {
    run ../extract_successful_ids.sh ./example_logfiles/succeed_hard_link.txt
    assert [ "$( wc -l 'successful.txt' | awk '{ print $1 }' )" -eq "1" ];
    assert [ "$( grep -Ecv '\w{6,7}' 'successful.txt' )" -eq "0" ];
}

scripts/tests/test_helper/bats-assert (new submodule)
Submodule scripts/tests/test_helper/bats-assert added at e0de84e9c0

scripts/tests/test_helper/bats-support (new submodule)
Submodule scripts/tests/test_helper/bats-support added at d140a65044

tests/integration_tests/__init__.py (new file, 2 lines)
@@ -0,0 +1,2 @@
#!/usr/bin/env python3
# coding=utf-8

tests/integration_tests/test_archive_integration.py (new file, 108 lines)
@@ -0,0 +1,108 @@
#!/usr/bin/env python3
# coding=utf-8

import re
import shutil
from pathlib import Path

import pytest
from click.testing import CliRunner

from bdfr.__main__ import cli

does_test_config_exist = Path('../test_config.cfg').exists()


def copy_test_config(run_path: Path):
    shutil.copy(Path('../test_config.cfg'), Path(run_path, '../test_config.cfg'))


def create_basic_args_for_archive_runner(test_args: list[str], run_path: Path):
    copy_test_config(run_path)
    out = [
        'archive',
        str(run_path),
        '-v',
        '--config', str(Path(run_path, '../test_config.cfg')),
        '--log', str(Path(run_path, 'test_log.txt')),
    ] + test_args
    return out


@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')
@pytest.mark.parametrize('test_args', (
    ['-l', 'gstd4hk'],
    ['-l', 'm2601g', '-f', 'yaml'],
    ['-l', 'n60t4c', '-f', 'xml'],
))
def test_cli_archive_single(test_args: list[str], tmp_path: Path):
    runner = CliRunner()
    test_args = create_basic_args_for_archive_runner(test_args, tmp_path)
    result = runner.invoke(cli, test_args)
    assert result.exit_code == 0
    assert re.search(r'Writing entry .*? to file in .*? format', result.output)


@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')
@pytest.mark.parametrize('test_args', (
    ['--subreddit', 'Mindustry', '-L', 25],
    ['--subreddit', 'Mindustry', '-L', 25, '--format', 'xml'],
    ['--subreddit', 'Mindustry', '-L', 25, '--format', 'yaml'],
    ['--subreddit', 'Mindustry', '-L', 25, '--sort', 'new'],
    ['--subreddit', 'Mindustry', '-L', 25, '--time', 'day'],
    ['--subreddit', 'Mindustry', '-L', 25, '--time', 'day', '--sort', 'new'],
))
def test_cli_archive_subreddit(test_args: list[str], tmp_path: Path):
    runner = CliRunner()
    test_args = create_basic_args_for_archive_runner(test_args, tmp_path)
    result = runner.invoke(cli, test_args)
    assert result.exit_code == 0
    assert re.search(r'Writing entry .*? to file in .*? format', result.output)


@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')
@pytest.mark.parametrize('test_args', (
    ['--user', 'me', '--authenticate', '--all-comments', '-L', '10'],
    ['--user', 'me', '--user', 'djnish', '--authenticate', '--all-comments', '-L', '10'],
))
def test_cli_archive_all_user_comments(test_args: list[str], tmp_path: Path):
    runner = CliRunner()
    test_args = create_basic_args_for_archive_runner(test_args, tmp_path)
    result = runner.invoke(cli, test_args)
    assert result.exit_code == 0


@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')
@pytest.mark.parametrize('test_args', (
    ['--comment-context', '--link', 'gxqapql'],
))
def test_cli_archive_full_context(test_args: list[str], tmp_path: Path):
    runner = CliRunner()
    test_args = create_basic_args_for_archive_runner(test_args, tmp_path)
    result = runner.invoke(cli, test_args)
    assert result.exit_code == 0
    assert 'Converting comment' in result.output


@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.slow
@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')
@pytest.mark.parametrize('test_args', (
    ['--subreddit', 'all', '-L', 100],
    ['--subreddit', 'all', '-L', 100, '--sort', 'new'],
))
def test_cli_archive_long(test_args: list[str], tmp_path: Path):
    runner = CliRunner()
    test_args = create_basic_args_for_archive_runner(test_args, tmp_path)
    result = runner.invoke(cli, test_args)
    assert result.exit_code == 0
    assert re.search(r'Writing entry .*? to file in .*? format', result.output)

tests/integration_tests/test_clone_integration.py (new file, 43 lines)
@@ -0,0 +1,43 @@
#!/usr/bin/env python3
# coding=utf-8

import shutil
from pathlib import Path

import pytest
from click.testing import CliRunner

from bdfr.__main__ import cli

does_test_config_exist = Path('../test_config.cfg').exists()


def copy_test_config(run_path: Path):
    shutil.copy(Path('../test_config.cfg'), Path(run_path, '../test_config.cfg'))


def create_basic_args_for_cloner_runner(test_args: list[str], tmp_path: Path):
    out = [
        'clone',
        str(tmp_path),
        '-v',
        '--config', 'test_config.cfg',
        '--log', str(Path(tmp_path, 'test_log.txt')),
    ] + test_args
    return out


@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')
@pytest.mark.parametrize('test_args', (
    ['-l', 'm2601g'],
    ['-s', 'TrollXChromosomes/', '-L', 1],
))
def test_cli_scrape_general(test_args: list[str], tmp_path: Path):
    runner = CliRunner()
    test_args = create_basic_args_for_cloner_runner(test_args, tmp_path)
    result = runner.invoke(cli, test_args)
    assert result.exit_code == 0
    assert 'Downloaded submission' in result.output
    assert 'Record for entry item' in result.output

tests/integration_tests/test_download_integration.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # coding=utf-8

-import re
+import shutil
 from pathlib import Path

 import pytest

@@ -9,37 +9,20 @@ from click.testing import CliRunner

 from bdfr.__main__ import cli

-does_test_config_exist = Path('test_config.cfg').exists()
+does_test_config_exist = Path('../test_config.cfg').exists()


-def create_basic_args_for_download_runner(test_args: list[str], tmp_path: Path):
+def copy_test_config(run_path: Path):
+    shutil.copy(Path('../test_config.cfg'), Path(run_path, '../test_config.cfg'))
+
+
+def create_basic_args_for_download_runner(test_args: list[str], run_path: Path):
+    copy_test_config(run_path)
     out = [
-        'download', str(tmp_path),
+        'download', str(run_path),
         '-v',
-        '--config', 'test_config.cfg',
-        '--log', str(Path(tmp_path, 'test_log.txt')),
+        '--config', str(Path(run_path, '../test_config.cfg')),
+        '--log', str(Path(run_path, 'test_log.txt')),
     ] + test_args
     return out
-
-
-def create_basic_args_for_archive_runner(test_args: list[str], tmp_path: Path):
-    out = [
-        'archive',
-        str(tmp_path),
-        '-v',
-        '--config', 'test_config.cfg',
-        '--log', str(Path(tmp_path, 'test_log.txt')),
-    ] + test_args
-    return out
-
-
-def create_basic_args_for_cloner_runner(test_args: list[str], tmp_path: Path):
-    out = [
-        'clone',
-        str(tmp_path),
-        '-v',
-        '--config', 'test_config.cfg',
-        '--log', str(Path(tmp_path, 'test_log.txt')),
-    ] + test_args
-    return out

@@ -74,6 +57,21 @@ def test_cli_download_subreddits(test_args: list[str], tmp_path: Path):
     assert 'Added submissions from subreddit ' in result.output


+@pytest.mark.online
+@pytest.mark.reddit
+@pytest.mark.authenticated
+@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')
+@pytest.mark.parametrize('test_args', (
+    ['--subreddit', 'friends', '-L', 10, '--authenticate'],
+))
+def test_cli_download_user_specific_subreddits(test_args: list[str], tmp_path: Path):
+    runner = CliRunner()
+    test_args = create_basic_args_for_download_runner(test_args, tmp_path)
+    result = runner.invoke(cli, test_args)
+    assert result.exit_code == 0
+    assert 'Added submissions from subreddit ' in result.output
+
+
 @pytest.mark.online
 @pytest.mark.reddit
 @pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')

@@ -163,7 +161,7 @@ def test_cli_download_user_data_bad_me_unauthenticated(test_args: list[str], tmp
 @pytest.mark.reddit
 @pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')
 @pytest.mark.parametrize('test_args', (
-    ['--subreddit', 'python', '-L', 10, '--search-existing'],
+    ['--subreddit', 'python', '-L', 1, '--search-existing'],
 ))
 def test_cli_download_search_existing(test_args: list[str], tmp_path: Path):
     Path(tmp_path, 'test.txt').touch()

@@ -180,13 +178,14 @@ def test_cli_download_search_existing(test_args: list[str], tmp_path: Path):
 @pytest.mark.parametrize('test_args', (
     ['--subreddit', 'tumblr', '-L', '25', '--skip', 'png', '--skip', 'jpg'],
     ['--subreddit', 'MaliciousCompliance', '-L', '25', '--skip', 'txt'],
+    ['--subreddit', 'tumblr', '-L', '10', '--skip-domain', 'i.redd.it'],
 ))
 def test_cli_download_download_filters(test_args: list[str], tmp_path: Path):
     runner = CliRunner()
     test_args = create_basic_args_for_download_runner(test_args, tmp_path)
     result = runner.invoke(cli, test_args)
     assert result.exit_code == 0
-    assert 'Download filter removed ' in result.output
+    assert any((string in result.output for string in ('Download filter removed ', 'filtered due to URL')))


 @pytest.mark.online

@@ -203,71 +202,6 @@ def test_cli_download_long(test_args: list[str], tmp_path: Path):
     assert result.exit_code == 0


-@pytest.mark.online
-@pytest.mark.reddit
-@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')
-@pytest.mark.parametrize('test_args', (
-    ['-l', 'gstd4hk'],
-    ['-l', 'm2601g', '-f', 'yaml'],
-    ['-l', 'n60t4c', '-f', 'xml'],
-))
-def test_cli_archive_single(test_args: list[str], tmp_path: Path):
-    runner = CliRunner()
-    test_args = create_basic_args_for_archive_runner(test_args, tmp_path)
-    result = runner.invoke(cli, test_args)
-    assert result.exit_code == 0
-    assert re.search(r'Writing entry .*? to file in .*? format', result.output)
-
-
-@pytest.mark.online
-@pytest.mark.reddit
-@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')
-@pytest.mark.parametrize('test_args', (
-    ['--subreddit', 'Mindustry', '-L', 25],
-    ['--subreddit', 'Mindustry', '-L', 25, '--format', 'xml'],
-    ['--subreddit', 'Mindustry', '-L', 25, '--format', 'yaml'],
-    ['--subreddit', 'Mindustry', '-L', 25, '--sort', 'new'],
-    ['--subreddit', 'Mindustry', '-L', 25, '--time', 'day'],
-    ['--subreddit', 'Mindustry', '-L', 25, '--time', 'day', '--sort', 'new'],
-))
-def test_cli_archive_subreddit(test_args: list[str], tmp_path: Path):
-    runner = CliRunner()
-    test_args = create_basic_args_for_archive_runner(test_args, tmp_path)
-    result = runner.invoke(cli, test_args)
-    assert result.exit_code == 0
-    assert re.search(r'Writing entry .*? to file in .*? format', result.output)
-
-
-@pytest.mark.online
-@pytest.mark.reddit
-@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')
-@pytest.mark.parametrize('test_args', (
-    ['--user', 'me', '--authenticate', '--all-comments', '-L', '10'],
-    ['--user', 'me', '--user', 'djnish', '--authenticate', '--all-comments', '-L', '10'],
-))
-def test_cli_archive_all_user_comments(test_args: list[str], tmp_path: Path):
-    runner = CliRunner()
-    test_args = create_basic_args_for_archive_runner(test_args, tmp_path)
-    result = runner.invoke(cli, test_args)
-    assert result.exit_code == 0
-
-
-@pytest.mark.online
-@pytest.mark.reddit
-@pytest.mark.slow
-@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')
-@pytest.mark.parametrize('test_args', (
-    ['--subreddit', 'all', '-L', 100],
-    ['--subreddit', 'all', '-L', 100, '--sort', 'new'],
-))
-def test_cli_archive_long(test_args: list[str], tmp_path: Path):
-    runner = CliRunner()
-    test_args = create_basic_args_for_archive_runner(test_args, tmp_path)
-    result = runner.invoke(cli, test_args)
-    assert result.exit_code == 0
-    assert re.search(r'Writing entry .*? to file in .*? format', result.output)
-
-
 @pytest.mark.online
 @pytest.mark.reddit
 @pytest.mark.slow

@@ -279,6 +213,7 @@ def test_cli_archive_long(test_args: list[str], tmp_path: Path):
     ['--subreddit', 'submitters', '-L', 10],  # Private subreddit
     ['--subreddit', 'donaldtrump', '-L', 10],  # Banned subreddit
     ['--user', 'djnish', '--user', 'helen_darten', '-m', 'cuteanimalpics', '-L', 10],
+    ['--subreddit', 'friends', '-L', 10],
 ))
 def test_cli_download_soft_fail(test_args: list[str], tmp_path: Path):
     runner = CliRunner()

@@ -371,19 +306,3 @@ def test_cli_download_disable_modules(test_args: list[str], tmp_path: Path):
     assert result.exit_code == 0
     assert 'skipped due to disabled module' in result.output
     assert 'Downloaded submission' not in result.output
-
-
-@pytest.mark.online
-@pytest.mark.reddit
-@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')
-@pytest.mark.parametrize('test_args', (
-    ['-l', 'm2601g'],
-    ['-s', 'TrollXChromosomes/', '-L', 1],
-))
-def test_cli_scrape_general(test_args: list[str], tmp_path: Path):
-    runner = CliRunner()
-    test_args = create_basic_args_for_cloner_runner(test_args, tmp_path)
-    result = runner.invoke(cli, test_args)
-    assert result.exit_code == 0
-    assert 'Downloaded submission' in result.output
-    assert 'Record for entry item' in result.output

tests/site_downloaders/test_download_factory.py
@@ -13,6 +13,7 @@ from bdfr.site_downloaders.fallback_downloaders.youtubedl_fallback import Youtub
 from bdfr.site_downloaders.gallery import Gallery
 from bdfr.site_downloaders.gfycat import Gfycat
 from bdfr.site_downloaders.imgur import Imgur
+from bdfr.site_downloaders.pornhub import PornHub
 from bdfr.site_downloaders.redgifs import Redgifs
 from bdfr.site_downloaders.self_post import SelfPost
 from bdfr.site_downloaders.youtube import Youtube

@@ -44,6 +45,7 @@ from bdfr.site_downloaders.youtube import Youtube
     ('https://streamable.com/dt46y', YoutubeDlFallback),
     ('https://vimeo.com/channels/31259/53576664', YoutubeDlFallback),
     ('http://video.pbs.org/viralplayer/2365173446/', YoutubeDlFallback),
+    ('https://www.pornhub.com/view_video.php?viewkey=ph5a2ee0461a8d0', PornHub),
 ))
 def test_factory_lever_good(test_submission_url: str, expected_class: BaseDownloader, reddit_instance: praw.Reddit):
     result = DownloadFactory.pull_lever(test_submission_url)

tests/site_downloaders/test_gallery.py
@@ -8,30 +8,32 @@ from bdfr.site_downloaders.gallery import Gallery


 @pytest.mark.online
-@pytest.mark.parametrize(('test_url', 'expected'), (
-    ('https://www.reddit.com/gallery/m6lvrh', {
-        'https://preview.redd.it/18nzv9ch0hn61.jpg?width=4160&'
-        'format=pjpg&auto=webp&s=470a825b9c364e0eace0036882dcff926f821de8',
-        'https://preview.redd.it/jqkizcch0hn61.jpg?width=4160&'
-        'format=pjpg&auto=webp&s=ae4f552a18066bb6727676b14f2451c5feecf805',
-        'https://preview.redd.it/k0fnqzbh0hn61.jpg?width=4160&'
-        'format=pjpg&auto=webp&s=c6a10fececdc33983487c16ad02219fd3fc6cd76',
-        'https://preview.redd.it/m3gamzbh0hn61.jpg?width=4160&'
-        'format=pjpg&auto=webp&s=0dd90f324711851953e24873290b7f29ec73c444'
-    }),
-    ('https://www.reddit.com/gallery/ljyy27', {
-        'https://preview.redd.it/04vxj25uqih61.png?width=92&'
-        'format=png&auto=webp&s=6513f3a5c5128ee7680d402cab5ea4fb2bbeead4',
-        'https://preview.redd.it/0fnx83kpqih61.png?width=241&'
-        'format=png&auto=webp&s=655e9deb6f499c9ba1476eaff56787a697e6255a',
-        'https://preview.redd.it/7zkmr1wqqih61.png?width=237&'
-        'format=png&auto=webp&s=19de214e634cbcad9959f19570c616e29be0c0b0',
-        'https://preview.redd.it/u37k5gxrqih61.png?width=443&'
-        'format=png&auto=webp&s=e74dae31841fe4a2545ffd794d3b25b9ff0eb862'
-    }),
+@pytest.mark.parametrize(('test_ids', 'expected'), (
+    ([
+        {'media_id': '18nzv9ch0hn61'},
+        {'media_id': 'jqkizcch0hn61'},
+        {'media_id': 'k0fnqzbh0hn61'},
+        {'media_id': 'm3gamzbh0hn61'},
+    ], {
+        'https://i.redd.it/18nzv9ch0hn61.jpg',
+        'https://i.redd.it/jqkizcch0hn61.jpg',
+        'https://i.redd.it/k0fnqzbh0hn61.jpg',
+        'https://i.redd.it/m3gamzbh0hn61.jpg'
+    }),
+    ([
+        {'media_id': '04vxj25uqih61'},
+        {'media_id': '0fnx83kpqih61'},
+        {'media_id': '7zkmr1wqqih61'},
+        {'media_id': 'u37k5gxrqih61'},
+    ], {
+        'https://i.redd.it/04vxj25uqih61.png',
+        'https://i.redd.it/0fnx83kpqih61.png',
+        'https://i.redd.it/7zkmr1wqqih61.png',
+        'https://i.redd.it/u37k5gxrqih61.png'
+    }),
 ))
-def test_gallery_get_links(test_url: str, expected: set[str]):
-    results = Gallery._get_links(test_url)
+def test_gallery_get_links(test_ids: list[dict], expected: set[str]):
+    results = Gallery._get_links(test_ids)
     assert set(results) == expected

@@ -39,16 +41,24 @@ def test_gallery_get_links(test_ids: list[dict], expected: set[str]):
 @pytest.mark.reddit
 @pytest.mark.parametrize(('test_submission_id', 'expected_hashes'), (
     ('m6lvrh', {
-        '6c8a892ae8066cbe119218bcaac731e1',
-        '93ce177f8cb7994906795f4615114d13',
-        '9a293adf19354f14582608cf22124574',
-        'b73e2c3daee02f99404644ea02f1ae65'
+        '5c42b8341dd56eebef792e86f3981c6a',
+        '8f38d76da46f4057bf2773a778e725ca',
+        'f5776f8f90491c8b770b8e0a6bfa49b3',
+        'fa1a43c94da30026ad19a9813a0ed2c2',
     }),
     ('ljyy27', {
-        '1bc38bed88f9c4770e22a37122d5c941',
-        '2539a92b78f3968a069df2dffe2279f9',
-        '37dea50281c219b905e46edeefc1a18d',
-        'ec4924cf40549728dcf53dd40bc7a73c'
+        '359c203ec81d0bc00e675f1023673238',
+        '79262fd46bce5bfa550d878a3b898be4',
+        '808c35267f44acb523ce03bfa5687404',
+        'ec8b65bdb7f1279c4b3af0ea2bbb30c3',
+    }),
+    ('nxyahw', {
+        'b89a3f41feb73ec1136ec4ffa7353eb1',
+        'cabb76fd6fd11ae6e115a2039eb09f04',
+    }),
+    ('obkflw', {
+        '65163f685fb28c5b776e0e77122718be',
+        '2a337eb5b13c34d3ca3f51b5db7c13e9',
     }),
 ))
 def test_gallery_download(test_submission_id: str, expected_hashes: set[str], reddit_instance: praw.Reddit):

tests/site_downloaders/test_imgur.py
@@ -130,6 +130,18 @@ def test_imgur_extension_validation_bad(test_extension: str):
             'fb6c913d721c0bbb96aa65d7f560d385',
         ),
     ),
+    (
+        'https://i.imgur.com/lFJai6i.gifv',
+        ('01a6e79a30bec0e644e5da12365d5071',),
+    ),
+    (
+        'https://i.imgur.com/ywSyILa.gifv?',
+        ('56d4afc32d2966017c38d98568709b45',),
+    ),
+    (
+        'https://imgur.com/ubYwpbk.GIFV',
+        ('d4a774aac1667783f9ed3a1bd02fac0c',),
+    ),
 ))
 def test_find_resources(test_url: str, expected_hashes: list[str]):
     mock_download = Mock()

tests/site_downloaders/test_pornhub.py (new file, 25 lines)
@@ -0,0 +1,25 @@
#!/usr/bin/env python3
# coding=utf-8

from unittest.mock import MagicMock

import pytest

from bdfr.resource import Resource
from bdfr.site_downloaders.pornhub import PornHub


@pytest.mark.online
@pytest.mark.slow
@pytest.mark.parametrize(('test_url', 'expected_hash'), (
    ('https://www.pornhub.com/view_video.php?viewkey=ph5a2ee0461a8d0', '5f5294b9b97dbb7cb9cf8df278515621'),
))
def test_find_resources_good(test_url: str, expected_hash: str):
    test_submission = MagicMock()
    test_submission.url = test_url
    downloader = PornHub(test_submission)
    resources = downloader.find_resources()
    assert len(resources) == 1
    assert isinstance(resources[0], Resource)
    resources[0].download(120)
    assert resources[0].hash.hexdigest() == expected_hash

tests/site_downloaders/test_redgifs.py
@@ -31,6 +31,7 @@ def test_get_link(test_url: str, expected: str):
     ('https://redgifs.com/watch/springgreendecisivetaruca', '8dac487ac49a1f18cc1b4dabe23f0869'),
     ('https://www.gifdeliverynetwork.com/maturenexthippopotamus', '9bec0a9e4163a43781368ed5d70471df'),
     ('https://www.gifdeliverynetwork.com/regalshoddyhorsechestnutleafminer', '8afb4e2c090a87140230f2352bf8beba'),
+    ('https://redgifs.com/watch/leafysaltydungbeetle', '076792c660b9c024c0471ef4759af8bd'),
 ))
 def test_download_resource(test_url: str, expected_hash: str):
     mock_submission = Mock()

tests/site_downloaders/test_youtube.py
@@ -14,7 +14,7 @@ from bdfr.site_downloaders.youtube import Youtube
 @pytest.mark.slow
 @pytest.mark.parametrize(('test_url', 'expected_hash'), (
     ('https://www.youtube.com/watch?v=uSm2VDgRIUs', 'f70b704b4b78b9bb5cd032bfc26e4971'),
-    ('https://www.youtube.com/watch?v=m-tKnjFwleU', '30314930d853afff8ebc7d8c36a5b833'),
+    ('https://www.youtube.com/watch?v=GcI7nxQj7HA', '2bfdbf434ed284623e46f3bf52c36166'),
 ))
 def test_find_resources_good(test_url: str, expected_hash: str):
     test_submission = MagicMock()

@@ -28,8 +28,9 @@ def test_find_resources_good(test_url: str, expected_hash: str):


 @pytest.mark.online
-@pytest.mark.parametrize(('test_url'), (
-    ('https://www.polygon.com/disney-plus/2020/5/14/21249881/gargoyles-animated-series-disney-plus-greg-weisman-interview-oj-simpson-goliath-chronicles'),
+@pytest.mark.parametrize('test_url', (
+    'https://www.polygon.com/disney-plus/2020/5/14/21249881/gargoyles-animated-series-disney-plus-greg-weisman'
+    '-interview-oj-simpson-goliath-chronicles',
 ))
 def test_find_resources_bad(test_url: str):
     test_submission = MagicMock()

tests/test_connector.py
@@ -29,6 +29,8 @@ def downloader_mock(args: Configuration):
     downloader_mock = MagicMock()
     downloader_mock.args = args
     downloader_mock.sanitise_subreddit_name = RedditConnector.sanitise_subreddit_name
+    downloader_mock.create_filtered_listing_generator = lambda x: RedditConnector.create_filtered_listing_generator(
+        downloader_mock, x)
     downloader_mock.split_args_input = RedditConnector.split_args_input
     downloader_mock.master_hash_list = {}
     return downloader_mock

@@ -37,6 +39,7 @@ def downloader_mock(args: Configuration):
 def assert_all_results_are_submissions(result_limit: int, results: list[Iterator]) -> list:
     results = [sub for res in results for sub in res]
     assert all([isinstance(res, praw.models.Submission) for res in results])
+    assert not any([isinstance(m, MagicMock) for m in results])
     if result_limit is not None:
         assert len(results) == result_limit
     return results

@@ -167,18 +170,20 @@ def test_get_subreddit_normal(
     downloader_mock: MagicMock,
     reddit_instance: praw.Reddit,
 ):
-    downloader_mock._determine_sort_function.return_value = praw.models.Subreddit.hot
     downloader_mock.args.limit = limit
     downloader_mock.args.sort = sort_type
+    downloader_mock.time_filter = RedditConnector.create_time_filter(downloader_mock)
+    downloader_mock.sort_filter = RedditConnector.create_sort_filter(downloader_mock)
+    downloader_mock.determine_sort_function.return_value = RedditConnector.determine_sort_function(downloader_mock)
     downloader_mock.args.subreddit = test_subreddits
     downloader_mock.reddit_instance = reddit_instance
-    downloader_mock.sort_filter = RedditConnector.create_sort_filter(downloader_mock)
     results = RedditConnector.get_subreddits(downloader_mock)
-    test_subreddits = downloader_mock._split_args_input(test_subreddits)
+    test_subreddits = downloader_mock.split_args_input(test_subreddits)
     results = [sub for res1 in results for sub in res1]
     assert all([isinstance(res1, praw.models.Submission) for res1 in results])
+    assert all([res.subreddit.display_name in test_subreddits for res in results])
     assert len(results) <= max_expected_len
     assert not any([isinstance(m, MagicMock) for m in results])


 @pytest.mark.online

@@ -212,6 +217,7 @@ def test_get_subreddit_search(
     assert all([isinstance(res, praw.models.Submission) for res in results])
     assert all([res.subreddit.display_name in test_subreddits for res in results])
     assert len(results) <= max_expected_len
+    assert not any([isinstance(m, MagicMock) for m in results])


 @pytest.mark.online

@@ -243,6 +249,7 @@ def test_get_multireddits_public(
     results = [sub for res in results for sub in res]
     assert all([isinstance(res, praw.models.Submission) for res in results])
     assert len(results) == limit
+    assert not any([isinstance(m, MagicMock) for m in results])


 @pytest.mark.online

@@ -268,6 +275,7 @@ def test_get_user_submissions(test_user: str, limit: int, downloader_mock: Magic
     results = RedditConnector.get_user_data(downloader_mock)
     results = assert_all_results_are_submissions(limit, results)
     assert all([res.author.name == test_user for res in results])
+    assert not any([isinstance(m, MagicMock) for m in results])


 @pytest.mark.online

tests/test_downloader.py
@@ -9,7 +9,6 @@ from unittest.mock import MagicMock, patch
 import praw.models
 import pytest

-import bdfr.site_downloaders.download_factory
 from bdfr.__main__ import setup_logging
 from bdfr.configuration import Configuration
 from bdfr.connector import RedditConnector
