Update gallery code to work with NSFW galleries
This commit is contained in:
@@ -5,6 +5,7 @@ import re
|
||||
from typing import Optional
|
||||
|
||||
import bs4
|
||||
import requests
|
||||
from praw.models import Submission
|
||||
|
||||
from bdfr.exceptions import SiteDownloaderError
|
||||
@@ -20,21 +21,21 @@ class Gallery(BaseDownloader):
|
||||
super().__init__(post)
|
||||
|
||||
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
    """Build one ``Resource`` per image in the submission's gallery.

    The image list is taken from ``self.post.gallery_data['items']`` and
    resolved to URLs by ``_get_links``.

    Args:
        authenticator: Accepted for interface compatibility; not used here.

    Returns:
        A list of ``Resource`` objects, one for each resolved image URL.

    Raises:
        SiteDownloaderError: If the gallery item list cannot be read from the
            submission, or if no image URL could be resolved.
    """
    try:
        gallery_items = self.post.gallery_data['items']
    except (AttributeError, KeyError, TypeError) as e:
        # NOTE(review): presumably gallery_data can be absent or None for
        # removed or non-gallery submissions - confirm against PRAW. Either
        # way, report it as a downloader error, not a raw lookup failure.
        raise SiteDownloaderError('No images found in Reddit gallery') from e

    image_urls = self._get_links(gallery_items)
    if not image_urls:
        raise SiteDownloaderError('No images found in Reddit gallery')
    return [Resource(self.post, url) for url in image_urls]
|
||||
|
||||
@staticmethod
def _get_links(url: str) -> list[str]:
    """Collect preview image links from a gallery page.

    The page at ``url`` is fetched with browser-like headers and parsed as
    HTML. The ``href`` of every ``<a target="_blank">`` element whose link
    matches the ``preview.redd.it`` pattern is returned, in document order.
    """
    user_agent = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
        ' Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64'
    )
    accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'
    response = Gallery.retrieve_url(url, headers={'User-Agent': user_agent, 'Accept': accept})

    document = bs4.BeautifulSoup(response.text, 'html.parser')
    preview_pattern = re.compile(r'https://preview\.redd\.it.*')
    anchors = document.findAll('a', attrs={'target': '_blank', 'href': preview_pattern})
    return [anchor.get('href') for anchor in anchors]
|
||||
@staticmethod
def _get_links(id_dict: list[dict]) -> list[str]:
    """Resolve gallery items to direct ``i.redd.it`` image URLs.

    For each item, the ``media_id`` is combined with each candidate file
    extension in turn and probed with an HTTP HEAD request. The first URL
    that answers with status 200 is kept and the remaining extensions are
    not tried.

    Args:
        id_dict: Gallery items; each must provide a ``'media_id'`` key.

    Returns:
        The URLs that answered 200, in item order. Items for which no
        candidate extension answered 200 are omitted.

    Raises:
        KeyError: If an item has no ``'media_id'`` key.
        requests.exceptions.RequestException: If a probe fails at the
            network level, including exceeding the timeout.
    """
    # Same candidates for every item, so build the tuple once.
    possible_extensions = ('.jpg', '.png', '.gif', '.gifv', '.jpeg')
    out = []
    for item in id_dict:
        image_id = item['media_id']
        for extension in possible_extensions:
            test_url = f'https://i.redd.it/{image_id}{extension}'
            # Without a timeout, requests waits indefinitely on an
            # unresponsive host, which would stall the whole download.
            response = requests.head(test_url, timeout=10)
            if response.status_code == 200:
                out.append(test_url)
                break
    return out
|
||||
|
||||
@@ -8,30 +8,32 @@ from bdfr.site_downloaders.gallery import Gallery
|
||||
|
||||
|
||||
@pytest.mark.online
|
||||
@pytest.mark.parametrize(('test_url', 'expected'), (
|
||||
('https://www.reddit.com/gallery/m6lvrh', {
|
||||
'https://preview.redd.it/18nzv9ch0hn61.jpg?width=4160&'
|
||||
'format=pjpg&auto=webp&s=470a825b9c364e0eace0036882dcff926f821de8',
|
||||
'https://preview.redd.it/jqkizcch0hn61.jpg?width=4160&'
|
||||
'format=pjpg&auto=webp&s=ae4f552a18066bb6727676b14f2451c5feecf805',
|
||||
'https://preview.redd.it/k0fnqzbh0hn61.jpg?width=4160&'
|
||||
'format=pjpg&auto=webp&s=c6a10fececdc33983487c16ad02219fd3fc6cd76',
|
||||
'https://preview.redd.it/m3gamzbh0hn61.jpg?width=4160&'
|
||||
'format=pjpg&auto=webp&s=0dd90f324711851953e24873290b7f29ec73c444'
|
||||
@pytest.mark.parametrize(('test_ids', 'expected'), (
|
||||
([
|
||||
{'media_id': '18nzv9ch0hn61'},
|
||||
{'media_id': 'jqkizcch0hn61'},
|
||||
{'media_id': 'k0fnqzbh0hn61'},
|
||||
{'media_id': 'm3gamzbh0hn61'},
|
||||
], {
|
||||
'https://i.redd.it/18nzv9ch0hn61.jpg',
|
||||
'https://i.redd.it/jqkizcch0hn61.jpg',
|
||||
'https://i.redd.it/k0fnqzbh0hn61.jpg',
|
||||
'https://i.redd.it/m3gamzbh0hn61.jpg'
|
||||
}),
|
||||
('https://www.reddit.com/gallery/ljyy27', {
|
||||
'https://preview.redd.it/04vxj25uqih61.png?width=92&'
|
||||
'format=png&auto=webp&s=6513f3a5c5128ee7680d402cab5ea4fb2bbeead4',
|
||||
'https://preview.redd.it/0fnx83kpqih61.png?width=241&'
|
||||
'format=png&auto=webp&s=655e9deb6f499c9ba1476eaff56787a697e6255a',
|
||||
'https://preview.redd.it/7zkmr1wqqih61.png?width=237&'
|
||||
'format=png&auto=webp&s=19de214e634cbcad9959f19570c616e29be0c0b0',
|
||||
'https://preview.redd.it/u37k5gxrqih61.png?width=443&'
|
||||
'format=png&auto=webp&s=e74dae31841fe4a2545ffd794d3b25b9ff0eb862'
|
||||
([
|
||||
{'media_id': '04vxj25uqih61'},
|
||||
{'media_id': '0fnx83kpqih61'},
|
||||
{'media_id': '7zkmr1wqqih61'},
|
||||
{'media_id': 'u37k5gxrqih61'},
|
||||
], {
|
||||
'https://i.redd.it/04vxj25uqih61.png',
|
||||
'https://i.redd.it/0fnx83kpqih61.png',
|
||||
'https://i.redd.it/7zkmr1wqqih61.png',
|
||||
'https://i.redd.it/u37k5gxrqih61.png'
|
||||
}),
|
||||
))
|
||||
def test_gallery_get_links(test_url: str, expected: set[str]):
|
||||
results = Gallery._get_links(test_url)
|
||||
def test_gallery_get_links(test_ids: list[dict], expected: set[str]):
|
||||
results = Gallery._get_links(test_ids)
|
||||
assert set(results) == expected
|
||||
|
||||
|
||||
@@ -39,16 +41,20 @@ def test_gallery_get_links(test_url: str, expected: set[str]):
|
||||
@pytest.mark.reddit
|
||||
@pytest.mark.parametrize(('test_submission_id', 'expected_hashes'), (
|
||||
('m6lvrh', {
|
||||
'6c8a892ae8066cbe119218bcaac731e1',
|
||||
'93ce177f8cb7994906795f4615114d13',
|
||||
'9a293adf19354f14582608cf22124574',
|
||||
'b73e2c3daee02f99404644ea02f1ae65'
|
||||
'5c42b8341dd56eebef792e86f3981c6a',
|
||||
'8f38d76da46f4057bf2773a778e725ca',
|
||||
'f5776f8f90491c8b770b8e0a6bfa49b3',
|
||||
'fa1a43c94da30026ad19a9813a0ed2c2',
|
||||
}),
|
||||
('ljyy27', {
|
||||
'1bc38bed88f9c4770e22a37122d5c941',
|
||||
'2539a92b78f3968a069df2dffe2279f9',
|
||||
'37dea50281c219b905e46edeefc1a18d',
|
||||
'ec4924cf40549728dcf53dd40bc7a73c'
|
||||
'359c203ec81d0bc00e675f1023673238',
|
||||
'79262fd46bce5bfa550d878a3b898be4',
|
||||
'808c35267f44acb523ce03bfa5687404',
|
||||
'ec8b65bdb7f1279c4b3af0ea2bbb30c3',
|
||||
}),
|
||||
('nxyahw', {
|
||||
'b89a3f41feb73ec1136ec4ffa7353eb1',
|
||||
'cabb76fd6fd11ae6e115a2039eb09f04',
|
||||
}),
|
||||
))
|
||||
def test_gallery_download(test_submission_id: str, expected_hashes: set[str], reddit_instance: praw.Reddit):
|
||||
|
||||
Reference in New Issue
Block a user