Archiver is smarter for comments (#242)
* Add comment name generation to file name formatter
* Refactor to reduce duplication
* Refactor archive entry classes
* Refactor archiver class a bit
* Refactor method
* Fix comment retrieval
* Add comment-downloading to archiver
* Update test
* Update test
This commit is contained in:
@@ -3,12 +3,15 @@
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
|
||||
import dict2xml
|
||||
import praw.models
|
||||
import yaml
|
||||
|
||||
from bulkredditdownloader.archive_entry import ArchiveEntry
|
||||
from bulkredditdownloader.archive_entry.base_archive_entry import BaseArchiveEntry
|
||||
from bulkredditdownloader.archive_entry.comment_archive_entry import CommentArchiveEntry
|
||||
from bulkredditdownloader.archive_entry.submission_archive_entry import SubmissionArchiveEntry
|
||||
from bulkredditdownloader.configuration import Configuration
|
||||
from bulkredditdownloader.downloader import RedditDownloader
|
||||
from bulkredditdownloader.exceptions import ArchiverError
|
||||
@@ -25,41 +28,60 @@ class Archiver(RedditDownloader):
|
||||
for generator in self.reddit_lists:
|
||||
for submission in generator:
|
||||
logger.debug(f'Attempting to archive submission {submission.id}')
|
||||
self._write_submission(submission)
|
||||
self._write_entry(submission)
|
||||
|
||||
def _write_submission(self, submission: praw.models.Submission):
|
||||
archive_entry = ArchiveEntry(submission)
|
||||
def _get_submissions_from_link(self) -> list[list[praw.models.Submission]]:
    """Resolve every entry of ``self.args.link`` into a PRAW object.

    Each supplied string is classified as follows:

    * exactly six characters long -> looked up as a submission ID;
    * exactly seven word characters -> looked up as a comment ID;
    * anything else -> treated as a submission URL.

    Returns:
        A list holding one inner list with all resolved items, in the
        order they were supplied.
    """
    resolved_items = []
    for link in self.args.link:
        if len(link) == 6:
            item = self.reddit_instance.submission(id=link)
        elif re.match(r'^\w{7}$', link):
            item = self.reddit_instance.comment(id=link)
        else:
            item = self.reddit_instance.submission(url=link)
        resolved_items.append(item)
    return [resolved_items]
|
||||
|
||||
@staticmethod
def _pull_lever_entry_factory(praw_item: (praw.models.Submission, praw.models.Comment)) -> BaseArchiveEntry:
    """Wrap a PRAW item in the archive entry class matching its type.

    Args:
        praw_item: The submission or comment to wrap.

    Returns:
        A ``SubmissionArchiveEntry`` for submissions, or a
        ``CommentArchiveEntry`` for comments.

    Raises:
        ArchiverError: If the item is neither a submission nor a comment.
    """
    # Guard clauses: the first matching type wins, anything else is an error.
    if isinstance(praw_item, praw.models.Submission):
        return SubmissionArchiveEntry(praw_item)
    if isinstance(praw_item, praw.models.Comment):
        return CommentArchiveEntry(praw_item)
    raise ArchiverError(f'Factory failed to classify item of type {type(praw_item).__name__}')
|
||||
|
||||
def _write_entry(self, praw_item: (praw.models.Submission, praw.models.Comment)):
|
||||
archive_entry = self._pull_lever_entry_factory(praw_item)
|
||||
if self.args.format == 'json':
|
||||
self._write_submission_json(archive_entry)
|
||||
self._write_entry_json(archive_entry)
|
||||
elif self.args.format == 'xml':
|
||||
self._write_submission_xml(archive_entry)
|
||||
self._write_entry_xml(archive_entry)
|
||||
elif self.args.format == 'yaml':
|
||||
self._write_submission_yaml(archive_entry)
|
||||
self._write_entry_yaml(archive_entry)
|
||||
else:
|
||||
raise ArchiverError(f'Unknown format {self.args.format} given')
|
||||
logger.info(f'Record for submission {submission.id} written to disk')
|
||||
logger.info(f'Record for entry item {praw_item.id} written to disk')
|
||||
|
||||
def _write_submission_json(self, entry: ArchiveEntry):
|
||||
resource = Resource(entry.submission, '', '.json')
|
||||
def _write_entry_json(self, entry: BaseArchiveEntry):
    """Serialise the compiled entry as JSON and pass it on for writing.

    A ``Resource`` is built from ``entry.source`` with ``'.json'`` as its
    third argument, then handed to ``_write_content_to_disk`` together
    with the JSON text.
    """
    target = Resource(entry.source, '', '.json')
    compiled = entry.compile()
    self._write_content_to_disk(target, json.dumps(compiled))
|
||||
|
||||
def _write_entry_xml(self, entry: BaseArchiveEntry):
    """Serialise the compiled entry as XML and pass it on for writing.

    The compiled entry is converted with ``dict2xml`` under a ``root``
    wrapper element. A ``Resource`` is built from ``entry.source`` with
    ``'.xml'`` as its third argument, then handed to
    ``_write_content_to_disk`` together with the XML text.
    """
    target = Resource(entry.source, '', '.xml')
    compiled = entry.compile()
    self._write_content_to_disk(target, dict2xml.dict2xml(compiled, wrap='root'))
|
||||
|
||||
def _write_entry_yaml(self, entry: BaseArchiveEntry):
    """Serialise the compiled entry as YAML and pass it on for writing.

    A ``Resource`` is built from ``entry.source`` with ``'.yaml'`` as its
    third argument, then handed to ``_write_content_to_disk`` together
    with the YAML text.
    """
    target = Resource(entry.source, '', '.yaml')
    compiled = entry.compile()
    self._write_content_to_disk(target, yaml.dump(compiled))
|
||||
|
||||
def _write_content_to_disk(self, resource: Resource, content: str):
|
||||
file_path = self.file_name_formatter.format_path(resource, self.download_directory)
|
||||
file_path.parent.mkdir(exist_ok=True, parents=True)
|
||||
with open(file_path, 'w') as file:
|
||||
logger.debug(f'Writing submission {entry.submission.id} to file in JSON format at {file_path}')
|
||||
json.dump(entry.compile(), file)
|
||||
|
||||
def _write_submission_xml(self, entry: ArchiveEntry):
|
||||
resource = Resource(entry.submission, '', '.xml')
|
||||
file_path = self.file_name_formatter.format_path(resource, self.download_directory)
|
||||
file_path.parent.mkdir(exist_ok=True, parents=True)
|
||||
with open(file_path, 'w') as file:
|
||||
logger.debug(f'Writing submission {entry.submission.id} to file in XML format at {file_path}')
|
||||
xml_entry = dict2xml.dict2xml(entry.compile(), wrap='root')
|
||||
file.write(xml_entry)
|
||||
|
||||
def _write_submission_yaml(self, entry: ArchiveEntry):
|
||||
resource = Resource(entry.submission, '', '.yaml')
|
||||
file_path = self.file_name_formatter.format_path(resource, self.download_directory)
|
||||
file_path.parent.mkdir(exist_ok=True, parents=True)
|
||||
with open(file_path, 'w') as file:
|
||||
logger.debug(f'Writing submission {entry.submission.id} to file in YAML format at {file_path}')
|
||||
yaml.dump(entry.compile(), file)
|
||||
logger.debug(
|
||||
f'Writing entry {resource.source_submission.id} to file in {resource.extension[1:].upper()}'
|
||||
f' format at {file_path}')
|
||||
file.write(content)
|
||||
|
||||
Reference in New Issue
Block a user