# Patch summary (free-text preamble placed before the first "diff --git" header).
#
#   .gitattributes                     new: marks *.ps1 as text with eol=crlf.
#   README.md                          adds two example invocations that use
#                                      --user reddituser --submitted.
#   scripts/extract_failed_ids.ps1     new: arg 1 is the log file (prints
#                                      "CANNOT FIND LOG FILE" and exits 1 if it is
#                                      not an existing file); optional arg 2 is the
#                                      output path (default ./failed.txt). Appends
#                                      one whitespace-split field per matching log
#                                      line to the output.
#   scripts/extract_failed_ids.sh      'Failed to write file' now prints awk field
#                                      $14 unchanged instead of field $13 with its
#                                      last character removed.
#   scripts/extract_successful_ids.ps1 new: same argument handling, default
#                                      output ./successful.txt.
#   scripts/print_summary.ps1          new: prints match counts for seven log
#                                      message patterns.
#
# NOTE(review): print_summary.ps1 parses an output argument and prints
#   "Outputting IDs to ..." but nothing in the script writes to $output.
# NOTE(review): leading indentation inside the hunks was reconstructed as four
#   spaces - TODO confirm against the target tree; if the context lines of the
#   extract_failed_ids.sh hunk do not match, apply with --ignore-whitespace.
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..c16e947
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+# Declare files that will always have CRLF line endings on checkout.
+*.ps1 text eol=crlf
\ No newline at end of file
diff --git a/README.md b/README.md
index b84aa3d..47c0dde 100644
--- a/README.md
+++ b/README.md
@@ -53,6 +53,12 @@ However, these commands are not enough. You should chain parameters in [Options]
 python3 -m bdfr download ./path/to/output --subreddit Python -L 10
 ```
 ```bash
+python3 -m bdfr download ./path/to/output --user reddituser --submitted -L 100
+```
+```bash
+python3 -m bdfr download ./path/to/output --user reddituser --submitted --all-comments --comment-context
+```
+```bash
 python3 -m bdfr download ./path/to/output --user me --saved --authenticate -L 25 --file-scheme '{POSTID}'
 ```
 ```bash
diff --git a/scripts/extract_failed_ids.ps1 b/scripts/extract_failed_ids.ps1
new file mode 100644
index 0000000..be2d2cb
--- /dev/null
+++ b/scripts/extract_failed_ids.ps1
@@ -0,0 +1,21 @@
+if (Test-Path -Path $args[0] -PathType Leaf) {
+    $file=$args[0]
+}
+else {
+    Write-Host "CANNOT FIND LOG FILE"
+    Exit 1
+}
+
+if ($args[1] -ne $null) {
+    $output=$args[1]
+    Write-Host "Outputting IDs to $output"
+}
+else {
+    $output="./failed.txt"
+}
+
+Select-String -Path $file -Pattern "Could not download submission" | ForEach-Object { -split $_.Line | Select-Object -Skip 11 | Select-Object -First 1 } | foreach { $_.substring(0,$_.Length-1) } >> $output
+Select-String -Path $file -Pattern "Failed to download resource" | ForEach-Object { -split $_.Line | Select-Object -Skip 14 | Select-Object -First 1 } >> $output
+Select-String -Path $file -Pattern "failed to download submission" | ForEach-Object { -split $_.Line | Select-Object -Skip 13 | Select-Object -First 1 } | foreach { $_.substring(0,$_.Length-1) } >> $output
+Select-String -Path $file -Pattern "Failed to write file" | ForEach-Object { -split $_.Line | Select-Object -Skip 13 | Select-Object -First 1 } >> $output
+Select-String -Path $file -Pattern "skipped due to disabled module" | ForEach-Object { -split $_.Line | Select-Object -Skip 8 | Select-Object -First 1 } >> $output
diff --git a/scripts/extract_failed_ids.sh b/scripts/extract_failed_ids.sh
index f96bd9a..8addf7e 100755
--- a/scripts/extract_failed_ids.sh
+++ b/scripts/extract_failed_ids.sh
@@ -18,6 +18,6 @@ fi
     grep 'Could not download submission' "$file" | awk '{ print $12 }' | rev | cut -c 2- | rev ;
     grep 'Failed to download resource' "$file" | awk '{ print $15 }' ;
     grep 'failed to download submission' "$file" | awk '{ print $14 }' | rev | cut -c 2- | rev ;
-    grep 'Failed to write file' "$file" | awk '{ print $13 }' | rev | cut -c 2- | rev ;
+    grep 'Failed to write file' "$file" | awk '{ print $14 }' ;
     grep 'skipped due to disabled module' "$file" | awk '{ print $9 }' ;
 } >>"$output"
diff --git a/scripts/extract_successful_ids.ps1 b/scripts/extract_successful_ids.ps1
new file mode 100644
index 0000000..00722f1
--- /dev/null
+++ b/scripts/extract_successful_ids.ps1
@@ -0,0 +1,21 @@
+if (Test-Path -Path $args[0] -PathType Leaf) {
+    $file=$args[0]
+}
+else {
+    Write-Host "CANNOT FIND LOG FILE"
+    Exit 1
+}
+
+if ($args[1] -ne $null) {
+    $output=$args[1]
+    Write-Host "Outputting IDs to $output"
+}
+else {
+    $output="./successful.txt"
+}
+
+Select-String -Path $file -Pattern "Downloaded submission" | ForEach-Object { -split $_.Line | Select-Object -Last 3 | Select-Object -SkipLast 2 } >> $output
+Select-String -Path $file -Pattern "Resource hash" | ForEach-Object { -split $_.Line | Select-Object -Last 3 | Select-Object -SkipLast 2 } >> $output
+Select-String -Path $file -Pattern "Download filter" | ForEach-Object { -split $_.Line | Select-Object -Last 4 | Select-Object -SkipLast 3 } >> $output
+Select-String -Path $file -Pattern "already exists, continuing" | ForEach-Object { -split $_.Line | Select-Object -Last 4 | Select-Object -SkipLast 3 } >> $output
+Select-String -Path $file -Pattern "Hard link made" | ForEach-Object { -split $_.Line | Select-Object -Last 1 } >> $output
diff --git a/scripts/print_summary.ps1 b/scripts/print_summary.ps1
new file mode 100644
index 0000000..5d85b09
--- /dev/null
+++ b/scripts/print_summary.ps1
@@ -0,0 +1,30 @@
+if (Test-Path -Path $args[0] -PathType Leaf) {
+    $file=$args[0]
+}
+else {
+    Write-Host "CANNOT FIND LOG FILE"
+    Exit 1
+}
+
+if ($args[1] -ne $null) {
+    $output=$args[1]
+    Write-Host "Outputting IDs to $output"
+}
+else {
+    $output="./successful.txt"
+}
+
+Write-Host -NoNewline "Downloaded submissions: "
+Write-Host (Select-String -Path $file -Pattern "Downloaded submission" -AllMatches).Matches.Count
+Write-Host -NoNewline "Failed downloads: "
+Write-Host (Select-String -Path $file -Pattern "failed to download submission" -AllMatches).Matches.Count
+Write-Host -NoNewline "Files already downloaded: "
+Write-Host (Select-String -Path $file -Pattern "already exists, continuing" -AllMatches).Matches.Count
+Write-Host -NoNewline "Hard linked submissions: "
+Write-Host (Select-String -Path $file -Pattern "Hard link made" -AllMatches).Matches.Count
+Write-Host -NoNewline "Excluded submissions: "
+Write-Host (Select-String -Path $file -Pattern "in exclusion list" -AllMatches).Matches.Count
+Write-Host -NoNewline "Files with existing hash skipped: "
+Write-Host (Select-String -Path $file -Pattern "downloaded elsewhere" -AllMatches).Matches.Count
+Write-Host -NoNewline "Submissions from excluded subreddits: "
+Write-Host (Select-String -Path $file -Pattern "in skip list" -AllMatches).Matches.Count