From 850faffc29706efc36703363f9baba88b649e331 Mon Sep 17 00:00:00 2001 From: Thayol Date: Wed, 5 Jan 2022 01:17:59 +0100 Subject: [PATCH 1/5] Add PowerShell scripts --- .gitattributes | 2 ++ scripts/extract_failed_ids.ps1 | 21 +++++++++++++++++++++ scripts/extract_successful_ids.ps1 | 21 +++++++++++++++++++++ scripts/print_summary.ps1 | 30 ++++++++++++++++++++++++++++++ 4 files changed, 74 insertions(+) create mode 100644 .gitattributes create mode 100644 scripts/extract_failed_ids.ps1 create mode 100644 scripts/extract_successful_ids.ps1 create mode 100644 scripts/print_summary.ps1 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..c16e947 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +# Declare files that will always have CRLF line endings on checkout. +*.ps1 text eol=crlf \ No newline at end of file diff --git a/scripts/extract_failed_ids.ps1 b/scripts/extract_failed_ids.ps1 new file mode 100644 index 0000000..17d96f6 --- /dev/null +++ b/scripts/extract_failed_ids.ps1 @@ -0,0 +1,21 @@ +if (Test-Path -Path $args[0] -PathType Leaf) { + $file=$args[0] +} +else { + Write-Host "CANNOT FIND LOG FILE" + Exit 1 +} + +if ($args[1] -ne $null) { + $output=$args[1] + Write-Host "Outputting IDs to $output" +} +else { + $output="./failed.txt" +} + +Select-String -Path $file -Pattern "Could not download submission" | ForEach-Object { -split $_.Line | Select-Object -Skip 11 | Select-Object -First 1 } | foreach { $_.substring(0,$_.Length-1) } >> $output +Select-String -Path $file -Pattern "Failed to download resource" | ForEach-Object { -split $_.Line | Select-Object -Skip 14 | Select-Object -First 1 } >> $output +Select-String -Path $file -Pattern "failed to download submission" | ForEach-Object { -split $_.Line | Select-Object -Skip 13 | Select-Object -First 1 } | foreach { $_.substring(0,$_.Length-1) } >> $output +Select-String -Path $file -Pattern "Failed to write file" | ForEach-Object { -split $_.Line | Select-Object -Skip 12 | 
Select-Object -First 1 } | foreach { $_.substring(0,$_.Length-1) } >> $output +Select-String -Path $file -Pattern "skipped due to disabled module" | ForEach-Object { -split $_.Line | Select-Object -Skip 8 | Select-Object -First 1 } >> $output diff --git a/scripts/extract_successful_ids.ps1 b/scripts/extract_successful_ids.ps1 new file mode 100644 index 0000000..3dbb315 --- /dev/null +++ b/scripts/extract_successful_ids.ps1 @@ -0,0 +1,21 @@ +if (Test-Path -Path $args[0] -PathType Leaf) { + $file=$args[0] +} +else { + Write-Host "CANNOT FIND LOG FILE" + Exit 1 +} + +if ($args[1] -ne $null) { + $output=$args[1] + Write-Host "Outputting IDs to $output" +} +else { + $output="./successful.txt" +} + +Select-String -Path $file -Pattern "Downloaded submission" | ForEach-Object { -split $_.Line | Select-Object -Last 3 | Select-Object -SkipLast 2 } >> $output +Select-String -Path $file -Pattern "Resource hash" | ForEach-Object { -split $_.Line | Select-Object -Last 3 | Select-Object -SkipLast 2 } >> $output +Select-String -Path $file -Pattern "Download filter" | ForEach-Object { -split $_.Line | Select-Object -Last 3 | Select-Object -SkipLast 2 } >> $output +Select-String -Path $file -Pattern "already exists, continuing" | ForEach-Object { -split $_.Line | Select-Object -Last 4 | Select-Object -SkipLast 3 } >> $output +Select-String -Path $file -Pattern "Hard link made" | ForEach-Object { -split $_.Line | Select-Object -Last 1 } >> $output diff --git a/scripts/print_summary.ps1 b/scripts/print_summary.ps1 new file mode 100644 index 0000000..5d85b09 --- /dev/null +++ b/scripts/print_summary.ps1 @@ -0,0 +1,30 @@ +if (Test-Path -Path $args[0] -PathType Leaf) { + $file=$args[0] +} +else { + Write-Host "CANNOT FIND LOG FILE" + Exit 1 +} + +if ($args[1] -ne $null) { + $output=$args[1] + Write-Host "Outputting IDs to $output" +} +else { + $output="./successful.txt" +} + +Write-Host -NoNewline "Downloaded submissions: " +Write-Host (Select-String -Path $file -Pattern "Downloaded 
submission" -AllMatches).Matches.Count +Write-Host -NoNewline "Failed downloads: " +Write-Host (Select-String -Path $file -Pattern "failed to download submission" -AllMatches).Matches.Count +Write-Host -NoNewline "Files already downloaded: " +Write-Host (Select-String -Path $file -Pattern "already exists, continuing" -AllMatches).Matches.Count +Write-Host -NoNewline "Hard linked submissions: " +Write-Host (Select-String -Path $file -Pattern "Hard link made" -AllMatches).Matches.Count +Write-Host -NoNewline "Excluded submissions: " +Write-Host (Select-String -Path $file -Pattern "in exclusion list" -AllMatches).Matches.Count +Write-Host -NoNewline "Files with existing hash skipped: " +Write-Host (Select-String -Path $file -Pattern "downloaded elsewhere" -AllMatches).Matches.Count +Write-Host -NoNewline "Submissions from excluded subreddits: " +Write-Host (Select-String -Path $file -Pattern "in skip list" -AllMatches).Matches.Count From ac3a8e913df84019b0d6dcd7403d5f9a4e946832 Mon Sep 17 00:00:00 2001 From: Thayol Date: Wed, 5 Jan 2022 13:13:45 +0100 Subject: [PATCH 2/5] Fix wrong offset --- scripts/extract_successful_ids.ps1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/extract_successful_ids.ps1 b/scripts/extract_successful_ids.ps1 index 3dbb315..00722f1 100644 --- a/scripts/extract_successful_ids.ps1 +++ b/scripts/extract_successful_ids.ps1 @@ -16,6 +16,6 @@ else { Select-String -Path $file -Pattern "Downloaded submission" | ForEach-Object { -split $_.Line | Select-Object -Last 3 | Select-Object -SkipLast 2 } >> $output Select-String -Path $file -Pattern "Resource hash" | ForEach-Object { -split $_.Line | Select-Object -Last 3 | Select-Object -SkipLast 2 } >> $output -Select-String -Path $file -Pattern "Download filter" | ForEach-Object { -split $_.Line | Select-Object -Last 3 | Select-Object -SkipLast 2 } >> $output +Select-String -Path $file -Pattern "Download filter" | ForEach-Object { -split $_.Line | Select-Object -Last 4 | 
Select-Object -SkipLast 3 } >> $output Select-String -Path $file -Pattern "already exists, continuing" | ForEach-Object { -split $_.Line | Select-Object -Last 4 | Select-Object -SkipLast 3 } >> $output Select-String -Path $file -Pattern "Hard link made" | ForEach-Object { -split $_.Line | Select-Object -Last 1 } >> $output From 8ec45a9302dbf420dbfaed382e3d0758be3fd71c Mon Sep 17 00:00:00 2001 From: Thayol Date: Thu, 6 Jan 2022 04:06:46 +0100 Subject: [PATCH 3/5] Fix Bash script: Failed to write --- scripts/extract_failed_ids.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/extract_failed_ids.sh b/scripts/extract_failed_ids.sh index f96bd9a..8addf7e 100755 --- a/scripts/extract_failed_ids.sh +++ b/scripts/extract_failed_ids.sh @@ -18,6 +18,6 @@ fi grep 'Could not download submission' "$file" | awk '{ print $12 }' | rev | cut -c 2- | rev ; grep 'Failed to download resource' "$file" | awk '{ print $15 }' ; grep 'failed to download submission' "$file" | awk '{ print $14 }' | rev | cut -c 2- | rev ; - grep 'Failed to write file' "$file" | awk '{ print $13 }' | rev | cut -c 2- | rev ; + grep 'Failed to write file' "$file" | awk '{ print $14 }' ; grep 'skipped due to disabled module' "$file" | awk '{ print $9 }' ; } >>"$output" From 3811ec37fb121675a3d5c3007ab96c9c44794144 Mon Sep 17 00:00:00 2001 From: Thayol Date: Thu, 6 Jan 2022 12:16:44 +0100 Subject: [PATCH 4/5] Fix offset and remove substring --- scripts/extract_failed_ids.ps1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/extract_failed_ids.ps1 b/scripts/extract_failed_ids.ps1 index 17d96f6..be2d2cb 100644 --- a/scripts/extract_failed_ids.ps1 +++ b/scripts/extract_failed_ids.ps1 @@ -17,5 +17,5 @@ else { Select-String -Path $file -Pattern "Could not download submission" | ForEach-Object { -split $_.Line | Select-Object -Skip 11 | Select-Object -First 1 } | foreach { $_.substring(0,$_.Length-1) } >> $output Select-String -Path $file -Pattern "Failed to download 
resource" | ForEach-Object { -split $_.Line | Select-Object -Skip 14 | Select-Object -First 1 } >> $output Select-String -Path $file -Pattern "failed to download submission" | ForEach-Object { -split $_.Line | Select-Object -Skip 13 | Select-Object -First 1 } | foreach { $_.substring(0,$_.Length-1) } >> $output -Select-String -Path $file -Pattern "Failed to write file" | ForEach-Object { -split $_.Line | Select-Object -Skip 12 | Select-Object -First 1 } | foreach { $_.substring(0,$_.Length-1) } >> $output +Select-String -Path $file -Pattern "Failed to write file" | ForEach-Object { -split $_.Line | Select-Object -Skip 13 | Select-Object -First 1 } >> $output Select-String -Path $file -Pattern "skipped due to disabled module" | ForEach-Object { -split $_.Line | Select-Object -Skip 8 | Select-Object -First 1 } >> $output From 81b7fe853b1ac2761f9ddddfe8072b14e69a72ba Mon Sep 17 00:00:00 2001 From: sinclairkosh <102016413+sinclairkosh@users.noreply.github.com> Date: Tue, 22 Mar 2022 05:53:43 +1100 Subject: [PATCH 5/5] Update Readme with some command clarifications Clarify the fact that downloading by user doesn't work the same way as downloading by subreddit. Feel free to use a better example username. :) --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index b84aa3d..47c0dde 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,12 @@ However, these commands are not enough. You should chain parameters in [Options] python3 -m bdfr download ./path/to/output --subreddit Python -L 10 ``` ```bash +python3 -m bdfr download ./path/to/output --user reddituser --submitted -L 100 +``` +```bash +python3 -m bdfr download ./path/to/output --user reddituser --submitted --all-comments --comment-context +``` +```bash python3 -m bdfr download ./path/to/output --user me --saved --authenticate -L 25 --file-scheme '{POSTID}' ``` ```bash