initial comment
This commit is contained in:
140
scan-subtitles.sh
Normal file
140
scan-subtitles.sh
Normal file
@@ -0,0 +1,140 @@
|
||||
#!/bin/bash
|
||||
################################################################################
# SUBTITLE SCANNER UTILITY
################################################################################
# Scans a directory tree and reports which files have subtitle streams
# Useful for:
# - Planning which files need processing
# - Verifying results after re-encoding
# - Estimating workload
################################################################################
|
||||
|
||||
# Strict mode: stop on unhandled errors, on unset variables, and on a failure
# in any stage of a pipeline.
set -euo pipefail

# Directory tree to scan: first positional argument, or the default path.
SCAN_DIR="${1:-/mnt/user/movies}"

# Report format, taken from the environment: summary (default), detailed, or csv.
OUTPUT_MODE="${OUTPUT_MODE:-summary}"
|
||||
|
||||
################################################################################
# Functions
################################################################################
|
||||
|
||||
#######################################
# Count the subtitle streams in one media file.
# Arguments: $1 - path of the file to probe
# Outputs:   the number of subtitle streams, as a bare integer, on stdout
# Returns:   0; a file ffprobe cannot read is reported as having 0 streams
#######################################
check_file() {
  local file="$1"
  local subtitle_count

  # ffprobe prints one codec name per subtitle stream, so the number of output
  # lines is the number of streams. The variable is declared separately so
  # that `local` does not mask the pipeline status; `|| true` then states the
  # best-effort policy explicitly: a failed probe must not abort the caller.
  subtitle_count=$(
    ffprobe -v error -select_streams s \
      -show_entries stream=codec_name \
      -of default=noprint_wrappers=1:nokey=1 \
      "$file" 2>/dev/null | wc -l
  ) || true

  # Some wc implementations (e.g. BSD) pad the count with leading spaces.
  # Arithmetic expansion reduces it to a plain integer (empty becomes 0) so the
  # value can be embedded in CSV rows unchanged.
  printf '%d\n' "$(( subtitle_count ))"
}
|
||||
|
||||
#######################################
# Format a byte count with the largest unit (B, KB, MB, GB) that fits.
# Units are powers of 1024 and the value is truncated to a whole number.
# Arguments: $1 - size in bytes; a missing or empty argument is treated as 0
# Outputs:   the formatted size, e.g. "512B", "3KB", "700MB", "12GB"
#######################################
human_readable_size() {
  # Default to 0: callers that pass an empty value unquoted leave $1 unset,
  # which would otherwise abort the script under `set -u`.
  local size="${1:-0}"

  if (( size < 1024 )); then
    printf '%sB\n' "$size"
  elif (( size < 1048576 )); then
    printf '%sKB\n' "$(( size / 1024 ))"
  elif (( size < 1073741824 )); then
    printf '%sMB\n' "$(( size / 1048576 ))"
  else
    printf '%sGB\n' "$(( size / 1073741824 ))"
  fi
}
|
||||
|
||||
################################################################################
# Main
################################################################################
|
||||
|
||||
# Refuse to run against a path that is not a directory.
if [[ ! -d "$SCAN_DIR" ]]; then
  echo "ERROR: Directory not found: $SCAN_DIR" >&2
  exit 1
fi

# Probe errors are discarded during the scan, so without ffprobe every file
# would be reported as having no subtitles. Fail loudly instead of producing
# a misleading report.
if ! command -v ffprobe >/dev/null 2>&1; then
  echo "ERROR: ffprobe not found in PATH" >&2
  exit 1
fi

# The banner is for human readers only. In csv mode stdout must carry nothing
# but the header row and the data rows, so it can be redirected to a file.
if [[ "$OUTPUT_MODE" != "csv" ]]; then
  echo "=========================================="
  echo "Scanning: $SCAN_DIR"
  echo "=========================================="
  echo ""
fi

# Running totals updated by the scan loop.
total_files=0
files_with_subs=0
files_without_subs=0
total_size=0
size_with_subs=0

# CSV header if needed
if [[ "$OUTPUT_MODE" == "csv" ]]; then
  echo "filepath,subtitle_count,size_bytes"
fi
|
||||
|
||||
# Walk every video file under SCAN_DIR in sorted path order, probe it, update
# the running totals, and emit the per-file output required by OUTPUT_MODE.

dq='"' # literal double quote, kept in a variable to keep CSV escaping readable

while IFS= read -r file; do
  total_files=$((total_files + 1))

  subtitle_count=$(check_file "$file")

  # Try the GNU stat syntax first and the BSD syntax second. If both fail
  # (e.g. the file vanished after find listed it) count it as 0 bytes rather
  # than letting `set -e` abort the whole scan.
  file_size=$(stat -c%s "$file" 2>/dev/null || stat -f%z "$file" 2>/dev/null) \
    || file_size=0
  total_size=$((total_size + file_size))

  if (( subtitle_count > 0 )); then
    files_with_subs=$((files_with_subs + 1))
    size_with_subs=$((size_with_subs + file_size))
  else
    files_without_subs=$((files_without_subs + 1))
  fi

  case "$OUTPUT_MODE" in
    detailed)
      if (( subtitle_count > 0 )); then
        printf '%s\n' "[HAS SUBS] $file ($subtitle_count subtitle stream(s), $(human_readable_size "$file_size"))"
      else
        printf '%s\n' "[NO SUBS] $file ($(human_readable_size "$file_size"))"
      fi
      ;;
    csv)
      # Paths containing a comma or a double quote would break the row, so
      # they are wrapped in quotes with embedded quotes doubled. Plain paths
      # are written unchanged.
      csv_path=$file
      if [[ "$csv_path" == *,* || "$csv_path" == *"$dq"* ]]; then
        csv_path=${csv_path//"$dq"/$dq$dq}
        csv_path="${dq}${csv_path}${dq}"
      fi
      printf '%s,%s,%s\n' "$csv_path" "$subtitle_count" "$file_size"
      ;;
    summary)
      # Progress indicator: rewrite the same terminal line every 10 files.
      if (( total_files % 10 == 0 )); then
        printf 'Scanned %s files...\r' "$total_files"
      fi
      ;;
  esac
done < <(find "$SCAN_DIR" -type f \( -name "*.mkv" -o -name "*.mp4" -o -name "*.avi" -o -name "*.m4v" \) | sort)
|
||||
|
||||
# Summary
# Human-readable report. Everything here, including the closing rule, is
# skipped in csv mode so that stdout contains only CSV rows.
if [[ "$OUTPUT_MODE" != "csv" ]]; then
  echo ""
  echo "=========================================="
  echo "SUMMARY"
  echo "=========================================="
  echo "Total files scanned: $total_files"
  echo "Files with subtitles: $files_with_subs"
  echo "Files without subtitles: $files_without_subs"
  echo ""
  echo "Total library size: $(human_readable_size "$total_size")"
  echo "Size needing processing: $(human_readable_size "$size_with_subs")"

  if (( files_with_subs > 0 )); then
    # Average over the files that have subtitles only.
    avg_size=$((size_with_subs / files_with_subs))
    echo "Average file size: $(human_readable_size "$avg_size")"

    # Rough time estimate: 2 hours per file, i.e. a 2-hour movie processed at
    # realtime (1x) speed.
    estimated_hours=$((files_with_subs * 2))
    echo ""
    echo "Estimated processing time: ~$estimated_hours hours (assuming ~2-hour movies at 1x realtime speed)"
    echo " (Actual time varies greatly based on hardware and settings)"
  fi

  echo ""
  echo "=========================================="
fi

# Usage examples
# %q shell-quotes the script path and scan directory so the suggested
# commands can be pasted as-is even when a path contains spaces.
if [[ "$OUTPUT_MODE" == "summary" ]]; then
  echo ""
  echo "TIP: For more detail, run with:"
  printf ' OUTPUT_MODE=detailed %q %q\n' "$0" "$SCAN_DIR"
  echo ""
  echo "Or generate a CSV:"
  printf ' OUTPUT_MODE=csv %q %q > subtitles.csv\n' "$0" "$SCAN_DIR"
fi
|
||||
Reference in New Issue
Block a user