#!/bin/bash
################################################################################
# SUBTITLE SCANNER UTILITY
################################################################################
# Scans a directory tree and reports which files have subtitle streams
# Useful for:
#   - Planning which files need processing
#   - Verifying results after re-encoding
#   - Estimating workload
################################################################################

# Abort on unhandled errors, unset variables and failed pipeline stages.
set -euo pipefail

# Directory to scan: first positional argument, default /mnt/user/movies.
SCAN_DIR="${1:-/mnt/user/movies}"
# Report format, taken from the environment: summary, detailed, or csv.
OUTPUT_MODE="${OUTPUT_MODE:-summary}"

# Reject unknown modes up front. Without this check a typo such as "CSV"
# silently produces a summary-style report with no per-file output.
case "$OUTPUT_MODE" in
  summary | detailed | csv) ;;
  *)
    echo "ERROR: Invalid OUTPUT_MODE '$OUTPUT_MODE' (expected: summary, detailed, or csv)" >&2
    exit 2
    ;;
esac

################################################################################
# Functions
################################################################################

#######################################
# Count the subtitle streams in a media file.
# Arguments: $1 - path to the media file
# Outputs:   the number of subtitle streams, as a bare integer, on stdout.
#            Prints 0 when ffprobe lists no subtitle streams or fails on
#            the file.
# Returns:   0
#######################################
check_file() {
  local file="$1"
  local subtitle_count

  # ffprobe prints one codec_name line per subtitle stream; wc counts them.
  # Declaration and assignment are split so `local` does not hide the
  # pipeline status. A failing ffprobe is deliberately tolerated (|| true):
  # one unreadable file must not abort a whole scan, it just counts as 0.
  # stdin is pointed at /dev/null defensively, because the caller invokes
  # this from a loop whose own stdin is the list of files.
  subtitle_count=$(ffprobe -v error -select_streams s \
    -show_entries stream=codec_name \
    -of default=noprint_wrappers=1:nokey=1 "$file" 2>/dev/null </dev/null \
    | wc -l) || true

  # Some wc implementations (BSD/macOS) left-pad the count with spaces.
  # Arithmetic expansion normalises it to a bare integer and turns an
  # empty result into 0.
  echo "$(( subtitle_count ))"
}

#######################################
# Format a byte count for display using 1024-based units.
# Values below 1024 are printed as whole bytes ("512B"). Larger values are
# printed with one decimal place ("1.5GB") so that, for example, 1.9 GiB is
# not shown as "1GB". The decimal is truncated, not rounded.
# Arguments: $1 - size in bytes (integer); missing or empty is treated as 0
# Outputs:   the formatted size on stdout
#######################################
human_readable_size() {
  local size="${1:-0}"
  local unit suffix

  if (( size < 1024 )); then
    echo "$(( size ))B"
    return 0
  elif (( size < 1048576 )); then
    unit=1024 suffix="KB"
  elif (( size < 1073741824 )); then
    unit=1048576 suffix="MB"
  elif (( size < 1099511627776 )); then
    unit=1073741824 suffix="GB"
  else
    unit=1099511627776 suffix="TB"
  fi

  # Integer-only fixed point: whole units first, then tenths derived from
  # the remainder (bash has no floating-point arithmetic).
  echo "$(( size / unit )).$(( size % unit * 10 / unit ))${suffix}"
}

################################################################################
# Main
################################################################################

# Refuse to run against a path that is not an existing directory.
if [[ ! -d "$SCAN_DIR" ]]; then
  echo "ERROR: Directory not found: $SCAN_DIR" >&2
  exit 1
fi

# Probe failures are silenced during the scan, so without ffprobe every file
# would be reported as having no subtitles. Stop early with a clear message.
if ! command -v ffprobe >/dev/null 2>&1; then
  echo "ERROR: ffprobe not found in PATH" >&2
  exit 1
fi

# Print the opening banner.
print_banner() {
  echo "=========================================="
  echo "Scanning: $SCAN_DIR"
  echo "=========================================="
  echo ""
}

# In csv mode stdout must carry nothing but CSV rows (the output is meant to
# be redirected into a file), so the banner is diverted to stderr there.
if [[ "$OUTPUT_MODE" == "csv" ]]; then
  print_banner >&2
else
  print_banner
fi

# Running totals updated while scanning; sizes are in bytes.
total_files=0
files_with_subs=0
files_without_subs=0
total_size=0
size_with_subs=0

# CSV header if needed
if [[ "$OUTPUT_MODE" == "csv" ]]; then
  echo "filepath,subtitle_count,size_bytes"
fi

#######################################
# Render one value as a CSV field (RFC 4180 style): values containing a
# comma, a double quote or a line break are wrapped in double quotes with
# embedded quotes doubled; anything else is printed unchanged.
# Arguments: $1 - raw field value
# Outputs:   the encoded field on stdout
#######################################
csv_field() {
  local value="$1"
  local dq='"'
  case "$value" in
    *,* | *"$dq"* | *$'\n'* | *$'\r'*)
      printf '"%s"\n' "${value//$dq/$dq$dq}"
      ;;
    *)
      printf '%s\n' "$value"
      ;;
  esac
}

# Visit every video file under SCAN_DIR in sorted order, update the running
# totals and, in detailed/csv mode, print one line per file. Names are passed
# NUL-delimited so paths containing newlines cannot be split.
while IFS= read -r -d '' file; do
  total_files=$((total_files + 1))

  subtitle_count=$(check_file "$file")

  # GNU stat syntax first, BSD/macOS syntax as fallback. If neither yields a
  # number (e.g. the file vanished mid-scan), warn and count it as 0 bytes
  # instead of letting `set -e` abort the whole scan silently.
  file_size=$(stat -c%s "$file" 2>/dev/null || stat -f%z "$file" 2>/dev/null) \
    || file_size=""
  if [[ ! "$file_size" =~ ^[0-9]+$ ]]; then
    echo "WARNING: could not determine size, counting as 0 bytes: $file" >&2
    file_size=0
  fi
  total_size=$((total_size + file_size))

  if [[ "$subtitle_count" -gt 0 ]]; then
    files_with_subs=$((files_with_subs + 1))
    size_with_subs=$((size_with_subs + file_size))

    case "$OUTPUT_MODE" in
      detailed)
        echo "[HAS SUBS] $file ($subtitle_count subtitle stream(s), $(human_readable_size "$file_size"))"
        ;;
      csv)
        echo "$(csv_field "$file"),$subtitle_count,$file_size"
        ;;
    esac
  else
    files_without_subs=$((files_without_subs + 1))

    case "$OUTPUT_MODE" in
      detailed)
        echo "[NO SUBS] $file ($(human_readable_size "$file_size"))"
        ;;
      csv)
        echo "$(csv_field "$file"),0,$file_size"
        ;;
    esac
  fi

  # Progress indicator for summary mode: refreshed every 10 files, with a
  # carriage return so each update overwrites the previous one.
  if [[ "$OUTPUT_MODE" == "summary" ]] && (( total_files % 10 == 0 )); then
    echo -ne "Scanned $total_files files...\r"
  fi
done < <(
  # -iname matches extensions case-insensitively, so .MKV/.Mp4 are included.
  find "$SCAN_DIR" -type f \
    \( -iname "*.mkv" -o -iname "*.mp4" -o -iname "*.avi" -o -iname "*.m4v" \) \
    -print0 | sort -z
)

# Summary
# Human-readable totals followed by the closing rule. The whole section,
# including the closing blank line and rule, is skipped in csv mode so that
# stdout holds nothing but CSV rows.
if [[ "$OUTPUT_MODE" != "csv" ]]; then
  echo ""
  echo "=========================================="
  echo "SUMMARY"
  echo "=========================================="
  echo "Total files scanned: $total_files"
  echo "Files with subtitles: $files_with_subs"
  echo "Files without subtitles: $files_without_subs"
  echo ""
  echo "Total library size: $(human_readable_size "$total_size")"
  echo "Size needing processing: $(human_readable_size "$size_with_subs")"

  # Per-file figures only make sense when at least one file has subtitles;
  # the guard also avoids a division by zero.
  if [[ "$files_with_subs" -gt 0 ]]; then
    avg_size=$((size_with_subs / files_with_subs))
    echo "Average file size: $(human_readable_size "$avg_size")"

    # Rough time estimate: a flat 2 hours per file with subtitles.
    # NOTE(review): the "1 fps" wording in the message below does not
    # obviously correspond to a 2-hours-per-file figure; confirm the
    # intended assumption before relying on this number.
    estimated_hours=$((files_with_subs * 2))
    echo ""
    echo "Estimated processing time: ~$estimated_hours hours (at 1 fps average)"
    echo " (Actual time varies greatly based on hardware and settings)"
  fi

  echo ""
  echo "=========================================="
fi

# Usage examples
# Shown after a summary run only. The script path and scan directory are
# emitted with %q so the suggested commands can be pasted into a shell even
# when either path contains spaces or other special characters; simple
# paths are printed unchanged.
if [[ "$OUTPUT_MODE" == "summary" ]]; then
  echo ""
  echo "TIP: For more detail, run with:"
  printf ' OUTPUT_MODE=detailed %q %q\n' "$0" "$SCAN_DIR"
  echo ""
  echo "Or generate a CSV:"
  printf ' OUTPUT_MODE=csv %q %q > subtitles.csv\n' "$0" "$SCAN_DIR"
fi