#!/bin/bash
################################################################################
# SUBTITLE SCANNER UTILITY
################################################################################
# Scans a directory tree and reports which files have subtitle streams
# Useful for:
#   - Planning which files need processing
#   - Verifying results after re-encoding
#   - Estimating workload
#
# Usage:
#   [OUTPUT_MODE=summary|detailed|csv] <script> [SCAN_DIR]
#
#   SCAN_DIR     Directory to scan (default: /mnt/user/movies)
#   OUTPUT_MODE  summary (default), detailed, or csv
#
# In csv mode stdout carries only the CSV header and rows; banners and
# warnings are written to stderr so the output can be redirected to a file.
#
# Requires: ffprobe, find, sort (with -z), stat, wc
################################################################################

set -euo pipefail

SCAN_DIR="${1:-/mnt/user/movies}"
OUTPUT_MODE="${OUTPUT_MODE:-summary}"  # summary, detailed, or csv

################################################################################
# Functions
################################################################################

#######################################
# Prints an error message to stderr and exits with status 1.
# Arguments: $* - message text
#######################################
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

#######################################
# Prints a banner/status line.
# Globals:   OUTPUT_MODE (read)
# Arguments: $* - text to print
# Outputs:   stdout normally; stderr in csv mode so stdout stays valid CSV
#######################################
info() {
  if [[ "$OUTPUT_MODE" == "csv" ]]; then
    printf '%s\n' "$*" >&2
  else
    printf '%s\n' "$*"
  fi
}

#######################################
# Counts the subtitle streams ffprobe reports for a file.
# Arguments: $1 - path to the media file
# Outputs:   the number of lines ffprobe printed (one codec name per
#            subtitle stream) as a plain integer on stdout
# Returns:   0 always; a failed probe yields whatever count was printed
#######################################
check_file() {
  local file=$1
  local count
  # A failing ffprobe makes the pipeline fail under pipefail; that must not
  # abort the whole scan, so the status is deliberately ignored here and the
  # lines that were printed (normally none) are still counted.
  # stdin is /dev/null so the probe can never consume the caller's file list.
  count=$(
    ffprobe -v error -select_streams s \
      -show_entries stream=codec_name \
      -of default=noprint_wrappers=1:nokey=1 \
      "$file" 2>/dev/null </dev/null | wc -l
  ) || true
  # Arithmetic expansion drops the space padding some wc implementations add
  # and turns an empty result into 0.
  echo "$((count))"
}

#######################################
# Formats a byte count using whole B/KB/MB/GB units (1024-based, truncated).
# Arguments: $1 - size in bytes (defaults to 0)
# Outputs:   e.g. "512B", "3KB", "700MB", "4GB" on stdout
#######################################
human_readable_size() {
  local size=${1:-0}
  if ((size < 1024)); then
    echo "${size}B"
  elif ((size < 1048576)); then
    echo "$((size / 1024))KB"
  elif ((size < 1073741824)); then
    echo "$((size / 1048576))MB"
  else
    echo "$((size / 1073741824))GB"
  fi
}

#######################################
# Prints the size of a file in bytes.
# Tries the GNU stat syntax first, then the BSD syntax.
# Arguments: $1 - path to the file
# Outputs:   size in bytes on stdout
# Returns:   1 if neither stat form produced a number
#######################################
file_size_bytes() {
  local file=$1
  local size
  size=$(stat -c%s "$file" 2>/dev/null || stat -f%z "$file" 2>/dev/null) \
    || return 1
  [[ "$size" =~ ^[0-9]+$ ]] || return 1
  echo "$size"
}

#######################################
# Encodes one value as a CSV field (RFC 4180 style).
# The value is wrapped in double quotes, with embedded quotes doubled, only
# when it contains a comma, a double quote, or a line break; other values are
# printed unchanged.
# Arguments: $1 - raw field value
# Outputs:   encoded field on stdout, without a trailing newline
#######################################
csv_field() {
  local field=$1
  local dq='"'
  case "$field" in
    *,* | *"$dq"* | *$'\n'* | *$'\r'*)
      printf '"%s"' "${field//$dq/$dq$dq}"
      ;;
    *)
      printf '%s' "$field"
      ;;
  esac
}

################################################################################
# Main
################################################################################

#######################################
# Scans SCAN_DIR and prints the report selected by OUTPUT_MODE.
# Globals: SCAN_DIR (read), OUTPUT_MODE (read)
# Returns: exits 1 on an invalid mode, missing directory, or missing ffprobe
#######################################
main() {
  local file file_size subtitle_count
  local total_files=0
  local files_with_subs=0
  local files_without_subs=0
  local total_size=0
  local size_with_subs=0
  local avg_size estimated_hours

  case "$OUTPUT_MODE" in
    summary | detailed | csv) ;;
    *)
      die "Unknown OUTPUT_MODE '$OUTPUT_MODE' (expected summary, detailed, or csv)"
      ;;
  esac

  if [[ ! -d "$SCAN_DIR" ]]; then
    echo "ERROR: Directory not found: $SCAN_DIR" >&2
    exit 1
  fi

  # Without ffprobe every probe would fail quietly and each file would be
  # reported as having no subtitles, so refuse to run instead.
  command -v ffprobe >/dev/null 2>&1 || die "ffprobe not found in PATH"

  info "=========================================="
  info "Scanning: $SCAN_DIR"
  info "=========================================="
  info ""

  # CSV header if needed
  if [[ "$OUTPUT_MODE" == "csv" ]]; then
    echo "filepath,subtitle_count,size_bytes"
  fi

  # NUL-delimited so that any legal file name survives the find | sort | read
  # hand-off intact.
  while IFS= read -r -d '' file; do
    # A file can vanish or become unreadable between find and stat; skip it
    # rather than aborting the whole scan.
    if ! file_size=$(file_size_bytes "$file"); then
      printf 'WARNING: cannot determine size, skipping: %s\n' "$file" >&2
      continue
    fi

    total_files=$((total_files + 1))
    subtitle_count=$(check_file "$file")
    total_size=$((total_size + file_size))

    if ((subtitle_count > 0)); then
      files_with_subs=$((files_with_subs + 1))
      size_with_subs=$((size_with_subs + file_size))
      if [[ "$OUTPUT_MODE" == "detailed" ]]; then
        printf '%s\n' "[HAS SUBS] $file ($subtitle_count subtitle stream(s), $(human_readable_size "$file_size"))"
      fi
    else
      files_without_subs=$((files_without_subs + 1))
      if [[ "$OUTPUT_MODE" == "detailed" ]]; then
        printf '%s\n' "[NO SUBS] $file ($(human_readable_size "$file_size"))"
      fi
    fi

    if [[ "$OUTPUT_MODE" == "csv" ]]; then
      printf '%s,%s,%s\n' "$(csv_field "$file")" "$subtitle_count" "$file_size"
    fi

    # Progress indicator for summary mode (stderr, so a redirected report
    # does not collect carriage-return progress text)
    if [[ "$OUTPUT_MODE" == "summary" ]] && ((total_files % 10 == 0)); then
      printf 'Scanned %s files...\r' "$total_files" >&2
    fi
  done < <(
    find "$SCAN_DIR" -type f \
      \( -name "*.mkv" -o -name "*.mp4" -o -name "*.avi" -o -name "*.m4v" \) \
      -print0 | sort -z
  )

  # Summary
  if [[ "$OUTPUT_MODE" != "csv" ]]; then
    echo ""
    echo "=========================================="
    echo "SUMMARY"
    echo "=========================================="
    echo "Total files scanned: $total_files"
    echo "Files with subtitles: $files_with_subs"
    echo "Files without subtitles: $files_without_subs"
    echo ""
    echo "Total library size: $(human_readable_size "$total_size")"
    echo "Size needing processing: $(human_readable_size "$size_with_subs")"

    if ((files_with_subs > 0)); then
      avg_size=$((size_with_subs / files_with_subs))
      echo "Average file size: $(human_readable_size "$avg_size")"

      # Rough time estimate: 2 hours per file, i.e. a 2-hour movie processed
      # at about 1x realtime.
      estimated_hours=$((files_with_subs * 2))
      echo ""
      echo "Estimated processing time: ~$estimated_hours hours (assuming 2-hour movies at ~1x realtime)"
      echo " (Actual time varies greatly based on hardware and settings)"
    fi
  fi

  info ""
  info "=========================================="

  # Usage examples (%q keeps the suggested commands copy-pasteable when the
  # script path or scan directory contains spaces)
  if [[ "$OUTPUT_MODE" == "summary" ]]; then
    echo ""
    echo "TIP: For more detail, run with:"
    printf ' OUTPUT_MODE=detailed %q %q\n' "$0" "$SCAN_DIR"
    echo ""
    echo "Or generate a CSV:"
    printf ' OUTPUT_MODE=csv %q %q > subtitles.csv\n' "$0" "$SCAN_DIR"
  fi
}

main "$@"