#!/usr/bin/env python3
"""
ENCODERPRO - PHASE 3
====================

High-performance media re-encoding with GPU acceleration, parallel
processing, and advanced encoding profiles.

Features:
- GPU acceleration (NVENC, QSV, VAAPI)
- Automatic encoder detection and fallback
- Parallel processing with worker pools
- Multiple encoding profiles
- Progress tracking with ETA
- Resource management (CPU/GPU limits)
- Advanced encoding rules (resolution, HDR, audio)
- Docker-ready architecture
"""

import argparse
import concurrent.futures
import json
import logging
import multiprocessing
import os
import re
import shutil
import signal
import sqlite3
import subprocess
import sys
import threading
import time
from dataclasses import dataclass, asdict
from datetime import datetime, timedelta
from enum import Enum
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Any

__version__ = "3.0.0"

# =============================================================================
# CONSTANTS & ENUMS
# =============================================================================

class ProcessingState(Enum):
    """File processing states"""
    DISCOVERED = "discovered"    # Found during scan, not selected yet
    PENDING = "pending"          # User selected for encoding
    PROCESSING = "processing"    # Currently being encoded
    COMPLETED = "completed"      # Successfully encoded
    FAILED = "failed"            # Encoding failed, can retry
    SKIPPED = "skipped"          # No subtitles or excluded

class EncoderType(Enum):
    """Hardware encoder types"""
    CPU_X265 = "cpu_x265"
    CPU_X264 = "cpu_x264"
    CPU_AV1 = "cpu_av1"
    NVIDIA_NVENC_H265 = "nvenc_h265"
    NVIDIA_NVENC_H264 = "nvenc_h264"
    NVIDIA_NVENC_AV1 = "nvenc_av1"
    INTEL_QSV_H265 = "qsv_h265"
    INTEL_QSV_H264 = "qsv_h264"
    INTEL_QSV_AV1 = "qsv_av1"
    AMD_VAAPI_H265 = "vaapi_h265"
    AMD_VAAPI_H264 = "vaapi_h264"
    AMD_VAAPI_AV1 = "vaapi_av1"

@dataclass
class EncodingProfile:
    """Encoding profile configuration"""
    name: str
    encoder: EncoderType
    preset: str
    quality: int  # CRF or QP depending on encoder
    audio_codec: str = "copy"
    max_resolution: Optional[str] = None  # e.g., "1920x1080"
    hdr_handling: str = "preserve"  # preserve, tonemap, strip

    def to_dict(self) -> Dict:
        return asdict(self)

@dataclass
class EncoderCapabilities:
    """System encoder capabilities"""
    has_nvenc: bool = False
    has_qsv: bool = False
    has_vaapi: bool = False
    has_x265: bool = False
    has_x264: bool = False
    has_av1: bool = False
    has_nvenc_av1: bool = False
    has_qsv_av1: bool = False
    has_vaapi_av1: bool = False
    nvenc_devices: List[int] = None

    def __post_init__(self):
        if self.nvenc_devices is None:
            self.nvenc_devices = []
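
# Illustrative example (hypothetical values): config files name encoders by
# the enum *value* (e.g. "nvenc_h265"), so lookups must go through
# EncoderType(...), not EncoderType[...]:
#
#     profile = EncodingProfile(
#         name="balanced_gpu",
#         encoder=EncoderType("nvenc_h265"),  # value lookup
#         preset="p4",
#         quality=23,
#     )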

# =============================================================================
# CONFIGURATION
# =============================================================================

class Config:
    """Configuration container for Phase 3"""

    def __init__(self, config_dict: dict):
        # Phase 2 settings
        self.movies_dir = Path(config_dict.get('movies_dir', '/mnt/user/movies'))
        self.archive_dir = Path(config_dict.get('archive_dir', '/mnt/user/archive/movies'))
        self.work_dir = Path(config_dict.get('work_dir', '/mnt/user/temp/reencode-work'))
        self.state_db = Path(config_dict.get('state_db', '/var/lib/reencode/state.db'))
        self.log_dir = Path(config_dict.get('log_dir', '/var/log/reencode'))

        # Phase 3: Parallel processing
        parallel = config_dict.get('parallel', {})
        self.max_workers = parallel.get('max_workers', 1)
        self.gpu_slots = parallel.get('gpu_slots', 1)  # Concurrent GPU encodes
        self.cpu_slots = parallel.get('cpu_slots', multiprocessing.cpu_count())

        # Phase 3: Profiles
        profiles_config = config_dict.get('profiles', {})
        self.default_profile = profiles_config.get('default', 'balanced_gpu')
        self.profiles = self._load_profiles(profiles_config.get('definitions', {}))

        # Processing settings
        processing = config_dict.get('processing', {})
        self.file_extensions = processing.get('file_extensions', ['mkv', 'mp4', 'avi', 'm4v'])
        self.skip_without_subtitles = processing.get('skip_without_subtitles', True)
        self.cleanup_stale_work = processing.get('cleanup_stale_work', True)

        # Phase 3: Advanced options
        advanced = config_dict.get('advanced', {})
        self.auto_detect_encoders = advanced.get('auto_detect_encoders', True)
        self.prefer_gpu = advanced.get('prefer_gpu', True)
        self.fallback_to_cpu = advanced.get('fallback_to_cpu', True)
        self.progress_interval = advanced.get('progress_interval', 10)  # seconds

        # Ensure directories exist
        self.work_dir.mkdir(parents=True, exist_ok=True)
        self.archive_dir.mkdir(parents=True, exist_ok=True)
        self.state_db.parent.mkdir(parents=True, exist_ok=True)
        self.log_dir.mkdir(parents=True, exist_ok=True)

    def _load_profiles(self, profiles_dict: Dict) -> Dict[str, EncodingProfile]:
        """Load encoding profiles from configuration"""
        profiles = {}

        # Default profiles if none specified
        if not profiles_dict:
            profiles_dict = {
                'balanced_gpu': {'encoder': 'nvenc_h265', 'preset': 'p4', 'quality': 23},
                'fast_gpu': {'encoder': 'nvenc_h264', 'preset': 'p1', 'quality': 26},
                'quality_cpu': {'encoder': 'cpu_x265', 'preset': 'slow', 'quality': 20},
                'balanced_cpu': {'encoder': 'cpu_x265', 'preset': 'medium', 'quality': 23}
            }

        for name, profile_config in profiles_dict.items():
            encoder_str = profile_config.get('encoder', 'cpu_x265')

            # Map string to EncoderType by enum *value* (e.g. 'nvenc_h265').
            # Name-based lookup (EncoderType['NVENC_H265']) would always fail,
            # since member names are e.g. NVIDIA_NVENC_H265.
            try:
                encoder = EncoderType(encoder_str)
            except ValueError:
                encoder = EncoderType.CPU_X265

            profiles[name] = EncodingProfile(
                name=name,
                encoder=encoder,
                preset=profile_config.get('preset', 'medium'),
                quality=profile_config.get('quality', 23),
                audio_codec=profile_config.get('audio_codec', 'copy'),
                max_resolution=profile_config.get('max_resolution'),
                hdr_handling=profile_config.get('hdr_handling', 'preserve')
            )

        return profiles

    def get_profile(self, name: Optional[str] = None) -> EncodingProfile:
        """Get encoding profile by name"""
        profile_name = name or self.default_profile
        return self.profiles.get(profile_name, list(self.profiles.values())[0])
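
# A minimal example configuration (illustrative values; every key is optional
# and falls back to the defaults above), in the YAML form load_config() reads:
#
#     movies_dir: /mnt/user/movies
#     archive_dir: /mnt/user/archive/movies
#     parallel:
#       max_workers: 2
#       gpu_slots: 1
#     profiles:
#       default: balanced_gpu
#       definitions:
#         balanced_gpu:
#           encoder: nvenc_h265
#           preset: p4
#           quality: 23
#     advanced:
#       prefer_gpu: true
#       fallback_to_cpu: true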

# =============================================================================
# ENCODER DETECTION
# =============================================================================

class EncoderDetector:
    """Detects available hardware encoders"""

    @staticmethod
    def detect_capabilities() -> EncoderCapabilities:
        """Detect available encoders on the system"""
        caps = EncoderCapabilities()

        # Check FFmpeg encoders
        try:
            result = subprocess.run(
                ['ffmpeg', '-hide_banner', '-encoders'],
                capture_output=True, text=True, timeout=10
            )
            encoders_output = result.stdout.lower()

            # CPU encoders
            caps.has_x265 = 'libx265' in encoders_output
            caps.has_x264 = 'libx264' in encoders_output
            caps.has_av1 = 'libsvtav1' in encoders_output or 'libaom-av1' in encoders_output

            # NVIDIA NVENC
            caps.has_nvenc = 'hevc_nvenc' in encoders_output or 'h264_nvenc' in encoders_output
            caps.has_nvenc_av1 = 'av1_nvenc' in encoders_output

            # Intel QSV
            caps.has_qsv = 'hevc_qsv' in encoders_output or 'h264_qsv' in encoders_output
            caps.has_qsv_av1 = 'av1_qsv' in encoders_output

            # AMD VAAPI
            caps.has_vaapi = 'hevc_vaapi' in encoders_output or 'h264_vaapi' in encoders_output
            caps.has_vaapi_av1 = 'av1_vaapi' in encoders_output
        except Exception as e:
            logging.warning(f"Failed to detect encoders: {e}")

        # Detect NVIDIA GPU devices
        if caps.has_nvenc:
            caps.nvenc_devices = EncoderDetector._detect_nvidia_gpus()

        return caps

    @staticmethod
    def _detect_nvidia_gpus() -> List[int]:
        """Detect NVIDIA GPU device IDs"""
        try:
            result = subprocess.run(
                ['nvidia-smi', '--query-gpu=index', '--format=csv,noheader'],
                capture_output=True, text=True, timeout=5
            )
            if result.returncode == 0:
                return [int(line.strip()) for line in result.stdout.strip().split('\n')
                        if line.strip()]
        except Exception:
            pass
        return []

    @staticmethod
    def select_best_encoder(caps: EncoderCapabilities, prefer_gpu: bool = True) -> EncoderType:
        """Select best available encoder"""
        if prefer_gpu:
            # Priority: NVENC > QSV > VAAPI > CPU
            if caps.has_nvenc:
                return EncoderType.NVIDIA_NVENC_H265
            elif caps.has_qsv:
                return EncoderType.INTEL_QSV_H265
            elif caps.has_vaapi:
                return EncoderType.AMD_VAAPI_H265

        # Fallback to CPU
        if caps.has_x265:
            return EncoderType.CPU_X265
        elif caps.has_x264:
            return EncoderType.CPU_X264

        raise RuntimeError("No suitable encoder found")
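
# Optional sketch, not wired into detection above: an encoder appearing in
# `ffmpeg -encoders` does not guarantee the matching hardware/driver is
# usable at runtime. A one-second null encode is a cheap smoke test. The
# function name and test parameters here are illustrative assumptions.
def encoder_smoke_test(ffmpeg_encoder: str, timeout: int = 30) -> bool:
    """Return True if a tiny test encode with the given FFmpeg encoder succeeds."""
    cmd = [
        'ffmpeg', '-hide_banner', '-v', 'error',
        '-f', 'lavfi', '-i', 'testsrc2=size=256x256:rate=30:duration=1',
        '-pix_fmt', 'yuv420p',
        '-c:v', ffmpeg_encoder,
        '-f', 'null', '-'          # discard output; we only care about success
    ]
    try:
        return subprocess.run(cmd, capture_output=True, timeout=timeout).returncode == 0
    except Exception:
        return False

# Example: encoder_smoke_test('hevc_nvenc') could confirm NVENC really works
# before trusting caps.has_nvenc.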

# =============================================================================
# DATABASE (Extended from Phase 2)
# =============================================================================

class StateDatabase:
    """Extended database with Phase 3 features"""

    def __init__(self, db_path: Path):
        self.db_path = db_path
        self.conn = None
        self._lock = threading.Lock()
        self._init_database()

    def _init_database(self):
        """Initialize database schema"""
        self.conn = sqlite3.connect(str(self.db_path), check_same_thread=False)
        self.conn.row_factory = sqlite3.Row
        cursor = self.conn.cursor()

        # Main files table (Phase 2 schema)
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS files (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                filepath TEXT UNIQUE NOT NULL,
                relative_path TEXT NOT NULL,
                state TEXT NOT NULL,
                has_subtitles BOOLEAN,
                original_size INTEGER,
                encoded_size INTEGER,
                subtitle_count INTEGER,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                started_at TIMESTAMP,
                completed_at TIMESTAMP,
                error_message TEXT,
                profile_name TEXT,
                encoder_used TEXT,
                encode_time_seconds REAL,
                fps REAL
            )
        """)

        # Phase 3: Processing history
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS processing_history (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                file_id INTEGER NOT NULL,
                profile_name TEXT,
                encoder_used TEXT,
                started_at TIMESTAMP,
                completed_at TIMESTAMP,
                success BOOLEAN,
                error_message TEXT,
                original_size INTEGER,
                encoded_size INTEGER,
                encode_time_seconds REAL,
                fps REAL,
                FOREIGN KEY (file_id) REFERENCES files (id)
            )
        """)

        # Indices (core columns only)
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_state ON files(state)")
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_filepath ON files(filepath)")
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_profile ON files(profile_name)")

        # Migration: Add new columns if they don't exist
        cursor.execute("PRAGMA table_info(files)")
        columns = {row[1] for row in cursor.fetchall()}

        migrations = [
            ("video_codec", "ALTER TABLE files ADD COLUMN video_codec TEXT"),
            ("audio_codec", "ALTER TABLE files ADD COLUMN audio_codec TEXT"),
            ("audio_channels", "ALTER TABLE files ADD COLUMN audio_channels INTEGER"),
            ("width", "ALTER TABLE files ADD COLUMN width INTEGER"),
            ("height", "ALTER TABLE files ADD COLUMN height INTEGER"),
            ("duration", "ALTER TABLE files ADD COLUMN duration REAL"),
            ("bitrate", "ALTER TABLE files ADD COLUMN bitrate INTEGER"),
            ("container_format", "ALTER TABLE files ADD COLUMN container_format TEXT"),
            ("file_hash", "ALTER TABLE files ADD COLUMN file_hash TEXT"),
        ]

        for column_name, alter_sql in migrations:
            if column_name not in columns:
                logging.info(f"Adding column '{column_name}' to files table")
                cursor.execute(alter_sql)

        # Create indices for migrated columns
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_file_hash ON files(file_hash)")

        self.conn.commit()

    def add_file(self, filepath: str, relative_path: str, has_subtitles: bool,
                 subtitle_count: int, original_size: int,
                 video_codec: str = None, audio_codec: str = None,
                 audio_channels: int = None, width: int = None, height: int = None,
                 duration: float = None, bitrate: int = None,
                 container_format: str = None, file_hash: str = None):
        """Add or update a file in the database"""
        with self._lock:
            cursor = self.conn.cursor()

            # All scanned files are marked as DISCOVERED (found but not selected).
            # Users can then filter and select which files to encode; only when
            # the user selects files do they become PENDING.
            state = ProcessingState.DISCOVERED.value

            cursor.execute("""
                INSERT INTO files (filepath, relative_path, state, has_subtitles,
                                   subtitle_count, original_size, video_codec,
                                   audio_codec, audio_channels, width, height,
                                   duration, bitrate, container_format, file_hash)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                ON CONFLICT(filepath) DO UPDATE SET
                    state = excluded.state,
                    has_subtitles = excluded.has_subtitles,
                    subtitle_count = excluded.subtitle_count,
                    original_size = excluded.original_size,
                    video_codec = excluded.video_codec,
                    audio_codec = excluded.audio_codec,
                    audio_channels = excluded.audio_channels,
                    width = excluded.width,
                    height = excluded.height,
                    duration = excluded.duration,
                    bitrate = excluded.bitrate,
                    container_format = excluded.container_format,
                    file_hash = excluded.file_hash,
                    updated_at = CURRENT_TIMESTAMP
            """, (filepath, relative_path, state, has_subtitles, subtitle_count,
                  original_size, video_codec, audio_codec, audio_channels,
                  width, height, duration, bitrate, container_format, file_hash))

            self.conn.commit()

    def get_file(self, filepath: str) -> Optional[Dict]:
        """Get file record"""
        with self._lock:
            cursor = self.conn.cursor()
            cursor.execute("SELECT * FROM files WHERE filepath = ?", (filepath,))
            row = cursor.fetchone()
            return dict(row) if row else None

    def find_duplicates_by_hash(self, file_hash: str) -> List[Dict]:
        """Find all files with the same content hash"""
        with self._lock:
            cursor = self.conn.cursor()
            cursor.execute("SELECT * FROM files WHERE file_hash = ?", (file_hash,))
            rows = cursor.fetchall()
            return [dict(row) for row in rows]

    def update_state(self, filepath: str, state: ProcessingState,
                     error_message: Optional[str] = None):
        """Update file processing state"""
        with self._lock:
            cursor = self.conn.cursor()

            timestamp_field = None
            if state == ProcessingState.PROCESSING:
                timestamp_field = "started_at"
            elif state in (ProcessingState.COMPLETED, ProcessingState.FAILED,
                           ProcessingState.SKIPPED):
                timestamp_field = "completed_at"

            if timestamp_field:
                cursor.execute(f"""
                    UPDATE files
                    SET state = ?, error_message = ?,
                        {timestamp_field} = CURRENT_TIMESTAMP,
                        updated_at = CURRENT_TIMESTAMP
                    WHERE filepath = ?
                """, (state.value, error_message, filepath))
            else:
                cursor.execute("""
                    UPDATE files
                    SET state = ?, error_message = ?, updated_at = CURRENT_TIMESTAMP
                    WHERE filepath = ?
                """, (state.value, error_message, filepath))

            self.conn.commit()
""", (state.value, error_message, filepath)) self.conn.commit() def update_processing_info(self, filepath: str, profile_name: str, encoder: str, encoded_size: int, encode_time: float, fps: float): """Update processing information""" with self._lock: cursor = self.conn.cursor() cursor.execute(""" UPDATE files SET encoded_size = ?, profile_name = ?, encoder_used = ?, encode_time_seconds = ?, fps = ? WHERE filepath = ? """, (encoded_size, profile_name, encoder, encode_time, fps, filepath)) self.conn.commit() def get_files_by_state(self, state: ProcessingState, limit: Optional[int] = None) -> List[Dict]: """Get files in a given state""" with self._lock: cursor = self.conn.cursor() query = "SELECT * FROM files WHERE state = ? ORDER BY created_at" if limit: query += f" LIMIT {limit}" cursor.execute(query, (state.value,)) return [dict(row) for row in cursor.fetchall()] def get_statistics(self) -> Dict: """Get processing statistics""" with self._lock: cursor = self.conn.cursor() stats = {} for state in ProcessingState: cursor.execute("SELECT COUNT(*) FROM files WHERE state = ?", (state.value,)) stats[state.value] = cursor.fetchone()[0] # Size statistics cursor.execute(""" SELECT SUM(original_size), SUM(encoded_size), AVG(fps), AVG(encode_time_seconds) FROM files WHERE state = ? """, (ProcessingState.COMPLETED.value,)) row = cursor.fetchone() stats['original_size_completed'] = row[0] or 0 stats['encoded_size_completed'] = row[1] or 0 stats['avg_fps'] = row[2] or 0 stats['avg_encode_time'] = row[3] or 0 # Encoder usage cursor.execute(""" SELECT encoder_used, COUNT(*) as count FROM files WHERE state = ? AND encoder_used IS NOT NULL GROUP BY encoder_used """, (ProcessingState.COMPLETED.value,)) stats['encoder_usage'] = {row[0]: row[1] for row in cursor.fetchall()} return stats def reset_processing_files(self): """Reset files stuck in 'processing' state""" with self._lock: cursor = self.conn.cursor() cursor.execute(""" UPDATE files SET state = ?, updated_at = CURRENT_TIMESTAMP WHERE state = ? 
""", (ProcessingState.PENDING.value, ProcessingState.PROCESSING.value)) reset_count = cursor.rowcount self.conn.commit() return reset_count def close(self): """Close database connection""" if self.conn: self.conn.close() # ============================================================================= # MEDIA INSPECTION # ============================================================================= class MediaInspector: """Enhanced media inspection for Phase 3""" @staticmethod def has_subtitles(filepath: Path) -> Tuple[bool, int]: """Check if file has subtitle streams""" try: result = subprocess.run( ['ffprobe', '-v', 'error', '-select_streams', 's', '-show_entries', 'stream=codec_name', '-of', 'default=noprint_wrappers=1:nokey=1', str(filepath)], capture_output=True, text=True, timeout=30 ) if result.returncode == 0: subtitle_streams = [s for s in result.stdout.strip().split('\n') if s] count = len(subtitle_streams) return (count > 0, count) return (False, 0) except: return (False, 0) @staticmethod def get_media_info(filepath: Path) -> Dict[str, Any]: """Get detailed media information""" try: result = subprocess.run( ['ffprobe', '-v', 'quiet', '-print_format', 'json', '-show_format', '-show_streams', str(filepath)], capture_output=True, text=True, timeout=30 ) if result.returncode == 0: return json.loads(result.stdout) except Exception as e: logging.error(f"Failed to get media info for {filepath}: {e}") return {} @staticmethod def validate_file(filepath: Path) -> bool: """Validate media file""" try: result = subprocess.run( ['ffprobe', '-v', 'error', str(filepath)], capture_output=True, timeout=30 ) return result.returncode == 0 except: return False @staticmethod def get_file_hash(filepath: Path, chunk_size: int = 8192) -> str: """ Calculate a fast hash of the file using first/last chunks + size. This is faster than hashing the entire file for large videos. 
""" import hashlib try: file_size = filepath.stat().st_size # For small files (<100MB), hash the entire file if file_size < 100 * 1024 * 1024: hasher = hashlib.sha256() with open(filepath, 'rb') as f: while chunk := f.read(chunk_size): hasher.update(chunk) return hasher.hexdigest() # For large files, hash: size + first 64KB + middle 64KB + last 64KB hasher = hashlib.sha256() hasher.update(str(file_size).encode()) with open(filepath, 'rb') as f: # First chunk hasher.update(f.read(65536)) # Middle chunk f.seek(file_size // 2) hasher.update(f.read(65536)) # Last chunk f.seek(-65536, 2) hasher.update(f.read(65536)) return hasher.hexdigest() except Exception as e: logging.error(f"Failed to hash file {filepath}: {e}") return None # ============================================================================= # FILE PROCESSOR (Enhanced for Phase 3) # ============================================================================= class FileProcessor: """Enhanced file processor with GPU support and progress tracking""" def __init__(self, config: Config, db: StateDatabase, caps: EncoderCapabilities): self.config = config self.db = db self.caps = caps self.logger = logging.getLogger(f'FileProcessor-{id(self)}') self._stop_flag = threading.Event() def stop(self): """Signal processor to stop""" self._stop_flag.set() def process_file(self, filepath: Path, profile_name: Optional[str] = None) -> bool: """Process a single file with given profile""" if self._stop_flag.is_set(): return False relative_path = filepath.relative_to(self.config.movies_dir) self.logger.info(f"Processing: {relative_path}") # Get profile profile = self.config.get_profile(profile_name) # Select appropriate encoder (may fallback if GPU not available) encoder = self._select_encoder(profile.encoder) self.db.update_state(str(filepath), ProcessingState.PROCESSING) start_time = time.time() try: # Create work path work_path = self.config.work_dir / relative_path # Change extension to .mp4 if encoding HEVC/AV1 and original is .m4v # (.m4v doesn't support HEVC or AV1 codecs) is_modern_codec = encoder in ( EncoderType.INTEL_QSV_H265, EncoderType.NVIDIA_NVENC_H265, EncoderType.AMD_VAAPI_H265, EncoderType.CPU_X265, EncoderType.INTEL_QSV_AV1, EncoderType.NVIDIA_NVENC_AV1, EncoderType.AMD_VAAPI_AV1, EncoderType.CPU_AV1 ) if is_modern_codec and work_path.suffix.lower() == '.m4v': work_path = work_path.with_suffix('.mp4') relative_path = relative_path.with_suffix('.mp4') codec_name = "HEVC" if "H265" in encoder.name else "AV1" if "AV1" in encoder.name else "modern codec" self.logger.info(f"Changed output extension to .mp4 for {codec_name} compatibility") work_path.parent.mkdir(parents=True, exist_ok=True) # Re-encode with progress tracking if not self._reencode(filepath, work_path, profile, encoder): raise Exception("Encoding failed") # Calculate stats encode_time = time.time() - start_time # Verify output if not self._verify_output(work_path): raise Exception("Output verification failed") # Get encoding stats encoded_size = work_path.stat().st_size original_size = filepath.stat().st_size # Calculate FPS (approximate) media_info = MediaInspector.get_media_info(filepath) duration = float(media_info.get('format', {}).get('duration', 0)) fps = duration / encode_time if duration and encode_time > 0 else 0 # Archive original archive_path = self.config.archive_dir / relative_path.with_suffix(filepath.suffix) archive_path.parent.mkdir(parents=True, exist_ok=True) shutil.move(str(filepath), str(archive_path)) # Move encoded file into place (with potentially 

    def _select_encoder(self, preferred: EncoderType) -> EncoderType:
        """Select encoder with fallback logic"""
        # Check if preferred encoder is available
        if self._is_encoder_available(preferred):
            return preferred

        # Try to find a GPU alternative
        if self.config.prefer_gpu:
            for encoder in [EncoderType.NVIDIA_NVENC_H265,
                            EncoderType.INTEL_QSV_H265,
                            EncoderType.AMD_VAAPI_H265]:
                if self._is_encoder_available(encoder):
                    self.logger.info(f"Falling back to {encoder.name}")
                    return encoder

        # Fallback to CPU
        if self.config.fallback_to_cpu:
            if self.caps.has_x265:
                self.logger.info("Falling back to CPU H.265")
                return EncoderType.CPU_X265
            elif self.caps.has_x264:
                self.logger.info("Falling back to CPU H.264")
                return EncoderType.CPU_X264

        raise RuntimeError(f"Encoder {preferred.name} not available and no fallback found")

    def _is_encoder_available(self, encoder: EncoderType) -> bool:
        """Check if encoder is available"""
        if encoder == EncoderType.CPU_X265:
            return self.caps.has_x265
        elif encoder == EncoderType.CPU_X264:
            return self.caps.has_x264
        elif encoder == EncoderType.CPU_AV1:
            return self.caps.has_av1
        elif encoder == EncoderType.NVIDIA_NVENC_AV1:
            return self.caps.has_nvenc_av1
        elif encoder == EncoderType.INTEL_QSV_AV1:
            return self.caps.has_qsv_av1
        elif encoder == EncoderType.AMD_VAAPI_AV1:
            return self.caps.has_vaapi_av1
        elif encoder in (EncoderType.NVIDIA_NVENC_H265, EncoderType.NVIDIA_NVENC_H264):
            return self.caps.has_nvenc
        elif encoder in (EncoderType.INTEL_QSV_H265, EncoderType.INTEL_QSV_H264):
            return self.caps.has_qsv
        elif encoder in (EncoderType.AMD_VAAPI_H265, EncoderType.AMD_VAAPI_H264):
            return self.caps.has_vaapi
        return False

    def _build_ffmpeg_command(self, input_path: Path, output_path: Path,
                              profile: EncodingProfile, encoder: EncoderType) -> List[str]:
        """Build FFmpeg command for given encoder and profile"""
        cmd = ['ffmpeg', '-hide_banner', '-loglevel', 'warning', '-stats']

        # Input
        cmd.extend(['-i', str(input_path)])

        # Hardware acceleration setup
        if encoder in (EncoderType.NVIDIA_NVENC_H265, EncoderType.NVIDIA_NVENC_H264,
                       EncoderType.NVIDIA_NVENC_AV1):
            # NVENC
            if encoder == EncoderType.NVIDIA_NVENC_H265:
                video_codec = 'hevc_nvenc'
            elif encoder == EncoderType.NVIDIA_NVENC_AV1:
                video_codec = 'av1_nvenc'
            else:
                video_codec = 'h264_nvenc'
            cmd.extend([
                '-map', '0:v', '-map', '0:a',
                '-c:v', video_codec,
                '-preset', profile.preset,
                '-cq', str(profile.quality),
                '-c:a', profile.audio_codec,
                '-sn'
            ])
        elif encoder in (EncoderType.INTEL_QSV_H265, EncoderType.INTEL_QSV_H264,
                         EncoderType.INTEL_QSV_AV1):
            # QSV
            if encoder == EncoderType.INTEL_QSV_H265:
                video_codec = 'hevc_qsv'
            elif encoder == EncoderType.INTEL_QSV_AV1:
                video_codec = 'av1_qsv'
            else:
                video_codec = 'h264_qsv'
            cmd.extend([
                '-map', '0:v', '-map', '0:a',
                '-c:v', video_codec,
                '-preset', profile.preset,
                '-global_quality', str(profile.quality),
                '-c:a', profile.audio_codec,
                '-sn'
            ])
        elif encoder in (EncoderType.AMD_VAAPI_H265, EncoderType.AMD_VAAPI_H264,
                         EncoderType.AMD_VAAPI_AV1):
            # VAAPI
            if encoder == EncoderType.AMD_VAAPI_H265:
                video_codec = 'hevc_vaapi'
            elif encoder == EncoderType.AMD_VAAPI_AV1:
                video_codec = 'av1_vaapi'
            else:
                video_codec = 'h264_vaapi'
            cmd.extend([
                '-vaapi_device', '/dev/dri/renderD128',
                '-map', '0:v', '-map', '0:a',
                '-c:v', video_codec,
                '-qp', str(profile.quality),
                '-c:a', profile.audio_codec,
                '-sn'
            ])
        else:
            # CPU encoding
            if encoder == EncoderType.CPU_X265:
                video_codec = 'libx265'
            elif encoder == EncoderType.CPU_AV1:
                video_codec = 'libsvtav1'
            else:
                video_codec = 'libx264'
            cmd.extend([
                '-map', '0:v', '-map', '0:a',
                '-c:v', video_codec,
                '-preset', profile.preset,
                '-crf', str(profile.quality),
                '-c:a', profile.audio_codec,
                '-sn'
            ])

        # Output options
        cmd.extend([
            '-movflags', '+faststart',
            '-n',
            str(output_path)
        ])

        return cmd
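
    # For reference, the builder above produces a command like the following
    # for the default 'balanced_gpu' profile (file names are hypothetical):
    #
    #   ffmpeg -hide_banner -loglevel warning -stats -i input.mkv \
    #       -map 0:v -map 0:a -c:v hevc_nvenc -preset p4 -cq 23 \
    #       -c:a copy -sn -movflags +faststart -n output.mp4
    #
    # '-sn' drops all subtitle streams; '-n' refuses to overwrite an existing
    # output, so a stale work file fails fast instead of being clobbered.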

    def _reencode(self, input_path: Path, output_path: Path,
                  profile: EncodingProfile, encoder: EncoderType) -> bool:
        """Re-encode video file"""
        self.logger.info(f"Encoding with {encoder.name}, profile: {profile.name}")

        cmd = self._build_ffmpeg_command(input_path, output_path, profile, encoder)

        try:
            result = subprocess.run(cmd, capture_output=True, text=True)
            if result.returncode != 0:
                self.logger.error(f"FFmpeg error: {result.stderr}")
                return False
            return True
        except Exception as e:
            self.logger.error(f"Encoding exception: {e}")
            return False

    def _verify_output(self, output_path: Path) -> bool:
        """Verify encoded output"""
        if not output_path.exists():
            self.logger.error(f"Output file missing: {output_path}")
            return False

        size = output_path.stat().st_size
        if size < 1024:
            self.logger.error(f"Output file too small: {size} bytes")
            return False

        if not MediaInspector.validate_file(output_path):
            self.logger.error("Output failed validation")
            return False

        # Verify subtitles were removed
        has_subs, count = MediaInspector.has_subtitles(output_path)
        if has_subs:
            self.logger.error(f"Output still has {count} subtitle stream(s)")
            return False

        return True

# =============================================================================
# LIBRARY SCANNER
# =============================================================================

class LibraryScanner:
    """Library scanner (same as Phase 2)"""

    def __init__(self, config: Config, db: StateDatabase):
        self.config = config
        self.db = db
        self.logger = logging.getLogger('LibraryScanner')

    def scan(self) -> int:
        """Scan movie directory"""
        self.logger.info(f"Scanning: {self.config.movies_dir}")

        # Build set of valid extensions (lowercase for case-insensitive comparison)
        valid_extensions = {ext.lower() for ext in self.config.file_extensions}

        files_found = 0
        files_added = 0

        # Iterate all files and filter by extension case-insensitively
        for filepath in self.config.movies_dir.rglob('*'):
            if not filepath.is_file():
                continue

            # Check extension case-insensitively
            ext = filepath.suffix.lstrip('.').lower()
            if ext not in valid_extensions:
                continue

            files_found += 1

            # Check if already completed/skipped
            existing = self.db.get_file(str(filepath))
            if existing and existing['state'] in (ProcessingState.COMPLETED.value,
                                                  ProcessingState.SKIPPED.value):
                continue

            # Inspect file
            has_subs, sub_count = MediaInspector.has_subtitles(filepath)
            original_size = filepath.stat().st_size
            relative_path = str(filepath.relative_to(self.config.movies_dir))

            # Calculate file hash for duplicate detection
            file_hash = MediaInspector.get_file_hash(filepath)

            # Check for duplicates by content hash
            if file_hash:
                duplicates = self.db.find_duplicates_by_hash(file_hash)
                # Check if any duplicate has been completed
                completed_duplicate = next(
                    (d for d in duplicates
                     if d['state'] == ProcessingState.COMPLETED.value),
                    None
                )
                if completed_duplicate:
                    self.logger.info(f"Skipping duplicate of already encoded file: {filepath.name}")
                    self.logger.info(f"  Original: {completed_duplicate['relative_path']}")
                    # Mark this file as skipped
                    self.db.add_file(
                        str(filepath), relative_path, False, 0, original_size,
                        None, None, None, None, None, None, None, None, file_hash
                    )
                    self.db.update_state(
                        str(filepath), ProcessingState.SKIPPED,
                        f"Duplicate of: {completed_duplicate['relative_path']}"
                    )
                    continue

            # Get detailed media info
            media_info = MediaInspector.get_media_info(filepath)
            video_codec = None
            audio_codec = None
            audio_channels = None
            width = None
            height = None
            duration = None
            bitrate = None
            container_format = None

            if media_info and 'streams' in media_info:
                # Extract video/audio stream info (first stream of each type)
                for stream in media_info['streams']:
                    if stream.get('codec_type') == 'video' and not video_codec:
                        video_codec = stream.get('codec_name')
                        width = stream.get('width')
                        height = stream.get('height')
                    elif stream.get('codec_type') == 'audio' and not audio_codec:
                        audio_codec = stream.get('codec_name')
                        audio_channels = stream.get('channels')

                # Extract format info
                if 'format' in media_info:
                    duration = float(media_info['format'].get('duration', 0))
                    bitrate = int(media_info['format'].get('bit_rate', 0))
                    # Take the first name if ffprobe reports a comma-separated list
                    container_format = media_info['format'].get('format_name', '').split(',')[0]

            self.db.add_file(
                str(filepath), relative_path, has_subs, sub_count, original_size,
                video_codec, audio_codec, audio_channels, width, height,
                duration, bitrate, container_format, file_hash
            )
            files_added += 1

            if files_found % 10 == 0:
                self.logger.info(f"Scanned {files_found} files...")

        self.logger.info(f"Scan complete: {files_found} files found, {files_added} added/updated")
        return files_found

# =============================================================================
# PARALLEL PROCESSING COORDINATOR
# =============================================================================

class ParallelCoordinator:
    """Coordinates parallel processing with worker pools"""

    def __init__(self, config: Config, db: StateDatabase, caps: EncoderCapabilities):
        self.config = config
        self.db = db
        self.caps = caps
        self.logger = logging.getLogger('ParallelCoordinator')
        self._stop_event = threading.Event()
        self._active_workers = 0
        self._workers_lock = threading.Lock()

    def process_parallel(self, files: List[Dict], profile_name: Optional[str] = None):
        """Process files in parallel"""
        total_files = len(files)
        self.logger.info(f"Starting parallel processing of {total_files} files")
        self.logger.info(f"Max workers: {self.config.max_workers}")

        completed = 0
        failed = 0

        with concurrent.futures.ThreadPoolExecutor(max_workers=self.config.max_workers) as executor:
            # Submit all tasks
            future_to_file = {
                executor.submit(self._process_file_wrapper,
                                Path(f['filepath']), profile_name): f
                for f in files
            }

            # Process results as they complete
            for future in concurrent.futures.as_completed(future_to_file):
                if self._stop_event.is_set():
                    self.logger.info("Stop signal received, cancelling pending tasks")
                    break

                file_record = future_to_file[future]
                try:
                    success = future.result()
                    if success:
                        completed += 1
                    else:
                        failed += 1

                    # Progress update
                    progress = completed + failed
                    pct = (progress / total_files) * 100
                    self.logger.info(f"Progress: {progress}/{total_files} ({pct:.1f}%) - "
                                     f"Success: {completed}, Failed: {failed}")
                except Exception as e:
                    failed += 1
                    self.logger.error(f"Task exception: {e}")

        self.logger.info(f"Parallel processing complete: {completed} success, {failed} failed")

    def _process_file_wrapper(self, filepath: Path, profile_name: Optional[str]) -> bool:
        """Wrapper for processing a file (for thread pool)"""
        processor = FileProcessor(self.config, self.db, self.caps)

        with self._workers_lock:
            self._active_workers += 1
        try:
            return processor.process_file(filepath, profile_name)
        finally:
            with self._workers_lock:
                self._active_workers -= 1

    def stop(self):
        """Signal to stop processing"""
        self._stop_event.set()
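
# Sketch (an assumption about future wiring, not implemented above):
# config.gpu_slots is loaded but not yet enforced by ParallelCoordinator,
# so max_workers alone governs concurrency. One way to cap concurrent GPU
# encodes independently of the thread pool is a bounded semaphore acquired
# around each encode:
#
#     gpu_gate = threading.BoundedSemaphore(config.gpu_slots)
#
#     def gated_process(filepath, profile_name):
#         with gpu_gate:  # blocks while all GPU slots are busy
#             return FileProcessor(config, db, caps).process_file(
#                 filepath, profile_name)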
failed") def _process_file_wrapper(self, filepath: Path, profile_name: Optional[str]) -> bool: """Wrapper for processing file (for thread pool)""" processor = FileProcessor(self.config, self.db, self.caps) with self._workers_lock: self._active_workers += 1 try: return processor.process_file(filepath, profile_name) finally: with self._workers_lock: self._active_workers -= 1 def stop(self): """Signal to stop processing""" self._stop_event.set() # ============================================================================= # MAIN APPLICATION # ============================================================================= class ReencodeApplication: """Main application for Phase 3""" def __init__(self, config: Config): self.config = config self.db = StateDatabase(config.state_db) self.caps = EncoderDetector.detect_capabilities() if config.auto_detect_encoders else EncoderCapabilities() self.scanner = LibraryScanner(config, self.db) self.coordinator = ParallelCoordinator(config, self.db, self.caps) self.logger = logging.getLogger('ReencodeApp') # Signal handling signal.signal(signal.SIGINT, self._signal_handler) signal.signal(signal.SIGTERM, self._signal_handler) def _signal_handler(self, signum, frame): """Handle interrupt signals""" self.logger.info("Interrupt received, stopping gracefully...") self.coordinator.stop() def setup_logging(self): """Configure logging""" log_file = self.config.log_dir / 'reencode.log' from logging.handlers import RotatingFileHandler file_handler = RotatingFileHandler( log_file, maxBytes=10*1024*1024, backupCount=5 ) file_handler.setLevel(logging.DEBUG) console_handler = logging.StreamHandler() console_handler.setLevel(logging.INFO) formatter = logging.Formatter( '%(asctime)s - %(name)s - %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S' ) file_handler.setFormatter(formatter) console_handler.setFormatter(formatter) root_logger = logging.getLogger() root_logger.setLevel(logging.DEBUG) root_logger.addHandler(file_handler) root_logger.addHandler(console_handler) def print_encoder_capabilities(self): """Print detected encoder capabilities""" # Use ASCII characters for Windows compatibility yes = '[YES]' no = '[ NO]' print("\n" + "="*60) print("ENCODER CAPABILITIES") print("="*60) print(f"CPU Encoders:") print(f" H.265 (libx265): {yes if self.caps.has_x265 else no}") print(f" H.264 (libx264): {yes if self.caps.has_x264 else no}") print(f" AV1 (SVT-AV1): {yes if self.caps.has_av1 else no}") print(f"\nGPU Encoders:") print(f" NVIDIA NVENC: {yes if self.caps.has_nvenc else no}") if self.caps.has_nvenc and self.caps.nvenc_devices: print(f" Devices: {', '.join(map(str, self.caps.nvenc_devices))}") print(f" AV1: {yes if self.caps.has_nvenc_av1 else no}") print(f" Intel QSV: {yes if self.caps.has_qsv else no}") print(f" AV1: {yes if self.caps.has_qsv_av1 else no}") print(f" AMD VAAPI: {yes if self.caps.has_vaapi else no}") print(f" AV1: {yes if self.caps.has_vaapi_av1 else no}") print("="*60 + "\n") def print_statistics(self): """Print processing statistics""" stats = self.db.get_statistics() print("\n" + "="*60) print("PROCESSING STATISTICS") print("="*60) print(f"Pending: {stats['pending']:5d}") print(f"Processing: {stats['processing']:5d}") print(f"Completed: {stats['completed']:5d}") print(f"Failed: {stats['failed']:5d}") print(f"Skipped: {stats['skipped']:5d} (no subtitles)") print("-"*60) print(f"Total: {sum(v for k, v in stats.items() if k in ['pending', 'processing', 'completed', 'failed', 'skipped']):5d}") if stats['completed'] > 0: orig_size = 

    def _human_size(self, size: int) -> str:
        """Convert bytes to human readable format"""
        for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
            if size < 1024.0:
                return f"{size:.2f} {unit}"
            size /= 1024.0
        return f"{size:.2f} PB"

    def cleanup_stale_work_files(self):
        """Remove stale work files"""
        if not self.config.cleanup_stale_work:
            return

        self.logger.info("Cleaning up stale work files...")
        count = 0
        for filepath in self.config.work_dir.rglob('*'):
            if filepath.is_file():
                filepath.unlink()
                count += 1

        if count > 0:
            self.logger.info(f"Removed {count} stale work file(s)")

    def run(self, scan_only: bool = False, no_scan: bool = False,
            profile_name: Optional[str] = None):
        """Main execution"""
        self.logger.info("="*60)
        self.logger.info("ENCODERPRO - PHASE 3")
        self.logger.info(f"Version: {__version__}")
        self.logger.info("="*60)

        # Show capabilities
        if self.config.auto_detect_encoders:
            self.print_encoder_capabilities()

        # Reset stuck files
        reset_count = self.db.reset_processing_files()
        if reset_count > 0:
            self.logger.info(f"Reset {reset_count} stuck file(s)")

        # Cleanup
        self.cleanup_stale_work_files()

        # Scan (unless --no-scan flag is set)
        if not no_scan:
            self.scanner.scan()
        else:
            self.logger.info("Skipping library scan (--no-scan mode)")

        # Statistics
        self.print_statistics()

        if scan_only:
            self.logger.info("Scan-only mode: exiting")
            return

        # Get pending files
        pending_files = self.db.get_files_by_state(ProcessingState.PENDING)
        if not pending_files:
            self.logger.info("No files to process!")
            return

        self.logger.info(f"Processing {len(pending_files)} file(s)...")

        # Process with parallel coordinator
        if self.config.max_workers > 1:
            self.coordinator.process_parallel(pending_files, profile_name)
        else:
            # Single-threaded fallback
            processor = FileProcessor(self.config, self.db, self.caps)
            for file_record in pending_files:
                processor.process_file(Path(file_record['filepath']), profile_name)

        # Final statistics
        self.logger.info("="*60)
        self.logger.info("PROCESSING COMPLETE")
        self.logger.info("="*60)
        self.print_statistics()

    def cleanup(self):
        """Cleanup resources"""
        self.db.close()

# =============================================================================
# CONFIGURATION LOADING
# =============================================================================

def load_config(config_file: Optional[Path]) -> Config:
    """Load configuration from file"""
    config_dict = {}

    if config_file and config_file.exists():
        try:
            import yaml
            with open(config_file, 'r') as f:
                config_dict = yaml.safe_load(f) or {}
            logging.info(f"Loaded configuration from: {config_file}")
        except ImportError:
            logging.warning("PyYAML not installed, trying JSON")
            try:
                with open(config_file, 'r') as f:
                    config_dict = json.load(f)
                logging.info(f"Loaded configuration from: {config_file}")
            except Exception as e:
                logging.error(f"Failed to load config: {e}")

    return Config(config_dict)

# =============================================================================
# ENTRY POINT
# =============================================================================

def main():
    """Main entry point"""
    parser = argparse.ArgumentParser(
        description='encoderPro - Phase 3',
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument('-c', '--config', type=Path,
                        help='Configuration file (YAML or JSON)')
    parser.add_argument('--scan-only', action='store_true',
                        help='Only scan, do not process')
    parser.add_argument('--no-scan', action='store_true',
                        help='Skip library scan (for dashboard use)')
    parser.add_argument('--stats', action='store_true',
                        help='Show statistics and exit')
    parser.add_argument('-p', '--profile',
                        help='Encoding profile to use')
    parser.add_argument('-v', '--version', action='version',
                        version=f'%(prog)s {__version__}')

    args = parser.parse_args()

    # Load configuration
    config = load_config(args.config)

    # Create application
    app = ReencodeApplication(config)
    app.setup_logging()

    try:
        if args.stats:
            app.print_statistics()
        else:
            app.run(scan_only=args.scan_only, no_scan=args.no_scan,
                    profile_name=args.profile)
    except KeyboardInterrupt:
        app.logger.info("Interrupted by user")
    except Exception as e:
        app.logger.error(f"Fatal error: {e}", exc_info=True)
        return 1
    finally:
        app.cleanup()

    return 0

if __name__ == '__main__':
    sys.exit(main())
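
# Example invocations (assuming the script is saved as encoderpro.py; the
# config path is illustrative):
#
#   ./encoderpro.py -c /etc/reencode/config.yml --scan-only    # scan + stats only
#   ./encoderpro.py -c /etc/reencode/config.yml -p quality_cpu # encode with a profile
#   ./encoderpro.py --stats                                    # print statistics and exit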