feat: Introduce logging and type hints, replacing print statements with structured logging.

This commit is contained in:
2025-11-21 12:28:27 +01:00
parent 961297293a
commit 598d3001a6
2 changed files with 1106 additions and 306 deletions

View File

@@ -5,9 +5,11 @@ import subprocess
import shutil import shutil
import tempfile import tempfile
import json import json
import re # Added for VFR frame rate parsing import re
import logging
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from typing import List, Optional, Dict, Any, Union
REQUIRED_TOOLS = [ REQUIRED_TOOLS = [
"ffmpeg", "ffprobe", "mkvmerge", "mkvpropedit", "ffmpeg", "ffprobe", "mkvmerge", "mkvpropedit",
@@ -34,26 +36,56 @@ SVT_AV1_PARAMS = {
"progress": 2, # Detailed progress output "progress": 2, # Detailed progress output
} }
def check_tools(): def check_tools() -> None:
for tool in REQUIRED_TOOLS: for tool in REQUIRED_TOOLS:
if shutil.which(tool) is None: if shutil.which(tool) is None:
print(f"Required tool '{tool}' not found in PATH.") print(f"Required tool '{tool}' not found in PATH.")
sys.exit(1) sys.exit(1)
def run_cmd(cmd, capture_output=False, check=True): def setup_logging(log_file: Path) -> None:
"""Configures logging to write to both the console and a log file."""
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Clear existing handlers to avoid duplicates when processing multiple files
if logger.hasHandlers():
logger.handlers.clear()
# File Handler
fh = logging.FileHandler(log_file, mode='w', encoding='utf-8')
fh.setFormatter(logging.Formatter('%(message)s'))
logger.addHandler(fh)
# Console Handler
ch = logging.StreamHandler(sys.stdout)
ch.setFormatter(logging.Formatter('%(message)s'))
logger.addHandler(ch)
def run_cmd(cmd: List[str], capture_output: bool = False, check: bool = True) -> Optional[str]:
"""
Runs a command.
If capture_output is True, returns the stdout string.
If capture_output is False, streams output to the console (passthrough).
Always logs the command execution.
"""
cmd_str = " ".join(str(c) for c in cmd)
logging.info(f"Executing: {cmd_str}")
if capture_output: if capture_output:
result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=check, text=True) result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=check, text=True)
return result.stdout return result.stdout
else: else:
# Passthrough to console
subprocess.run(cmd, check=check) subprocess.run(cmd, check=check)
return None
def convert_audio_track(index, ch, lang, audio_temp_dir, source_file, should_downmix): def convert_audio_track(index: int, ch: int, lang: str, audio_temp_dir: str, source_file: str, should_downmix: bool) -> Path:
audio_temp_path = Path(audio_temp_dir) audio_temp_path = Path(audio_temp_dir)
temp_extracted = audio_temp_path / f"track_{index}_extracted.flac" temp_extracted = audio_temp_path / f"track_{index}_extracted.flac"
temp_normalized = audio_temp_path / f"track_{index}_normalized.flac" temp_normalized = audio_temp_path / f"track_{index}_normalized.flac"
final_opus = audio_temp_path / f"track_{index}_final.opus" final_opus = audio_temp_path / f"track_{index}_final.opus"
print(f" - Extracting Audio Track #{index} to FLAC...") logging.info(f" - Extracting Audio Track #{index} to FLAC...")
ffmpeg_args = [ ffmpeg_args = [
"ffmpeg", "-v", "quiet", "-stats", "-y", "-i", str(source_file), "-map", f"0:{index}", "-map_metadata", "-1" "ffmpeg", "-v", "quiet", "-stats", "-y", "-i", str(source_file), "-map", f"0:{index}", "-map_metadata", "-1"
] ]
@@ -67,17 +99,19 @@ def convert_audio_track(index, ch, lang, audio_temp_dir, source_file, should_dow
ffmpeg_args += ["-c:a", "flac", str(temp_extracted)] ffmpeg_args += ["-c:a", "flac", str(temp_extracted)]
run_cmd(ffmpeg_args) run_cmd(ffmpeg_args)
print(f" - Normalizing Audio Track #{index} with ffmpeg (loudnorm 2-pass)...") logging.info(f" - Normalizing Audio Track #{index} with ffmpeg (loudnorm 2-pass)...")
# First pass: Analyze the audio to get loudnorm stats # First pass: Analyze the audio to get loudnorm stats
# The stats are printed to stderr, so we must use subprocess.run directly to capture it. # The stats are printed to stderr, so we must use subprocess.run directly to capture it.
print(" - Pass 1: Analyzing...") logging.info(" - Pass 1: Analyzing...")
# Log the command for consistency
analyze_cmd = ["ffmpeg", "-v", "info", "-i", str(temp_extracted), "-af", "loudnorm=I=-23:LRA=7:tp=-1:print_format=json", "-f", "null", "-"]
logging.info(f"Executing: {' '.join(analyze_cmd)}")
result = subprocess.run( result = subprocess.run(
["ffmpeg", "-v", "info", "-i", str(temp_extracted), "-af", "loudnorm=I=-23:LRA=7:tp=-1:print_format=json", "-f", "null", "-"], analyze_cmd,
capture_output=True, text=True, check=True) capture_output=True, text=True, check=True)
# Find the start of the JSON block in stderr and parse it. # Find the start of the JSON block in stderr and parse it.
# This is more robust than slicing the last N lines.
# We find the start and end of the JSON block to avoid parsing extra data.
stderr_output = result.stderr stderr_output = result.stderr
json_start_index = stderr_output.find('{') json_start_index = stderr_output.find('{')
if json_start_index == -1: if json_start_index == -1:
@@ -97,7 +131,7 @@ def convert_audio_track(index, ch, lang, audio_temp_dir, source_file, should_dow
stats = json.loads(stderr_output[json_start_index:json_end_index]) stats = json.loads(stderr_output[json_start_index:json_end_index])
# Second pass: Apply the normalization using the stats from the first pass # Second pass: Apply the normalization using the stats from the first pass
print(" - Pass 2: Applying normalization...") logging.info(" - Pass 2: Applying normalization...")
run_cmd([ run_cmd([
"ffmpeg", "-v", "quiet", "-stats", "-y", "-i", str(temp_extracted), "-af", "ffmpeg", "-v", "quiet", "-stats", "-y", "-i", str(temp_extracted), "-af",
f"loudnorm=I=-23:LRA=7:tp=-1:measured_i={stats['input_i']}:measured_lra={stats['input_lra']}:measured_tp={stats['input_tp']}:measured_thresh={stats['input_thresh']}:offset={stats['target_offset']}", f"loudnorm=I=-23:LRA=7:tp=-1:measured_i={stats['input_i']}:measured_lra={stats['input_lra']}:measured_tp={stats['input_tp']}:measured_thresh={stats['input_thresh']}:offset={stats['target_offset']}",
@@ -105,16 +139,11 @@ def convert_audio_track(index, ch, lang, audio_temp_dir, source_file, should_dow
]) ])
# Set bitrate based on the final channel count of the Opus file. # Set bitrate based on the final channel count of the Opus file.
# If we are downmixing, the result is stereo.
# If not, the result has the original channel count.
is_being_downmixed = should_downmix and ch >= 6 is_being_downmixed = should_downmix and ch >= 6
if is_being_downmixed: if is_being_downmixed:
# Downmixing from 5.1 or 7.1 results in a stereo track.
bitrate = "128k" bitrate = "128k"
else: else:
# Not downmixing (or source is already stereo or less).
# Base bitrate on the source channel count.
if ch == 1: # Mono if ch == 1: # Mono
bitrate = "64k" bitrate = "64k"
elif ch == 2: # Stereo elif ch == 2: # Stereo
@@ -124,16 +153,16 @@ def convert_audio_track(index, ch, lang, audio_temp_dir, source_file, should_dow
elif ch == 8: # 7.1 Surround elif ch == 8: # 7.1 Surround
bitrate = "384k" bitrate = "384k"
else: # Other layouts else: # Other layouts
bitrate = "96k" # A sensible default for other/uncommon layouts. bitrate = "96k"
print(f" - Encoding Audio Track #{index} to Opus at {bitrate}...") logging.info(f" - Encoding Audio Track #{index} to Opus at {bitrate}...")
run_cmd([ run_cmd([
"opusenc", "--vbr", "--bitrate", bitrate, str(temp_normalized), str(final_opus) "opusenc", "--vbr", "--bitrate", bitrate, str(temp_normalized), str(final_opus)
]) ])
return final_opus return final_opus
def convert_video(source_file_base, source_file_full, is_vfr, target_cfr_fps_for_handbrake, autocrop_filter=None): def convert_video(source_file_base: str, source_file_full: str, is_vfr: bool, target_cfr_fps_for_handbrake: Optional[str], autocrop_filter: Optional[str] = None) -> tuple[Path, Optional[Path]]:
print(" --- Starting Video Processing ---") logging.info(" --- Starting Video Processing ---")
# source_file_base is file_path.stem (e.g., "my.anime.episode.01") # source_file_base is file_path.stem (e.g., "my.anime.episode.01")
vpy_file = Path(f"{source_file_base}.vpy") vpy_file = Path(f"{source_file_base}.vpy")
ut_video_file = Path(f"{source_file_base}.ut.mkv") ut_video_file = Path(f"{source_file_base}.ut.mkv")
@@ -143,7 +172,7 @@ def convert_video(source_file_base, source_file_full, is_vfr, target_cfr_fps_for
current_input_for_utvideo = Path(source_file_full) current_input_for_utvideo = Path(source_file_full)
if is_vfr and target_cfr_fps_for_handbrake: if is_vfr and target_cfr_fps_for_handbrake:
print(f" - Source is VFR. Converting to CFR ({target_cfr_fps_for_handbrake}) with HandBrakeCLI...") logging.info(f" - Source is VFR. Converting to CFR ({target_cfr_fps_for_handbrake}) with HandBrakeCLI...")
handbrake_cfr_intermediate_file = Path(f"{source_file_base}.cfr_temp.mkv") handbrake_cfr_intermediate_file = Path(f"{source_file_base}.cfr_temp.mkv")
handbrake_args = [ handbrake_args = [
"HandBrakeCLI", "HandBrakeCLI",
@@ -159,33 +188,37 @@ def convert_video(source_file_base, source_file_full, is_vfr, target_cfr_fps_for
"--subtitle", "none", "--subtitle", "none",
"--crop-mode", "none" # Disable auto-cropping "--crop-mode", "none" # Disable auto-cropping
] ]
print(f" - Running HandBrakeCLI: {' '.join(handbrake_args)}") logging.info(f" - Running HandBrakeCLI: {' '.join(handbrake_args)}")
try: try:
run_cmd(handbrake_args) run_cmd(handbrake_args)
if handbrake_cfr_intermediate_file.exists() and handbrake_cfr_intermediate_file.stat().st_size > 0: if handbrake_cfr_intermediate_file.exists() and handbrake_cfr_intermediate_file.stat().st_size > 0:
print(f" - HandBrake VFR to CFR conversion successful: {handbrake_cfr_intermediate_file}") logging.info(f" - HandBrake VFR to CFR conversion successful: {handbrake_cfr_intermediate_file}")
current_input_for_utvideo = handbrake_cfr_intermediate_file current_input_for_utvideo = handbrake_cfr_intermediate_file
else: else:
print(f" - Warning: HandBrakeCLI VFR-to-CFR conversion failed or produced an empty file. Proceeding with original source for UTVideo.") logging.info(f" - Warning: HandBrakeCLI VFR-to-CFR conversion failed or produced an empty file. Proceeding with original source for UTVideo.")
handbrake_cfr_intermediate_file = None # Ensure it's None if failed handbrake_cfr_intermediate_file = None # Ensure it's None if failed
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
print(f" - Error during HandBrakeCLI execution: {e}") logging.info(f" - Error during HandBrakeCLI execution: {e}")
print(f" - Proceeding with original source for UTVideo.") logging.info(f" - Proceeding with original source for UTVideo.")
handbrake_cfr_intermediate_file = None # Ensure it's None if failed handbrake_cfr_intermediate_file = None # Ensure it's None if failed
print(" - Creating UTVideo intermediate file (overwriting if exists)...") logging.info(" - Creating UTVideo intermediate file (overwriting if exists)...")
# Check if source is already UTVideo # Check if source is already UTVideo
ffprobe_cmd = [ ffprobe_cmd = [
"ffprobe", "-v", "error", "-select_streams", "v:0", "ffprobe", "-v", "error", "-select_streams", "v:0",
"-show_entries", "stream=codec_name", "-of", "default=noprint_wrappers=1:nokey=1", "-show_entries", "stream=codec_name", "-of", "default=noprint_wrappers=1:nokey=1",
str(current_input_for_utvideo) # Use current input, which might be HandBrake output str(current_input_for_utvideo) # Use current input, which might be HandBrake output
] ]
source_codec = run_cmd(ffprobe_cmd, capture_output=True, check=True).strip() source_codec = run_cmd(ffprobe_cmd, capture_output=True, check=True)
if source_codec:
source_codec = source_codec.strip()
else:
source_codec = ""
video_codec_args = ["-c:v", "utvideo"] video_codec_args = ["-c:v", "utvideo"]
if source_codec == "utvideo" and current_input_for_utvideo == Path(source_file_full): # Only copy if original was UTVideo if source_codec == "utvideo" and current_input_for_utvideo == Path(source_file_full): # Only copy if original was UTVideo
print(" - Source is already UTVideo. Copying video stream...") logging.info(" - Source is already UTVideo. Copying video stream...")
video_codec_args = ["-c:v", "copy"] video_codec_args = ["-c:v", "copy"]
ffmpeg_args = [ ffmpeg_args = [
@@ -197,7 +230,7 @@ def convert_video(source_file_base, source_file_full, is_vfr, target_cfr_fps_for
ffmpeg_args += video_codec_args + [str(ut_video_file)] ffmpeg_args += video_codec_args + [str(ut_video_file)]
run_cmd(ffmpeg_args) run_cmd(ffmpeg_args)
print(" - Indexing UTVideo file with ffmsindex for VapourSynth...") logging.info(" - Indexing UTVideo file with ffmsindex for VapourSynth...")
ffmsindex_args = ["ffmsindex", "-f", str(ut_video_file)] ffmsindex_args = ["ffmsindex", "-f", str(ut_video_file)]
run_cmd(ffmsindex_args) run_cmd(ffmsindex_args)
@@ -212,14 +245,14 @@ clip.set_output()
with vpy_file.open("w", encoding="utf-8") as f: with vpy_file.open("w", encoding="utf-8") as f:
f.write(vpy_script_content) f.write(vpy_script_content)
print(" - Starting AV1 encode with av1an (this will take a long time)...") logging.info(" - Starting AV1 encode with av1an (this will take a long time)...")
total_cores = os.cpu_count() or 4 # Fallback if cpu_count is None total_cores = os.cpu_count() or 4 # Fallback if cpu_count is None
workers = max(1, (total_cores // 2) - 1) # Half the cores minus one, with a minimum of 1 worker. workers = max(1, (total_cores // 2) - 1) # Half the cores minus one, with a minimum of 1 worker.
print(f" - Using {workers} workers for av1an (Total Cores: {total_cores}, Logic: (Cores/2)-1).") logging.info(f" - Using {workers} workers for av1an (Total Cores: {total_cores}, Logic: (Cores/2)-1).")
# Create the parameter string for av1an's -v option, which expects a single string. # Create the parameter string for av1an's -v option, which expects a single string.
av1an_video_params_str = " ".join([f"--{key} {value}" for key, value in SVT_AV1_PARAMS.items()]) av1an_video_params_str = " ".join([f"--{key} {value}" for key, value in SVT_AV1_PARAMS.items()])
print(f" - Using SVT-AV1 parameters: {av1an_video_params_str}") logging.info(f" - Using SVT-AV1 parameters: {av1an_video_params_str}")
av1an_enc_args = [ av1an_enc_args = [
"av1an", "-i", str(vpy_file), "-o", str(encoded_video_file), "-n", "av1an", "-i", str(vpy_file), "-o", str(encoded_video_file), "-n",
@@ -229,7 +262,7 @@ clip.set_output()
"-v", av1an_video_params_str "-v", av1an_video_params_str
] ]
run_cmd(av1an_enc_args) run_cmd(av1an_enc_args)
print(" --- Finished Video Processing ---") logging.info(" --- Finished Video Processing ---")
return encoded_video_file, handbrake_cfr_intermediate_file return encoded_video_file, handbrake_cfr_intermediate_file
def is_ffmpeg_decodable(file_path): def is_ffmpeg_decodable(file_path):
@@ -470,79 +503,62 @@ def detect_autocrop_filter(input_file, significant_crop_threshold=5.0, min_crop=
return None return None
return _analyze_video_cropdetect(input_file, duration, width, height, max(1, os.cpu_count() // 2), significant_crop_threshold, min_crop, debug) return _analyze_video_cropdetect(input_file, duration, width, height, max(1, os.cpu_count() // 2), significant_crop_threshold, min_crop, debug)
def main(no_downmix=False, autocrop=False, speed=None, quality=None, grain=None): def parse_arguments() -> argparse.Namespace:
check_tools() parser = argparse.ArgumentParser(description="Batch-process MKV files with resumable video encoding, audio downmixing, per-file logging, and optional autocrop.")
parser.add_argument("--no-downmix", action="store_true", help="Preserve original audio channel layout.")
# Override default SVT-AV1 params if provided via command line parser.add_argument("--autocrop", action="store_true", help="Automatically detect and crop black bars from video using cropdetect.")
if speed: parser.add_argument("--speed", type=str, help="Set the encoding speed. Possible values: slower, slow, medium, fast, faster.")
SVT_AV1_PARAMS["speed"] = speed parser.add_argument("--quality", type=str, help="Set the encoding quality. Possible values: lowest, low, medium, high, higher.")
if quality: parser.add_argument("--grain", type=int, help="Set the film-grain value (number). Adjusts the film grain synthesis level.")
SVT_AV1_PARAMS["quality"] = quality return parser.parse_args()
if grain is not None:
SVT_AV1_PARAMS["film-grain"] = grain
def process_file(file_path: Path, no_downmix: bool, autocrop: bool) -> None:
current_dir = Path(".") current_dir = Path(".")
files_to_process = sorted(
f for f in current_dir.glob("*.mkv")
if not (f.name.endswith(".ut.mkv") or f.name.startswith("temp-") or f.name.startswith("output-") or f.name.endswith(".cfr_temp.mkv"))
)
if not files_to_process:
print("No MKV files found to process. Exiting.")
return
DIR_COMPLETED.mkdir(exist_ok=True, parents=True)
DIR_ORIGINAL.mkdir(exist_ok=True, parents=True)
DIR_CONV_LOGS.mkdir(exist_ok=True, parents=True)
while True:
files_to_process = sorted(
f for f in current_dir.glob("*.mkv")
if not (f.name.endswith(".ut.mkv") or f.name.startswith("temp-") or f.name.startswith("output-") or f.name.endswith(".cfr_temp.mkv"))
)
if not files_to_process:
print("No more .mkv files found to process in the current directory. The script will now exit.")
break
file_path = files_to_process[0]
if not is_ffmpeg_decodable(file_path):
print(f"ERROR: ffmpeg cannot decode '{file_path.name}'. Skipping this file.", file=sys.stderr)
shutil.move(str(file_path), DIR_ORIGINAL / file_path.name)
continue
print("-" * shutil.get_terminal_size(fallback=(80, 24)).columns)
log_file_name = f"{file_path.stem}.log" log_file_name = f"{file_path.stem}.log"
log_file_path = DIR_CONV_LOGS / log_file_name log_file_path = DIR_CONV_LOGS / log_file_name
original_stdout_console = sys.stdout
original_stderr_console = sys.stderr # Setup logging for this file
print(f"Processing: {file_path.name}", file=original_stdout_console) setup_logging(log_file_path)
print(f"Logging output to: {log_file_path}", file=original_stdout_console)
log_file_handle = None logging.info(f"STARTING LOG FOR: {file_path.name}")
processing_error_occurred = False logging.info(f"Processing started at: {datetime.now()}")
date_for_runtime_calc = datetime.now() logging.info(f"Full input file path: {file_path.resolve()}")
try: logging.info("-" * shutil.get_terminal_size(fallback=(80, 24)).columns)
log_file_handle = open(log_file_path, 'w', encoding='utf-8')
sys.stdout = log_file_handle
sys.stderr = log_file_handle
print(f"STARTING LOG FOR: {file_path.name}")
print(f"Processing started at: {date_for_runtime_calc}")
print(f"Full input file path: {file_path.resolve()}")
print("-" * shutil.get_terminal_size(fallback=(80, 24)).columns)
input_file_abs = file_path.resolve() input_file_abs = file_path.resolve()
intermediate_output_file = current_dir / f"output-{file_path.name}" intermediate_output_file = current_dir / f"output-{file_path.name}"
audio_temp_dir = None audio_temp_dir = None
handbrake_intermediate_for_cleanup = None handbrake_intermediate_for_cleanup = None
processing_error_occurred = False
start_time = datetime.now()
try: try:
audio_temp_dir = tempfile.mkdtemp(prefix="anime_audio_") audio_temp_dir = tempfile.mkdtemp(prefix="anime_audio_")
print(f"Audio temporary directory created at: {audio_temp_dir}") logging.info(f"Audio temporary directory created at: {audio_temp_dir}")
print(f"Analyzing file: {input_file_abs}") logging.info(f"Analyzing file: {input_file_abs}")
ffprobe_info_json = run_cmd([ ffprobe_info_json = run_cmd([
"ffprobe", "-v", "quiet", "-print_format", "json", "-show_streams", "-show_format", str(input_file_abs) "ffprobe", "-v", "quiet", "-print_format", "json", "-show_streams", "-show_format", str(input_file_abs)
], capture_output=True) ], capture_output=True)
if not ffprobe_info_json:
raise RuntimeError("ffprobe failed to return info")
ffprobe_info = json.loads(ffprobe_info_json) ffprobe_info = json.loads(ffprobe_info_json)
mkvmerge_info_json = run_cmd([ mkvmerge_info_json = run_cmd([
"mkvmerge", "-J", str(input_file_abs) "mkvmerge", "-J", str(input_file_abs)
], capture_output=True) ], capture_output=True)
if not mkvmerge_info_json:
raise RuntimeError("mkvmerge failed to return info")
mkv_info = json.loads(mkvmerge_info_json) mkv_info = json.loads(mkvmerge_info_json)
mediainfo_json = run_cmd([ mediainfo_json = run_cmd([
"mediainfo", "--Output=JSON", "-f", str(input_file_abs) "mediainfo", "--Output=JSON", "-f", str(input_file_abs)
], capture_output=True) ], capture_output=True)
if not mediainfo_json:
raise RuntimeError("mediainfo failed to return info")
media_info = json.loads(mediainfo_json) media_info = json.loads(mediainfo_json)
is_vfr = False is_vfr = False
target_cfr_fps_for_handbrake = None target_cfr_fps_for_handbrake = None
video_track_info = None video_track_info = None
@@ -551,11 +567,12 @@ def main(no_downmix=False, autocrop=False, speed=None, quality=None, grain=None)
if track.get("@type") == "Video": if track.get("@type") == "Video":
video_track_info = track video_track_info = track
break break
if video_track_info: if video_track_info:
frame_rate_mode = video_track_info.get("FrameRate_Mode") frame_rate_mode = video_track_info.get("FrameRate_Mode")
if frame_rate_mode and frame_rate_mode.upper() in ["VFR", "VARIABLE"]: if frame_rate_mode and frame_rate_mode.upper() in ["VFR", "VARIABLE"]:
is_vfr = True is_vfr = True
print(f" - Detected VFR based on MediaInfo FrameRate_Mode: {frame_rate_mode}") logging.info(f" - Detected VFR based on MediaInfo FrameRate_Mode: {frame_rate_mode}")
original_fps_str = video_track_info.get("FrameRate_Original_String") original_fps_str = video_track_info.get("FrameRate_Original_String")
if original_fps_str: if original_fps_str:
match = re.search(r'\((\d+/\d+)\)', original_fps_str) match = re.search(r'\((\d+/\d+)\)', original_fps_str)
@@ -568,35 +585,38 @@ def main(no_downmix=False, autocrop=False, speed=None, quality=None, grain=None)
if not target_cfr_fps_for_handbrake: if not target_cfr_fps_for_handbrake:
target_cfr_fps_for_handbrake = video_track_info.get("FrameRate") target_cfr_fps_for_handbrake = video_track_info.get("FrameRate")
if target_cfr_fps_for_handbrake: if target_cfr_fps_for_handbrake:
print(f" - Using MediaInfo FrameRate ({target_cfr_fps_for_handbrake}) as fallback for HandBrake target FPS.") logging.info(f" - Using MediaInfo FrameRate ({target_cfr_fps_for_handbrake}) as fallback for HandBrake target FPS.")
if target_cfr_fps_for_handbrake: if target_cfr_fps_for_handbrake:
print(f" - Target CFR for HandBrake: {target_cfr_fps_for_handbrake}") logging.info(f" - Target CFR for HandBrake: {target_cfr_fps_for_handbrake}")
if isinstance(target_cfr_fps_for_handbrake, str) and "/" in target_cfr_fps_for_handbrake: if isinstance(target_cfr_fps_for_handbrake, str) and "/" in target_cfr_fps_for_handbrake:
try: try:
num, den = map(float, target_cfr_fps_for_handbrake.split('/')) num, den = map(float, target_cfr_fps_for_handbrake.split('/'))
target_cfr_fps_for_handbrake = f"{num / den:.3f}" target_cfr_fps_for_handbrake = f"{num / den:.3f}"
print(f" - Converted fractional FPS to decimal for HandBrake: {target_cfr_fps_for_handbrake}") logging.info(f" - Converted fractional FPS to decimal for HandBrake: {target_cfr_fps_for_handbrake}")
except ValueError: except ValueError:
print(f" - Warning: Could not parse fractional FPS '{target_cfr_fps_for_handbrake}'. HandBrakeCLI might fail.") logging.info(f" - Warning: Could not parse fractional FPS '{target_cfr_fps_for_handbrake}'. HandBrakeCLI might fail.")
is_vfr = False is_vfr = False
else: else:
print(" - Warning: VFR detected, but could not determine target CFR from MediaInfo. Will attempt standard UTVideo conversion without HandBrake.") logging.info(" - Warning: VFR detected, but could not determine target CFR from MediaInfo. Will attempt standard UTVideo conversion without HandBrake.")
is_vfr = False is_vfr = False
else: else:
print(f" - Video appears to be CFR or FrameRate_Mode not specified as VFR/Variable by MediaInfo.") logging.info(f" - Video appears to be CFR or FrameRate_Mode not specified as VFR/Variable by MediaInfo.")
autocrop_filter = None autocrop_filter = None
if autocrop: if autocrop:
print("--- Running autocrop detection ---") logging.info("--- Running autocrop detection ---")
autocrop_filter = detect_autocrop_filter(str(input_file_abs)) autocrop_filter = detect_autocrop_filter(str(input_file_abs))
if autocrop_filter: if autocrop_filter:
print(f" - Autocrop filter detected: {autocrop_filter}") logging.info(f" - Autocrop filter detected: {autocrop_filter}")
else: else:
print(" - No crop needed or detected.") logging.info(" - No crop needed or detected.")
encoded_video_file, handbrake_intermediate_for_cleanup = convert_video( encoded_video_file, handbrake_intermediate_for_cleanup = convert_video(
file_path.stem, str(input_file_abs), is_vfr, target_cfr_fps_for_handbrake, autocrop_filter=autocrop_filter file_path.stem, str(input_file_abs), is_vfr, target_cfr_fps_for_handbrake, autocrop_filter=autocrop_filter
) )
print("--- Starting Audio Processing ---") logging.info("--- Starting Audio Processing ---")
processed_audio_files = [] processed_audio_files = []
audio_tracks_to_remux = [] audio_tracks_to_remux = []
audio_streams = [s for s in ffprobe_info.get("streams", []) if s.get("codec_type") == "audio"] audio_streams = [s for s in ffprobe_info.get("streams", []) if s.get("codec_type") == "audio"]
@@ -642,7 +662,7 @@ def main(no_downmix=False, autocrop=False, speed=None, quality=None, grain=None)
except Exception: except Exception:
track_delay = 0 track_delay = 0
print(f"Processing Audio Stream #{stream_index} (TID: {track_id}, Codec: {codec}, Channels: {channels})") logging.info(f"Processing Audio Stream #{stream_index} (TID: {track_id}, Codec: {codec}, Channels: {channels})")
if codec in REMUX_CODECS: if codec in REMUX_CODECS:
audio_tracks_to_remux.append(str(track_id)) audio_tracks_to_remux.append(str(track_id))
else: else:
@@ -657,10 +677,10 @@ def main(no_downmix=False, autocrop=False, speed=None, quality=None, grain=None)
"Delay": track_delay "Delay": track_delay
}) })
print("--- Finished Audio Processing ---") logging.info("--- Finished Audio Processing ---")
# Final mux # Final mux
print("Assembling final file with mkvmerge...") logging.info("Assembling final file with mkvmerge...")
mkvmerge_args = ["mkvmerge", "-o", str(intermediate_output_file), str(encoded_video_file)] mkvmerge_args = ["mkvmerge", "-o", str(intermediate_output_file), str(encoded_video_file)]
for file_info in processed_audio_files: for file_info in processed_audio_files:
sync_switch = ["--sync", f"0:{file_info['Delay']}"] if file_info["Delay"] else [] sync_switch = ["--sync", f"0:{file_info['Delay']}"] if file_info["Delay"] else []
@@ -678,11 +698,11 @@ def main(no_downmix=False, autocrop=False, speed=None, quality=None, grain=None)
run_cmd(mkvmerge_args) run_cmd(mkvmerge_args)
# Move files # Move files
print("Moving files to final destinations...") logging.info("Moving files to final destinations...")
shutil.move(str(file_path), DIR_ORIGINAL / file_path.name) shutil.move(str(file_path), DIR_ORIGINAL / file_path.name)
shutil.move(str(intermediate_output_file), DIR_COMPLETED / file_path.name) shutil.move(str(intermediate_output_file), DIR_COMPLETED / file_path.name)
print("Cleaning up persistent video temporary files (after successful processing)...") logging.info("Cleaning up persistent video temporary files (after successful processing)...")
video_temp_files_on_success = [ video_temp_files_on_success = [
current_dir / f"{file_path.stem}.vpy", current_dir / f"{file_path.stem}.vpy",
current_dir / f"{file_path.stem}.ut.mkv", current_dir / f"{file_path.stem}.ut.mkv",
@@ -695,79 +715,86 @@ def main(no_downmix=False, autocrop=False, speed=None, quality=None, grain=None)
for temp_vid_file in video_temp_files_on_success: for temp_vid_file in video_temp_files_on_success:
if temp_vid_file.exists(): if temp_vid_file.exists():
print(f" Deleting: {temp_vid_file}") logging.info(f" Deleting: {temp_vid_file}")
temp_vid_file.unlink(missing_ok=True) temp_vid_file.unlink(missing_ok=True)
else: else:
print(f" Skipping (not found): {temp_vid_file}") logging.info(f" Skipping (not found): {temp_vid_file}")
except Exception as e: except Exception as e:
print(f"ERROR: An error occurred while processing '{file_path.name}': {e}", file=sys.stderr) # Goes to log logging.error(f"ERROR: An error occurred while processing '{file_path.name}': {e}")
original_stderr_console.write(f"ERROR during processing of '{file_path.name}': {e}\nSee log '{log_file_path}' for details.\n")
processing_error_occurred = True processing_error_occurred = True
finally: finally:
# This is the original 'finally' block. Its prints go to the log file. logging.info("--- Starting Universal Cleanup (for this file) ---")
print("--- Starting Universal Cleanup (for this file) ---") logging.info(" - Cleaning up disposable audio temporary directory...")
print(" - Cleaning up disposable audio temporary directory...")
if audio_temp_dir and Path(audio_temp_dir).exists(): if audio_temp_dir and Path(audio_temp_dir).exists():
shutil.rmtree(audio_temp_dir, ignore_errors=True) shutil.rmtree(audio_temp_dir, ignore_errors=True)
print(f" - Deleted audio temp dir: {audio_temp_dir}") logging.info(f" - Deleted audio temp dir: {audio_temp_dir}")
elif audio_temp_dir: # Was created but now not found elif audio_temp_dir:
print(f" - Audio temp dir not found or already cleaned: {audio_temp_dir}") logging.info(f" - Audio temp dir not found or already cleaned: {audio_temp_dir}")
else: # Was never created else:
print(f" - Audio temp dir was not created.") logging.info(f" - Audio temp dir was not created.")
print(" - Cleaning up intermediate output file (if it wasn't moved on success)...") logging.info(" - Cleaning up intermediate output file (if it wasn't moved on success)...")
if intermediate_output_file.exists(): # Check if it still exists (e.g. error before move) if intermediate_output_file.exists():
if processing_error_occurred: if processing_error_occurred:
print(f" - WARNING: Processing error occurred. Intermediate output file '{intermediate_output_file}' is being preserved at its original path for inspection.") logging.warning(f" - WARNING: Processing error occurred. Intermediate output file '{intermediate_output_file}' is being preserved at its original path for inspection.")
else: else:
# No processing error, so it should have been moved. logging.info(f" - INFO: Intermediate output file '{intermediate_output_file}' found at original path despite no errors (expected to be moved). Cleaning up.")
# If it's still here, it's unexpected but we'll clean it up. intermediate_output_file.unlink(missing_ok=True)
print(f" - INFO: Intermediate output file '{intermediate_output_file}' found at original path despite no errors (expected to be moved). Cleaning up.") logging.info(f" - Deleted intermediate output file from original path: {intermediate_output_file}")
intermediate_output_file.unlink(missing_ok=True) # Only unlink if no error and it exists
print(f" - Deleted intermediate output file from original path: {intermediate_output_file}")
else: else:
# File does not exist at original path
if not processing_error_occurred: if not processing_error_occurred:
print(f" - Intermediate output file successfully moved (not found at original path, as expected): {intermediate_output_file}") logging.info(f" - Intermediate output file successfully moved (not found at original path, as expected): {intermediate_output_file}")
else: else:
print(f" - Processing error occurred, and intermediate output file '{intermediate_output_file}' not found at original path (likely not created or cleaned by another step).") logging.info(f" - Processing error occurred, and intermediate output file '{intermediate_output_file}' not found at original path (likely not created or cleaned by another step).")
# --- End of original per-file processing block ---
print(f"FINISHED LOG FOR: {file_path.name}") runtime = datetime.now() - start_time
# --- End of log-specific messages ---
finally: # Outer finally for restoring stdout/stderr and closing log file
runtime = datetime.now() - date_for_runtime_calc
runtime_str = str(runtime).split('.')[0] runtime_str = str(runtime).split('.')[0]
logging.info(f"\nTotal runtime for this file: {runtime_str}")
logging.info(f"FINISHED LOG FOR: {file_path.name}")
# This print goes to the log file, as stdout is not yet restored. def main() -> None:
print(f"\nTotal runtime for this file: {runtime_str}") args = parse_arguments()
check_tools()
if sys.stdout != original_stdout_console: # Override default SVT-AV1 params if provided via command line
sys.stdout = original_stdout_console if args.speed:
if sys.stderr != original_stderr_console: SVT_AV1_PARAMS["speed"] = args.speed
sys.stderr = original_stderr_console if args.quality:
if log_file_handle: SVT_AV1_PARAMS["quality"] = args.quality
log_file_handle.close() if args.grain is not None:
SVT_AV1_PARAMS["film-grain"] = args.grain
# Announce to console (original stdout/stderr) that this file is done current_dir = Path(".")
if processing_error_occurred: DIR_COMPLETED.mkdir(exist_ok=True, parents=True)
original_stderr_console.write(f"File: {file_path.name}\n") DIR_ORIGINAL.mkdir(exist_ok=True, parents=True)
original_stderr_console.write(f"Log: {log_file_path}\n") DIR_CONV_LOGS.mkdir(exist_ok=True, parents=True)
original_stderr_console.write(f"Runtime: {runtime_str}\n")
else: files_to_process = sorted(
original_stdout_console.write(f"File: {file_path.name}\n") f for f in current_dir.glob("*.mkv")
original_stdout_console.write(f"Log: {log_file_path}\n") if not (f.name.endswith(".ut.mkv") or f.name.startswith("temp-") or f.name.startswith("output-") or f.name.endswith(".cfr_temp.mkv"))
original_stdout_console.write(f"Runtime: {runtime_str}\n") )
if not files_to_process:
print("No MKV files found to process. Exiting.")
return
while True:
files_to_process = sorted(
f for f in current_dir.glob("*.mkv")
if not (f.name.endswith(".ut.mkv") or f.name.startswith("temp-") or f.name.startswith("output-") or f.name.endswith(".cfr_temp.mkv"))
)
if not files_to_process:
print("No more .mkv files found to process in the current directory. The script will now exit.")
break
file_path = files_to_process[0]
if not is_ffmpeg_decodable(file_path):
print(f"ERROR: ffmpeg cannot decode '{file_path.name}'. Skipping this file.", file=sys.stderr)
shutil.move(str(file_path), DIR_ORIGINAL / file_path.name)
continue
process_file(file_path, args.no_downmix, args.autocrop)
if __name__ == "__main__": if __name__ == "__main__":
import argparse import argparse
parser = argparse.ArgumentParser(description="Batch-process MKV files with resumable video encoding, audio downmixing, per-file logging, and optional autocrop.") main()
parser.add_argument("--no-downmix", action="store_true", help="Preserve original audio channel layout.")
parser.add_argument("--autocrop", action="store_true", help="Automatically detect and crop black bars from video using cropdetect.")
parser.add_argument("--speed", type=str, help="Set the encoding speed. Possible values: slower, slow, medium, fast, faster.")
parser.add_argument("--quality", type=str, help="Set the encoding quality. Possible values: lowest, low, medium, high, higher.")
parser.add_argument("--grain", type=int, help="Set the film-grain value (number). Adjusts the film grain synthesis level.")
args = parser.parse_args()
main(no_downmix=args.no_downmix, autocrop=args.autocrop, speed=args.speed, quality=args.quality, grain=args.grain)

773
av1_opus_encoder.py.bak Normal file
View File

@@ -0,0 +1,773 @@
#!/usr/bin/env python3
import os
import sys
import subprocess
import shutil
import tempfile
import json
import re # Added for VFR frame rate parsing
from datetime import datetime
from pathlib import Path
REQUIRED_TOOLS = [
"ffmpeg", "ffprobe", "mkvmerge", "mkvpropedit",
"opusenc", "mediainfo", "av1an", "HandBrakeCLI", "ffmsindex" # Added HandBrakeCLI and ffmsindex
]
DIR_COMPLETED = Path("completed")
DIR_ORIGINAL = Path("original")
DIR_CONV_LOGS = Path("conv_logs") # Directory for conversion logs
REMUX_CODECS = {"aac", "opus"} # Using a set for efficient lookups
SVT_AV1_PARAMS = {
"speed": "slower", # "slower", "slow", "medium", "fast", "faster"
"quality": "medium", # "higher", "high", "medium", "low", "lower"
"film-grain": 6,
"color-primaries": 1,
"transfer-characteristics": 1,
"matrix-coefficients": 1,
"scd": 0, # Scene change detection OFF for Av1an use
"keyint": 0, # Keyframe interval, 0 disables automatic keyframes placement at a constant interval
"lp": 2, # Level of parallelism
"auto-tiling": 1, # Auto tiling ON
"tune": 1, # 0 = VQ, 1 = PSNR, 2 = SSIM
"progress": 2, # Detailed progress output
}
def check_tools():
for tool in REQUIRED_TOOLS:
if shutil.which(tool) is None:
print(f"Required tool '{tool}' not found in PATH.")
sys.exit(1)
def run_cmd(cmd, capture_output=False, check=True):
if capture_output:
result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=check, text=True)
return result.stdout
else:
subprocess.run(cmd, check=check)
def convert_audio_track(index, ch, lang, audio_temp_dir, source_file, should_downmix):
audio_temp_path = Path(audio_temp_dir)
temp_extracted = audio_temp_path / f"track_{index}_extracted.flac"
temp_normalized = audio_temp_path / f"track_{index}_normalized.flac"
final_opus = audio_temp_path / f"track_{index}_final.opus"
print(f" - Extracting Audio Track #{index} to FLAC...")
ffmpeg_args = [
"ffmpeg", "-v", "quiet", "-stats", "-y", "-i", str(source_file), "-map", f"0:{index}", "-map_metadata", "-1"
]
if should_downmix and ch >= 6:
if ch == 6:
ffmpeg_args += ["-af", "pan=stereo|c0=c2+0.30*c0+0.30*c4|c1=c2+0.30*c1+0.30*c5"]
elif ch == 8:
ffmpeg_args += ["-af", "pan=stereo|c0=c2+0.30*c0+0.30*c4+0.30*c6|c1=c2+0.30*c1+0.30*c5+0.30*c7"]
else: # Other multi-channel (e.g. 7ch, 10ch)
ffmpeg_args += ["-ac", "2"]
ffmpeg_args += ["-c:a", "flac", str(temp_extracted)]
run_cmd(ffmpeg_args)
print(f" - Normalizing Audio Track #{index} with ffmpeg (loudnorm 2-pass)...")
# First pass: Analyze the audio to get loudnorm stats
# The stats are printed to stderr, so we must use subprocess.run directly to capture it.
print(" - Pass 1: Analyzing...")
result = subprocess.run(
["ffmpeg", "-v", "info", "-i", str(temp_extracted), "-af", "loudnorm=I=-23:LRA=7:tp=-1:print_format=json", "-f", "null", "-"],
capture_output=True, text=True, check=True)
# Find the start of the JSON block in stderr and parse it.
# This is more robust than slicing the last N lines.
# We find the start and end of the JSON block to avoid parsing extra data.
stderr_output = result.stderr
json_start_index = stderr_output.find('{')
if json_start_index == -1:
raise ValueError("Could not find start of JSON block in ffmpeg output for loudnorm analysis.")
brace_level = 0
json_end_index = -1
for i, char in enumerate(stderr_output[json_start_index:]):
if char == '{':
brace_level += 1
elif char == '}':
brace_level -= 1
if brace_level == 0:
json_end_index = json_start_index + i + 1
break
stats = json.loads(stderr_output[json_start_index:json_end_index])
# Second pass: Apply the normalization using the stats from the first pass
print(" - Pass 2: Applying normalization...")
run_cmd([
"ffmpeg", "-v", "quiet", "-stats", "-y", "-i", str(temp_extracted), "-af",
f"loudnorm=I=-23:LRA=7:tp=-1:measured_i={stats['input_i']}:measured_lra={stats['input_lra']}:measured_tp={stats['input_tp']}:measured_thresh={stats['input_thresh']}:offset={stats['target_offset']}",
"-c:a", "flac", str(temp_normalized)
])
# Set bitrate based on the final channel count of the Opus file.
# If we are downmixing, the result is stereo.
# If not, the result has the original channel count.
is_being_downmixed = should_downmix and ch >= 6
if is_being_downmixed:
# Downmixing from 5.1 or 7.1 results in a stereo track.
bitrate = "128k"
else:
# Not downmixing (or source is already stereo or less).
# Base bitrate on the source channel count.
if ch == 1: # Mono
bitrate = "64k"
elif ch == 2: # Stereo
bitrate = "128k"
elif ch == 6: # 5.1 Surround
bitrate = "256k"
elif ch == 8: # 7.1 Surround
bitrate = "384k"
else: # Other layouts
bitrate = "96k" # A sensible default for other/uncommon layouts.
print(f" - Encoding Audio Track #{index} to Opus at {bitrate}...")
run_cmd([
"opusenc", "--vbr", "--bitrate", bitrate, str(temp_normalized), str(final_opus)
])
return final_opus
def convert_video(source_file_base, source_file_full, is_vfr, target_cfr_fps_for_handbrake, autocrop_filter=None):
print(" --- Starting Video Processing ---")
# source_file_base is file_path.stem (e.g., "my.anime.episode.01")
vpy_file = Path(f"{source_file_base}.vpy")
ut_video_file = Path(f"{source_file_base}.ut.mkv")
encoded_video_file = Path(f"temp-{source_file_base}.mkv")
handbrake_cfr_intermediate_file = None # To store path of HandBrake output if created
current_input_for_utvideo = Path(source_file_full)
if is_vfr and target_cfr_fps_for_handbrake:
print(f" - Source is VFR. Converting to CFR ({target_cfr_fps_for_handbrake}) with HandBrakeCLI...")
handbrake_cfr_intermediate_file = Path(f"{source_file_base}.cfr_temp.mkv")
handbrake_args = [
"HandBrakeCLI",
"--input", str(source_file_full),
"--output", str(handbrake_cfr_intermediate_file),
"--cfr",
"--rate", str(target_cfr_fps_for_handbrake),
"--encoder", "x264_10bit", # Changed to x264_10bit for 10-bit CFR intermediate
"--quality", "0", # CRF 0 for x264 is often considered visually lossless, or near-lossless
"--encoder-preset", "superfast", # Use a fast preset for quicker processing
"--encoder-tune", "fastdecode", # Added tune for faster decoding
"--audio", "none",
"--subtitle", "none",
"--crop-mode", "none" # Disable auto-cropping
]
print(f" - Running HandBrakeCLI: {' '.join(handbrake_args)}")
try:
run_cmd(handbrake_args)
if handbrake_cfr_intermediate_file.exists() and handbrake_cfr_intermediate_file.stat().st_size > 0:
print(f" - HandBrake VFR to CFR conversion successful: {handbrake_cfr_intermediate_file}")
current_input_for_utvideo = handbrake_cfr_intermediate_file
else:
print(f" - Warning: HandBrakeCLI VFR-to-CFR conversion failed or produced an empty file. Proceeding with original source for UTVideo.")
handbrake_cfr_intermediate_file = None # Ensure it's None if failed
except subprocess.CalledProcessError as e:
print(f" - Error during HandBrakeCLI execution: {e}")
print(f" - Proceeding with original source for UTVideo.")
handbrake_cfr_intermediate_file = None # Ensure it's None if failed
print(" - Creating UTVideo intermediate file (overwriting if exists)...")
# Check if source is already UTVideo
ffprobe_cmd = [
"ffprobe", "-v", "error", "-select_streams", "v:0",
"-show_entries", "stream=codec_name", "-of", "default=noprint_wrappers=1:nokey=1",
str(current_input_for_utvideo) # Use current input, which might be HandBrake output
]
source_codec = run_cmd(ffprobe_cmd, capture_output=True, check=True).strip()
video_codec_args = ["-c:v", "utvideo"]
if source_codec == "utvideo" and current_input_for_utvideo == Path(source_file_full): # Only copy if original was UTVideo
print(" - Source is already UTVideo. Copying video stream...")
video_codec_args = ["-c:v", "copy"]
ffmpeg_args = [
"ffmpeg", "-hide_banner", "-v", "quiet", "-stats", "-y", "-i", str(current_input_for_utvideo),
"-map", "0:v:0", "-map_metadata", "-1", "-map_chapters", "-1", "-an", "-sn", "-dn",
]
if autocrop_filter:
ffmpeg_args += ["-vf", autocrop_filter]
ffmpeg_args += video_codec_args + [str(ut_video_file)]
run_cmd(ffmpeg_args)
print(" - Indexing UTVideo file with ffmsindex for VapourSynth...")
ffmsindex_args = ["ffmsindex", "-f", str(ut_video_file)]
run_cmd(ffmsindex_args)
ut_video_full_path = os.path.abspath(ut_video_file)
vpy_script_content = f"""import vapoursynth as vs
core = vs.core
core.num_threads = 4
clip = core.ffms2.Source(source=r'''{ut_video_full_path}''')
clip = core.resize.Point(clip, format=vs.YUV420P10, matrix_in_s="709") # type: ignore
clip.set_output()
"""
with vpy_file.open("w", encoding="utf-8") as f:
f.write(vpy_script_content)
print(" - Starting AV1 encode with av1an (this will take a long time)...")
total_cores = os.cpu_count() or 4 # Fallback if cpu_count is None
workers = max(1, (total_cores // 2) - 1) # Half the cores minus one, with a minimum of 1 worker.
print(f" - Using {workers} workers for av1an (Total Cores: {total_cores}, Logic: (Cores/2)-1).")
# Create the parameter string for av1an's -v option, which expects a single string.
av1an_video_params_str = " ".join([f"--{key} {value}" for key, value in SVT_AV1_PARAMS.items()])
print(f" - Using SVT-AV1 parameters: {av1an_video_params_str}")
av1an_enc_args = [
"av1an", "-i", str(vpy_file), "-o", str(encoded_video_file), "-n",
"-e", "svt-av1", "--resume", "--sc-pix-format", "yuv420p", "-c", "mkvmerge",
"--set-thread-affinity", "2", "--pix-format", "yuv420p10le", "--force", "--no-defaults",
"-w", str(workers),
"-v", av1an_video_params_str
]
run_cmd(av1an_enc_args)
print(" --- Finished Video Processing ---")
return encoded_video_file, handbrake_cfr_intermediate_file
def is_ffmpeg_decodable(file_path):
"""Quickly check if ffmpeg can decode the input file."""
try:
# Try to decode a short segment of the first audio stream
subprocess.run([
"ffmpeg", "-v", "error", "-i", str(file_path), "-map", "0:a:0", "-t", "1", "-f", "null", "-"
], check=True)
return True
except subprocess.CalledProcessError:
return False
# --- CROPDETECT LOGIC FROM cropdetect.py ---
import argparse as _argparse_cropdetect
import multiprocessing as _multiprocessing_cropdetect
from collections import Counter as _Counter_cropdetect
COLOR_GREEN = "\033[92m"
COLOR_RED = "\033[91m"
COLOR_YELLOW = "\033[93m"
COLOR_RESET = "\033[0m"
KNOWN_ASPECT_RATIOS = [
{"name": "HDTV (16:9)", "ratio": 16/9},
{"name": "Widescreen (Scope)", "ratio": 2.39},
{"name": "Widescreen (Flat)", "ratio": 1.85},
{"name": "IMAX Digital (1.90:1)", "ratio": 1.90},
{"name": "Fullscreen (4:3)", "ratio": 4/3},
{"name": "IMAX 70mm (1.43:1)", "ratio": 1.43},
]
def _check_prerequisites_cropdetect():
for tool in ['ffmpeg', 'ffprobe']:
if not shutil.which(tool):
print(f"Error: '{tool}' command not found. Is it installed and in your PATH?")
return False
return True
def _analyze_segment_cropdetect(task_args):
seek_time, input_file, width, height = task_args
ffmpeg_args = [
'ffmpeg', '-hide_banner',
'-ss', str(seek_time),
'-i', input_file, '-t', '1', '-vf', 'cropdetect',
'-f', 'null', '-'
]
result = subprocess.run(ffmpeg_args, capture_output=True, text=True, encoding='utf-8')
if result.returncode != 0:
return []
crop_detections = re.findall(r'crop=(\d+):(\d+):(\d+):(\d+)', result.stderr)
significant_crops = []
for w_str, h_str, x_str, y_str in crop_detections:
w, h, x, y = map(int, [w_str, h_str, x_str, y_str])
significant_crops.append((f"crop={w}:{h}:{x}:{y}", seek_time))
return significant_crops
def _snap_to_known_ar_cropdetect(w, h, x, y, video_w, video_h, tolerance=0.03):
if h == 0: return f"crop={w}:{h}:{x}:{y}", None
detected_ratio = w / h
best_match = None
smallest_diff = float('inf')
for ar in KNOWN_ASPECT_RATIOS:
diff = abs(detected_ratio - ar['ratio'])
if diff < smallest_diff:
smallest_diff = diff
best_match = ar
if not best_match or (smallest_diff / best_match['ratio']) >= tolerance:
return f"crop={w}:{h}:{x}:{y}", None
if abs(w - video_w) < 16:
new_h = round(video_w / best_match['ratio'])
if new_h % 8 != 0:
new_h = new_h + (8 - (new_h % 8))
new_h = min(new_h, video_h)
new_y = round((video_h - new_h) / 2)
if new_y % 2 != 0:
new_y -= 1
new_y = max(0, new_y)
return f"crop={video_w}:{new_h}:0:{new_y}", best_match['name']
if abs(h - video_h) < 16:
new_w = round(video_h * best_match['ratio'])
if new_w % 8 != 0:
new_w = new_w + (8 - (new_w % 8))
new_w = min(new_w, video_w)
new_x = round((video_w - new_w) / 2)
if new_x % 2 != 0:
new_x -= 1
new_x = max(0, new_x)
return f"crop={new_w}:{video_h}:{new_x}:0", best_match['name']
return f"crop={w}:{h}:{x}:{y}", None
def _cluster_crop_values_cropdetect(crop_counts, tolerance=8):
clusters = []
temp_counts = crop_counts.copy()
while temp_counts:
center_str, _ = temp_counts.most_common(1)[0]
try:
_, values = center_str.split('=');
cw, ch, cx, cy = map(int, values.split(':'))
except (ValueError, IndexError):
del temp_counts[center_str]
continue
cluster_total_count = 0
crops_to_remove = []
for crop_str, count in temp_counts.items():
try:
_, values = crop_str.split('=');
w, h, x, y = map(int, values.split(':'))
if abs(x - cx) <= tolerance and abs(y - cy) <= tolerance:
cluster_total_count += count
crops_to_remove.append(crop_str)
except (ValueError, IndexError):
continue
if cluster_total_count > 0:
clusters.append({'center': center_str, 'count': cluster_total_count})
for crop_str in crops_to_remove:
del temp_counts[crop_str]
clusters.sort(key=lambda c: c['count'], reverse=True)
return clusters
def _parse_crop_string_cropdetect(crop_str):
try:
_, values = crop_str.split('=');
w, h, x, y = map(int, values.split(':'))
return {'w': w, 'h': h, 'x': x, 'y': y}
except (ValueError, IndexError):
return None
def _calculate_bounding_box_cropdetect(crop_keys):
min_x = min_w = min_y = min_h = float('inf')
max_x = max_w = max_y = max_h = float('-inf')
for key in crop_keys:
parsed = _parse_crop_string_cropdetect(key)
if not parsed:
continue
w, h, x, y = parsed['w'], parsed['h'], parsed['x'], parsed['y']
min_x = min(min_x, x)
min_y = min(min_y, y)
max_x = max(max_x, x + w)
max_y = max(max_y, y + h)
min_w = min(min_w, w)
min_h = min(min_h, h)
max_w = max(max_w, w)
max_h = max(max_h, h)
if (max_x - min_x) <= 2 and (max_y - min_y) <= 2:
return None
bounding_crop = f"crop={max_x - min_x}:{max_y - min_y}:{min_x}:{min_y}"
return bounding_crop
def _analyze_video_cropdetect(input_file, duration, width, height, num_workers, significant_crop_threshold, min_crop, debug=False):
num_tasks = num_workers * 4
segment_duration = max(1, duration // num_tasks)
tasks = [(i * segment_duration, input_file, width, height) for i in range(num_tasks)]
crop_results = []
with _multiprocessing_cropdetect.Pool(processes=num_workers) as pool:
results_iterator = pool.imap_unordered(_analyze_segment_cropdetect, tasks)
for result in results_iterator:
crop_results.append(result)
all_crops_with_ts = [crop for sublist in crop_results for crop in sublist]
all_crop_strings = [item[0] for item in all_crops_with_ts]
if not all_crop_strings:
return None
crop_counts = _Counter_cropdetect(all_crop_strings)
clusters = _cluster_crop_values_cropdetect(crop_counts)
total_detections = sum(c['count'] for c in clusters)
significant_clusters = []
for cluster in clusters:
percentage = (cluster['count'] / total_detections) * 100
if percentage >= significant_crop_threshold:
significant_clusters.append(cluster)
for cluster in significant_clusters:
parsed_crop = _parse_crop_string_cropdetect(cluster['center'])
if parsed_crop:
_, ar_label = _snap_to_known_ar_cropdetect(
parsed_crop['w'], parsed_crop['h'], parsed_crop['x'], parsed_crop['y'], width, height
)
cluster['ar_label'] = ar_label
else:
cluster['ar_label'] = None
if not significant_clusters:
return None
elif len(significant_clusters) == 1:
dominant_cluster = significant_clusters[0]
parsed_crop = _parse_crop_string_cropdetect(dominant_cluster['center'])
snapped_crop, ar_label = _snap_to_known_ar_cropdetect(
parsed_crop['w'], parsed_crop['h'], parsed_crop['x'], parsed_crop['y'], width, height
)
parsed_snapped = _parse_crop_string_cropdetect(snapped_crop)
if parsed_snapped and parsed_snapped['w'] == width and parsed_snapped['h'] == height:
return None
else:
return snapped_crop
else:
crop_keys = [c['center'] for c in significant_clusters]
bounding_box_crop = _calculate_bounding_box_cropdetect(crop_keys)
if bounding_box_crop:
parsed_bb = _parse_crop_string_cropdetect(bounding_box_crop)
snapped_crop, ar_label = _snap_to_known_ar_cropdetect(
parsed_bb['w'], parsed_bb['h'], parsed_bb['x'], parsed_bb['y'], width, height
)
parsed_snapped = _parse_crop_string_cropdetect(snapped_crop)
if parsed_snapped and parsed_snapped['w'] == width and parsed_snapped['h'] == height:
return None
else:
return snapped_crop
else:
return None
def detect_autocrop_filter(input_file, significant_crop_threshold=5.0, min_crop=10, debug=False):
if not _check_prerequisites_cropdetect():
return None
try:
probe_duration_args = [
'ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1',
input_file
]
duration_str = subprocess.check_output(probe_duration_args, stderr=subprocess.STDOUT, text=True)
duration = int(float(duration_str))
probe_res_args = [
'ffprobe', '-v', 'error',
'-select_streams', 'v',
'-show_entries', 'stream=width,height,disposition',
'-of', 'json',
input_file
]
probe_output = subprocess.check_output(probe_res_args, stderr=subprocess.STDOUT, text=True)
streams_data = json.loads(probe_output)
video_stream = None
for stream in streams_data.get('streams', []):
if stream.get('disposition', {}).get('attached_pic', 0) == 0:
video_stream = stream
break
if not video_stream or 'width' not in video_stream or 'height' not in video_stream:
return None
width = int(video_stream['width'])
height = int(video_stream['height'])
except Exception:
return None
return _analyze_video_cropdetect(input_file, duration, width, height, max(1, os.cpu_count() // 2), significant_crop_threshold, min_crop, debug)
def main(no_downmix=False, autocrop=False, speed=None, quality=None, grain=None):
check_tools()
# Override default SVT-AV1 params if provided via command line
if speed:
SVT_AV1_PARAMS["speed"] = speed
if quality:
SVT_AV1_PARAMS["quality"] = quality
if grain is not None:
SVT_AV1_PARAMS["film-grain"] = grain
current_dir = Path(".")
files_to_process = sorted(
f for f in current_dir.glob("*.mkv")
if not (f.name.endswith(".ut.mkv") or f.name.startswith("temp-") or f.name.startswith("output-") or f.name.endswith(".cfr_temp.mkv"))
)
if not files_to_process:
print("No MKV files found to process. Exiting.")
return
DIR_COMPLETED.mkdir(exist_ok=True, parents=True)
DIR_ORIGINAL.mkdir(exist_ok=True, parents=True)
DIR_CONV_LOGS.mkdir(exist_ok=True, parents=True)
while True:
files_to_process = sorted(
f for f in current_dir.glob("*.mkv")
if not (f.name.endswith(".ut.mkv") or f.name.startswith("temp-") or f.name.startswith("output-") or f.name.endswith(".cfr_temp.mkv"))
)
if not files_to_process:
print("No more .mkv files found to process in the current directory. The script will now exit.")
break
file_path = files_to_process[0]
if not is_ffmpeg_decodable(file_path):
print(f"ERROR: ffmpeg cannot decode '{file_path.name}'. Skipping this file.", file=sys.stderr)
shutil.move(str(file_path), DIR_ORIGINAL / file_path.name)
continue
print("-" * shutil.get_terminal_size(fallback=(80, 24)).columns)
log_file_name = f"{file_path.stem}.log"
log_file_path = DIR_CONV_LOGS / log_file_name
original_stdout_console = sys.stdout
original_stderr_console = sys.stderr
print(f"Processing: {file_path.name}", file=original_stdout_console)
print(f"Logging output to: {log_file_path}", file=original_stdout_console)
log_file_handle = None
processing_error_occurred = False
date_for_runtime_calc = datetime.now()
try:
log_file_handle = open(log_file_path, 'w', encoding='utf-8')
sys.stdout = log_file_handle
sys.stderr = log_file_handle
print(f"STARTING LOG FOR: {file_path.name}")
print(f"Processing started at: {date_for_runtime_calc}")
print(f"Full input file path: {file_path.resolve()}")
print("-" * shutil.get_terminal_size(fallback=(80, 24)).columns)
input_file_abs = file_path.resolve()
intermediate_output_file = current_dir / f"output-{file_path.name}"
audio_temp_dir = None
handbrake_intermediate_for_cleanup = None
try:
audio_temp_dir = tempfile.mkdtemp(prefix="anime_audio_")
print(f"Audio temporary directory created at: {audio_temp_dir}")
print(f"Analyzing file: {input_file_abs}")
ffprobe_info_json = run_cmd([
"ffprobe", "-v", "quiet", "-print_format", "json", "-show_streams", "-show_format", str(input_file_abs)
], capture_output=True)
ffprobe_info = json.loads(ffprobe_info_json)
mkvmerge_info_json = run_cmd([
"mkvmerge", "-J", str(input_file_abs)
], capture_output=True)
mkv_info = json.loads(mkvmerge_info_json)
mediainfo_json = run_cmd([
"mediainfo", "--Output=JSON", "-f", str(input_file_abs)
], capture_output=True)
media_info = json.loads(mediainfo_json)
is_vfr = False
target_cfr_fps_for_handbrake = None
video_track_info = None
if media_info.get("media") and media_info["media"].get("track"):
for track in media_info["media"]["track"]:
if track.get("@type") == "Video":
video_track_info = track
break
if video_track_info:
frame_rate_mode = video_track_info.get("FrameRate_Mode")
if frame_rate_mode and frame_rate_mode.upper() in ["VFR", "VARIABLE"]:
is_vfr = True
print(f" - Detected VFR based on MediaInfo FrameRate_Mode: {frame_rate_mode}")
original_fps_str = video_track_info.get("FrameRate_Original_String")
if original_fps_str:
match = re.search(r'\((\d+/\d+)\)', original_fps_str)
if match:
target_cfr_fps_for_handbrake = match.group(1)
else:
target_cfr_fps_for_handbrake = video_track_info.get("FrameRate_Original")
if not target_cfr_fps_for_handbrake:
target_cfr_fps_for_handbrake = video_track_info.get("FrameRate_Original")
if not target_cfr_fps_for_handbrake:
target_cfr_fps_for_handbrake = video_track_info.get("FrameRate")
if target_cfr_fps_for_handbrake:
print(f" - Using MediaInfo FrameRate ({target_cfr_fps_for_handbrake}) as fallback for HandBrake target FPS.")
if target_cfr_fps_for_handbrake:
print(f" - Target CFR for HandBrake: {target_cfr_fps_for_handbrake}")
if isinstance(target_cfr_fps_for_handbrake, str) and "/" in target_cfr_fps_for_handbrake:
try:
num, den = map(float, target_cfr_fps_for_handbrake.split('/'))
target_cfr_fps_for_handbrake = f"{num / den:.3f}"
print(f" - Converted fractional FPS to decimal for HandBrake: {target_cfr_fps_for_handbrake}")
except ValueError:
print(f" - Warning: Could not parse fractional FPS '{target_cfr_fps_for_handbrake}'. HandBrakeCLI might fail.")
is_vfr = False
else:
print(" - Warning: VFR detected, but could not determine target CFR from MediaInfo. Will attempt standard UTVideo conversion without HandBrake.")
is_vfr = False
else:
print(f" - Video appears to be CFR or FrameRate_Mode not specified as VFR/Variable by MediaInfo.")
autocrop_filter = None
if autocrop:
print("--- Running autocrop detection ---")
autocrop_filter = detect_autocrop_filter(str(input_file_abs))
if autocrop_filter:
print(f" - Autocrop filter detected: {autocrop_filter}")
else:
print(" - No crop needed or detected.")
encoded_video_file, handbrake_intermediate_for_cleanup = convert_video(
file_path.stem, str(input_file_abs), is_vfr, target_cfr_fps_for_handbrake, autocrop_filter=autocrop_filter
)
print("--- Starting Audio Processing ---")
processed_audio_files = []
audio_tracks_to_remux = []
audio_streams = [s for s in ffprobe_info.get("streams", []) if s.get("codec_type") == "audio"]
# Build mkvmerge track mapping by track ID
mkv_audio_tracks = {t["id"]: t for t in mkv_info.get("tracks", []) if t.get("type") == "audio"}
# Build mediainfo track mapping by StreamOrder
media_tracks_data = media_info.get("media", {}).get("track", [])
mediainfo_audio_tracks = {int(t.get("StreamOrder", -1)): t for t in media_tracks_data if t.get("@type") == "Audio"}
for stream in audio_streams:
stream_index = stream["index"]
codec = stream.get("codec_name")
channels = stream.get("channels", 2)
language = stream.get("tags", {}).get("language", "und")
# Find mkvmerge track by matching ffprobe stream index to mkvmerge track's 'properties'->'stream_id'
mkv_track = None
for t in mkv_info.get("tracks", []):
if t.get("type") == "audio" and t.get("properties", {}).get("stream_id") == stream_index:
mkv_track = t
break
if not mkv_track:
# Fallback: try by position
mkv_track = mkv_info.get("tracks", [])[stream_index] if stream_index < len(mkv_info.get("tracks", [])) else {}
track_id = mkv_track.get("id", -1)
track_title = mkv_track.get("properties", {}).get("track_name", "")
# Find mediainfo track by StreamOrder
audio_track_info = mediainfo_audio_tracks.get(stream_index)
track_delay = 0
delay_raw = audio_track_info.get("Video_Delay") if audio_track_info else None
if delay_raw is not None:
try:
delay_val = float(delay_raw)
# If the value is a float < 1, it's seconds, so convert to ms.
if delay_val < 1:
track_delay = int(round(delay_val * 1000))
else:
track_delay = int(round(delay_val))
except Exception:
track_delay = 0
print(f"Processing Audio Stream #{stream_index} (TID: {track_id}, Codec: {codec}, Channels: {channels})")
if codec in REMUX_CODECS:
audio_tracks_to_remux.append(str(track_id))
else:
# Convert any codec that is not in REMUX_CODECS
opus_file = convert_audio_track(
stream_index, channels, language, audio_temp_dir, str(input_file_abs), not no_downmix
)
processed_audio_files.append({
"Path": opus_file,
"Language": language,
"Title": track_title,
"Delay": track_delay
})
print("--- Finished Audio Processing ---")
# Final mux
print("Assembling final file with mkvmerge...")
mkvmerge_args = ["mkvmerge", "-o", str(intermediate_output_file), str(encoded_video_file)]
for file_info in processed_audio_files:
sync_switch = ["--sync", f"0:{file_info['Delay']}"] if file_info["Delay"] else []
mkvmerge_args += [
"--language", f"0:{file_info['Language']}",
"--track-name", f"0:{file_info['Title']}"
] + sync_switch + [str(file_info["Path"])]
source_copy_args = ["--no-video"]
if audio_tracks_to_remux:
source_copy_args += ["--audio-tracks", ",".join(audio_tracks_to_remux)]
else:
source_copy_args += ["--no-audio"]
mkvmerge_args += source_copy_args + [str(input_file_abs)]
run_cmd(mkvmerge_args)
# Move files
print("Moving files to final destinations...")
shutil.move(str(file_path), DIR_ORIGINAL / file_path.name)
shutil.move(str(intermediate_output_file), DIR_COMPLETED / file_path.name)
print("Cleaning up persistent video temporary files (after successful processing)...")
video_temp_files_on_success = [
current_dir / f"{file_path.stem}.vpy",
current_dir / f"{file_path.stem}.ut.mkv",
current_dir / f"temp-{file_path.stem}.mkv", # This is encoded_video_file
current_dir / f"{file_path.stem}.ut.mkv.lwi",
current_dir / f"{file_path.stem}.ut.mkv.ffindex",
]
if handbrake_intermediate_for_cleanup and handbrake_intermediate_for_cleanup.exists():
video_temp_files_on_success.append(handbrake_intermediate_for_cleanup)
for temp_vid_file in video_temp_files_on_success:
if temp_vid_file.exists():
print(f" Deleting: {temp_vid_file}")
temp_vid_file.unlink(missing_ok=True)
else:
print(f" Skipping (not found): {temp_vid_file}")
except Exception as e:
print(f"ERROR: An error occurred while processing '{file_path.name}': {e}", file=sys.stderr) # Goes to log
original_stderr_console.write(f"ERROR during processing of '{file_path.name}': {e}\nSee log '{log_file_path}' for details.\n")
processing_error_occurred = True
finally:
# This is the original 'finally' block. Its prints go to the log file.
print("--- Starting Universal Cleanup (for this file) ---")
print(" - Cleaning up disposable audio temporary directory...")
if audio_temp_dir and Path(audio_temp_dir).exists():
shutil.rmtree(audio_temp_dir, ignore_errors=True)
print(f" - Deleted audio temp dir: {audio_temp_dir}")
elif audio_temp_dir: # Was created but now not found
print(f" - Audio temp dir not found or already cleaned: {audio_temp_dir}")
else: # Was never created
print(f" - Audio temp dir was not created.")
print(" - Cleaning up intermediate output file (if it wasn't moved on success)...")
if intermediate_output_file.exists(): # Check if it still exists (e.g. error before move)
if processing_error_occurred:
print(f" - WARNING: Processing error occurred. Intermediate output file '{intermediate_output_file}' is being preserved at its original path for inspection.")
else:
# No processing error, so it should have been moved.
# If it's still here, it's unexpected but we'll clean it up.
print(f" - INFO: Intermediate output file '{intermediate_output_file}' found at original path despite no errors (expected to be moved). Cleaning up.")
intermediate_output_file.unlink(missing_ok=True) # Only unlink if no error and it exists
print(f" - Deleted intermediate output file from original path: {intermediate_output_file}")
else:
# File does not exist at original path
if not processing_error_occurred:
print(f" - Intermediate output file successfully moved (not found at original path, as expected): {intermediate_output_file}")
else:
print(f" - Processing error occurred, and intermediate output file '{intermediate_output_file}' not found at original path (likely not created or cleaned by another step).")
# --- End of original per-file processing block ---
print(f"FINISHED LOG FOR: {file_path.name}")
# --- End of log-specific messages ---
finally: # Outer finally for restoring stdout/stderr and closing log file
runtime = datetime.now() - date_for_runtime_calc
runtime_str = str(runtime).split('.')[0]
# This print goes to the log file, as stdout is not yet restored.
print(f"\nTotal runtime for this file: {runtime_str}")
if sys.stdout != original_stdout_console:
sys.stdout = original_stdout_console
if sys.stderr != original_stderr_console:
sys.stderr = original_stderr_console
if log_file_handle:
log_file_handle.close()
# Announce to console (original stdout/stderr) that this file is done
if processing_error_occurred:
original_stderr_console.write(f"File: {file_path.name}\n")
original_stderr_console.write(f"Log: {log_file_path}\n")
original_stderr_console.write(f"Runtime: {runtime_str}\n")
else:
original_stdout_console.write(f"File: {file_path.name}\n")
original_stdout_console.write(f"Log: {log_file_path}\n")
original_stdout_console.write(f"Runtime: {runtime_str}\n")
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description="Batch-process MKV files with resumable video encoding, audio downmixing, per-file logging, and optional autocrop.")
parser.add_argument("--no-downmix", action="store_true", help="Preserve original audio channel layout.")
parser.add_argument("--autocrop", action="store_true", help="Automatically detect and crop black bars from video using cropdetect.")
parser.add_argument("--speed", type=str, help="Set the encoding speed. Possible values: slower, slow, medium, fast, faster.")
parser.add_argument("--quality", type=str, help="Set the encoding quality. Possible values: lowest, low, medium, high, higher.")
parser.add_argument("--grain", type=int, help="Set the film-grain value (number). Adjusts the film grain synthesis level.")
args = parser.parse_args()
main(no_downmix=args.no_downmix, autocrop=args.autocrop, speed=args.speed, quality=args.quality, grain=args.grain)