# encoding-scripts/tv_audio_encoder.py

#!/usr/bin/env python3
import os
import sys
import subprocess
import shutil
import tempfile
import json
from datetime import datetime
from pathlib import Path

# Install hints shared by executables that ship in the same package.
_FFMPEG_PACKAGE = "extra/ffmpeg"
_MKVTOOLNIX_PACKAGE = "extra/mkvtoolnix-cli"

# Executable name -> install hint (an Arch Linux package or an install command).
# Insertion order is the order in which the tools are looked up.
REQUIRED_TOOLS_MAP = {
    "ffmpeg": _FFMPEG_PACKAGE,
    "ffprobe": _FFMPEG_PACKAGE,
    "mkvmerge": _MKVTOOLNIX_PACKAGE,
    "mkvpropedit": _MKVTOOLNIX_PACKAGE,
    "sox": "extra/sox",
    "opusenc": "extra/opus-tools",
    "mediainfo": "extra/mediainfo",
    "alabamaEncoder": "pipx install alabamaEncoder",
}

# Destination folders, relative to the working directory.
DIR_COMPLETED = Path("completed")  # finished, re-encoded files
DIR_ORIGINAL = Path("original")    # untouched sources after a successful run

# Audio codecs that are copied into the result unchanged.
REMUX_CODECS = {
    "aac",
    "opus",
}
# Audio codecs that are transcoded to Opus.
CONVERT_CODECS = {
    "dts",
    "ac3",
    "eac3",
    "flac",
    "wavpack",
    "alac",
}

def check_tools():
    """Exit with status 1 unless the platform and all external tools are usable.

    Windows is rejected outright. Otherwise the executables named in
    ``REQUIRED_TOOLS_MAP`` are looked up on ``PATH`` in declaration order; the
    first one that cannot be found is reported together with its install hint
    and the process exits.
    """
    running_on_windows = sys.platform == "win32"
    if running_on_windows:
        print("ERROR: This script is not supported on Windows due to alabamaEncoder compatibility.")
        print("Please run this script on Linux or macOS.")
        sys.exit(1)

    # The generator is consumed lazily, so the scan stops at the first miss.
    first_missing = next(
        (
            (executable, install_hint)
            for executable, install_hint in REQUIRED_TOOLS_MAP.items()
            if shutil.which(executable) is None
        ),
        None,
    )
    if first_missing is None:
        return

    executable, install_hint = first_missing
    print(f"Required tool '{executable}' not found. On Arch Linux, try installing '{install_hint}'.")
    sys.exit(1)

def run_cmd(cmd, capture_output=False, check=True):
    """Run an external command given as an argument list.

    Args:
        cmd: Program and arguments, passed to ``subprocess.run`` without a shell.
        capture_output: When true, stdout and stderr are piped and the decoded
            stdout text is returned; otherwise the child inherits the parent's
            streams and ``None`` is returned.
        check: Forwarded to ``subprocess.run``; a non-zero exit status raises
            ``subprocess.CalledProcessError`` when true.

    Returns:
        The command's stdout as ``str`` when ``capture_output`` is true,
        otherwise ``None``.
    """
    if not capture_output:
        subprocess.run(cmd, check=check)
        return None

    completed = subprocess.run(cmd, capture_output=True, check=check, text=True)
    return completed.stdout

def _opus_bitrate_kbps(channel_count):
    """Return the Opus target bitrate in kbit/s for *channel_count* output channels."""
    # Hand-picked targets for the common layouts (mono, stereo, 5.1, 7.1).
    tuned = {1: 96, 2: 128, 6: 256, 8: 384}
    if channel_count in tuned:
        return tuned[channel_count]
    if channel_count > 2:
        # Other multichannel layouts (e.g. 5.0, 6.1): scale per channel so they
        # are not squeezed into the mono budget. 48 kbit/s per channel matches
        # the 7.1 entry and keeps the table monotonic.
        return 48 * channel_count
    # Unknown or nonsensical channel counts fall back to the mono budget.
    return 96


def convert_audio_track(index, ch, lang, audio_temp_dir, source_file, should_downmix):
    """Transcode one audio stream of *source_file* to a normalized Opus file.

    The stream is extracted to FLAC with ffmpeg (optionally downmixed to
    stereo), peak-normalized with SoX, then encoded with opusenc. All
    intermediate and final files are written into *audio_temp_dir*.

    Args:
        index: Absolute stream index inside *source_file* (used as ``0:<index>``).
        ch: Channel count of the source stream.
        lang: Language tag of the stream. Currently unused here; kept so the
            call signature stays stable.
        audio_temp_dir: Directory that receives the temporary and final files.
        source_file: Path of the input container.
        should_downmix: When true, streams with six or more channels are
            reduced to stereo.

    Returns:
        ``pathlib.Path`` of the resulting ``.opus`` file.

    Raises:
        subprocess.CalledProcessError: If ffmpeg, sox or opusenc fails.
    """
    audio_temp_path = Path(audio_temp_dir)
    temp_extracted = audio_temp_path / f"track_{index}_extracted.flac"
    temp_normalized = audio_temp_path / f"track_{index}_normalized.flac"
    final_opus = audio_temp_path / f"track_{index}_final.opus"

    is_being_downmixed = should_downmix and ch >= 6

    print(f"    - Extracting Audio Track #{index} to FLAC...")
    ffmpeg_args = [
        "ffmpeg", "-v", "quiet", "-stats", "-y", "-i", str(source_file), "-map", f"0:{index}"
    ]
    if is_being_downmixed:
        if ch == 6:
            # 5.1: centre at full level, fronts and surrounds at 0.30; c3 (LFE) is dropped.
            ffmpeg_args += ["-af", "pan=stereo|c0=c2+0.30*c0+0.30*c4|c1=c2+0.30*c1+0.30*c5"]
        elif ch == 8:
            # 7.1: same mix, with the extra pair (c6/c7) folded in at 0.30.
            ffmpeg_args += ["-af", "pan=stereo|c0=c2+0.30*c0+0.30*c4+0.30*c6|c1=c2+0.30*c1+0.30*c5+0.30*c7"]
        else:
            # Other multi-channel layouts (e.g. 7ch, 10ch): let ffmpeg pick the mix.
            ffmpeg_args += ["-ac", "2"]
    ffmpeg_args += ["-c:a", "flac", str(temp_extracted)]
    run_cmd(ffmpeg_args)

    print(f"    - Normalizing Audio Track #{index} with SoX...")
    run_cmd([
        "sox", str(temp_extracted), str(temp_normalized), "-S", "--temp", str(audio_temp_path), "--guard", "gain", "-n"
    ])

    # The bitrate follows the channel count of the file that is actually
    # encoded: stereo after a downmix, the source layout otherwise.
    output_channels = 2 if is_being_downmixed else ch
    bitrate_kbps = _opus_bitrate_kbps(output_channels)

    print(f"    - Encoding Audio Track #{index} to Opus at {bitrate_kbps}k...")
    # opusenc documents --bitrate as a plain number in kbit/s, so pass it
    # without a unit suffix.
    run_cmd([
        "opusenc", "--vbr", "--bitrate", str(bitrate_kbps), str(temp_normalized), str(final_opus)
    ])
    return final_opus

def convert_video(source_file_base, source_file_full):
    """Re-encode the first video stream of a file via a UTVideo intermediate.

    Steps: probe the source codec with ffprobe, write a video-only UTVideo
    intermediate with ffmpeg (stream copy if the source already is UTVideo),
    encode that intermediate with alabamaEncoder, then strip global tags and
    the title from the result with mkvpropedit. Both files are created in the
    current working directory.

    Args:
        source_file_base: Stem of the original file name, e.g.
            "cheers.s01e04.der.lueckenbuesser.german.dl.fs.1080p.web.h264-cnhd".
        source_file_full: Path of the input file.

    Returns:
        Tuple ``(ut_video_file, encoded_video_file)`` of ``pathlib.Path``
        objects; the caller is responsible for deleting both.

    Raises:
        subprocess.CalledProcessError: If any external tool fails. In that
            case (and on any other exception, including an interrupt) both
            intermediate files are removed before the exception propagates,
            because the caller never receives their paths.
    """
    print("  --- Starting Video Processing ---")
    ut_video_file = Path(f"{source_file_base}.ut.mkv")
    encoded_video_file = Path(f"temp-{source_file_base}.mkv")

    try:
        print("    - Creating UTVideo intermediate file (overwriting if exists)...")
        # Check if source is already UTVideo
        ffprobe_cmd = [
            "ffprobe", "-v", "error", "-select_streams", "v:0",
            "-show_entries", "stream=codec_name", "-of", "default=noprint_wrappers=1:nokey=1",
            str(source_file_full)
        ]
        source_codec = run_cmd(ffprobe_cmd, capture_output=True, check=True).strip()

        video_codec_args = ["-c:v", "utvideo"]
        if source_codec == "utvideo":
            print("    - Source is already UTVideo. Copying video stream...")
            video_codec_args = ["-c:v", "copy"]

        # Video only: drop audio, subtitles, data streams, metadata and chapters.
        ffmpeg_args = [
            "ffmpeg", "-hide_banner", "-v", "quiet", "-stats", "-y", "-i", str(source_file_full),
            "-map", "0:v:0", "-map_metadata", "-1", "-map_chapters", "-1", "-an", "-sn", "-dn",
        ] + video_codec_args + [str(ut_video_file)]
        run_cmd(ffmpeg_args)

        print("    - Starting video encode with AlabamaEncoder (this will take a long time)...")
        # Note: AlabamaEncoder options like --vmaf_target are used here.
        # You might want to adjust them based on your specific needs.
        # Resumability and specific SVT-AV1 parameters previously used with av1an
        # are not directly translated here as AlabamaEncoder handles encoding differently.
        alabama_encoder_args = [
            "alabamaEncoder", "encode",
            str(ut_video_file),      # This is the UT video file created by ffmpeg
            str(encoded_video_file),
            "--grain", "-2",          # Example option, adjust as needed
            "--vmaf_target", "96",   # Example option, adjust as needed
            "--dont_encode_audio"    # Important as audio is processed separately
        ]
        run_cmd(alabama_encoder_args)

        print("    - Cleaning metadata with mkvpropedit...")
        propedit_args = [
            "mkvpropedit",
            str(encoded_video_file),
            "--tags", "global:",
            "-d", "title"
        ]
        # mkvpropedit exits with 1 when it only emitted warnings and the
        # modification still went through; do not throw away a finished
        # encode for that. Anything else non-zero is a real failure.
        propedit_result = subprocess.run(propedit_args, check=False)
        if propedit_result.returncode == 1:
            print("    - Warning: mkvpropedit reported warnings; continuing.", file=sys.stderr)
        elif propedit_result.returncode != 0:
            raise subprocess.CalledProcessError(propedit_result.returncode, propedit_args)
    except BaseException:
        # The caller only learns the paths from the return value, so on
        # failure nobody else can delete these (potentially huge) files.
        print("    - Video processing failed; removing intermediate video files...", file=sys.stderr)
        for leftover in (ut_video_file, encoded_video_file):
            try:
                leftover.unlink(missing_ok=True)
            except OSError:
                # Best-effort cleanup; never mask the original error.
                pass
        raise

    print("  --- Finished Video Processing ---")
    return ut_video_file, encoded_video_file

def _run_mkvmerge(cmd):
    """Run an mkvmerge command, accepting exit status 1 (warnings only) as success."""
    completed = subprocess.run(cmd, check=False)
    if completed.returncode == 1:
        # mkvmerge uses 1 for "warnings were printed but muxing continued".
        print("Warning: mkvmerge finished with warnings; please verify the output file.", file=sys.stderr)
    elif completed.returncode != 0:
        raise subprocess.CalledProcessError(completed.returncode, cmd)


def _audio_delay_ms(media_info, stream_index):
    """Return the audio/video delay in milliseconds that mediainfo reports for a stream (0 if unknown)."""
    tracks = (media_info.get("media") or {}).get("track") or []
    for track in tracks:
        if track.get("@type") != "Audio":
            continue
        try:
            order = int(track.get("StreamOrder", -1))
        except (TypeError, ValueError):
            # A non-integer StreamOrder cannot be matched against a stream index.
            continue
        if order != stream_index:
            continue
        delay_in_seconds = track.get("Video_Delay")
        if delay_in_seconds is None:
            return 0
        try:
            return round(float(delay_in_seconds) * 1000)
        except (TypeError, ValueError, OverflowError):
            return 0
    return 0


def main(no_downmix=False):
    """Process every eligible ``*.mkv`` file in the current directory.

    For each file: gather stream information (ffprobe, mkvmerge, mediainfo),
    re-encode the video, transcode audio streams whose codec is in
    ``CONVERT_CODECS`` to Opus, copy all other audio streams as they are,
    and mux everything with mkvmerge. On success the source is moved to
    ``DIR_ORIGINAL`` and the result to ``DIR_COMPLETED``. Temporary files are
    removed after every file, whether it succeeded or not; a failure is
    reported on stderr and processing continues with the next file.

    Args:
        no_downmix: When true, multichannel audio keeps its channel layout
            instead of being reduced to stereo.
    """
    check_tools()
    DIR_COMPLETED.mkdir(exist_ok=True, parents=True)
    DIR_ORIGINAL.mkdir(exist_ok=True, parents=True)

    current_dir = Path(".")
    # Skip this script's own intermediates and outputs.
    files_to_process = sorted(
        f for f in current_dir.glob("*.mkv")
        if not (f.name.endswith(".ut.mkv") or f.name.startswith(("temp-", "output-")))
    )

    if not files_to_process:
        print("No .mkv files found to process in the current directory.")
        return

    for file_path in files_to_process:
        print("-" * shutil.get_terminal_size(fallback=(80, 24)).columns)
        print(f"Starting full processing for: {file_path.name}")
        started_at = datetime.now()
        input_file_abs = file_path.resolve()
        intermediate_output_file = current_dir / f"output-{file_path.name}"
        # Initialised up front so the cleanup below can tell what was created.
        audio_temp_dir = None
        created_ut_video_path = None
        created_encoded_video_path = None

        try:
            audio_temp_dir = tempfile.mkdtemp(prefix="tv_audio_")
            print(f"Audio temporary directory created at: {audio_temp_dir}")
            print(f"Analyzing file: {input_file_abs}")

            ffprobe_info_json = run_cmd([
                "ffprobe", "-v", "quiet", "-print_format", "json", "-show_streams", "-show_format", str(input_file_abs)
            ], capture_output=True)
            ffprobe_info = json.loads(ffprobe_info_json)

            mkvmerge_info_json = run_cmd([
                "mkvmerge", "-J", str(input_file_abs)
            ], capture_output=True)
            mkv_info = json.loads(mkvmerge_info_json)

            mediainfo_json = run_cmd([
                "mediainfo", "--Output=JSON", "-f", str(input_file_abs)
            ], capture_output=True)
            media_info = json.loads(mediainfo_json)

            created_ut_video_path, created_encoded_video_path = convert_video(file_path.stem, str(input_file_abs))

            print("--- Starting Audio Processing ---")
            processed_audio_files = []
            audio_tracks_to_remux = []
            audio_streams = [s for s in ffprobe_info.get("streams", []) if s.get("codec_type") == "audio"]
            mkv_tracks = mkv_info.get("tracks", [])

            for stream in audio_streams:
                stream_index = stream["index"]
                codec = stream.get("codec_name")
                channels = stream.get("channels", 2)
                language = stream.get("tags", {}).get("language", "und")
                # The ffprobe stream index is used as the position in mkvmerge's track list.
                mkv_track = mkv_tracks[stream_index] if stream_index < len(mkv_tracks) else {}
                track_id = mkv_track.get("id", -1)
                track_title = mkv_track.get("properties", {}).get("track_name", "")
                track_delay = _audio_delay_ms(media_info, stream_index)

                print(f"Processing Audio Stream #{stream_index} (TID: {track_id}, Codec: {codec}, Channels: {channels})")
                if codec in REMUX_CODECS:
                    audio_tracks_to_remux.append(str(track_id))
                elif codec in CONVERT_CODECS:
                    opus_file = convert_audio_track(
                        stream_index, channels, language, audio_temp_dir, str(input_file_abs), not no_downmix
                    )
                    processed_audio_files.append({
                        "Path": opus_file,
                        "Language": language,
                        "Title": track_title,
                        "Delay": track_delay
                    })
                else:
                    print(f"Warning: Unsupported codec '{codec}'. Remuxing as is.", file=sys.stderr)
                    audio_tracks_to_remux.append(str(track_id))

            print("--- Finished Audio Processing ---")

            # Final mux: encoded video, then the new Opus tracks, then whatever
            # is taken from the source (selected audio, everything except video).
            print("Assembling final file with mkvmerge...")
            mkvmerge_args = ["mkvmerge", "-o", str(intermediate_output_file), str(created_encoded_video_path)]
            for file_info in processed_audio_files:
                # Only pass --sync when there is a non-zero delay to restore.
                sync_switch = ["--sync", f"0:{file_info['Delay']}"] if file_info["Delay"] else []
                mkvmerge_args += [
                    "--language", f"0:{file_info['Language']}",
                    "--track-name", f"0:{file_info['Title']}"
                ] + sync_switch + [str(file_info["Path"])]

            source_copy_args = ["--no-video"]
            if audio_tracks_to_remux:
                source_copy_args += ["--audio-tracks", ",".join(audio_tracks_to_remux)]
            else:
                source_copy_args += ["--no-audio"]
            mkvmerge_args += source_copy_args + [str(input_file_abs)]
            # A warnings-only exit status must not discard hours of encoding.
            _run_mkvmerge(mkvmerge_args)

            # Move files
            print("Moving files to final destinations...")
            shutil.move(str(file_path), DIR_ORIGINAL / file_path.name)
            shutil.move(str(intermediate_output_file), DIR_COMPLETED / file_path.name)

        except Exception as e:
            # Per-file boundary: report and carry on with the next file.
            print(f"An error occurred while processing '{file_path.name}': {e}", file=sys.stderr)
        finally:
            print("--- Starting Cleanup ---")
            print("  - Cleaning up disposable audio temporary directory...")
            if audio_temp_dir and Path(audio_temp_dir).exists():
                shutil.rmtree(audio_temp_dir, ignore_errors=True)

            print("  - Cleaning up intermediate output file (if any)...")
            intermediate_output_file.unlink(missing_ok=True)

            print("  - Cleaning up persistent video temporary files...")
            if created_ut_video_path and created_ut_video_path.exists():
                print(f"    Deleting UT video file: {created_ut_video_path}")
                created_ut_video_path.unlink(missing_ok=True)
            if created_encoded_video_path and created_encoded_video_path.exists():
                print(f"    Deleting encoded video temp file: {created_encoded_video_path}")
                created_encoded_video_path.unlink(missing_ok=True)

            print("  - Cleaning up AlabamaEncoder temporary directories...")
            for temp_dir_alabama in current_dir.glob('.alabamatemp-*'):
                if temp_dir_alabama.is_dir():
                    shutil.rmtree(temp_dir_alabama, ignore_errors=True)
            print("--- Finished Cleanup ---")

        runtime = datetime.now() - started_at
        runtime_str = str(runtime).split('.')[0]  # Drop the fractional seconds
        print(f"Total runtime for {file_path.name}: {runtime_str}")

if __name__ == "__main__":
    # argparse is only needed when the file is executed as a script.
    import argparse

    cli = argparse.ArgumentParser(
        description="Batch-process MKV files with resumable video encoding and audio downmixing."
    )
    cli.add_argument(
        "--no-downmix",
        action="store_true",
        help="Preserve original audio channel layout.",
    )
    options = cli.parse_args()
    main(no_downmix=options.no_downmix)