Replaced "sox" with "ffmpeg" for audio normalization

2025-10-24 17:13:44 +02:00
parent 8fed780dce
commit 46fa0896ae
2 changed files with 40 additions and 7 deletions
--- a/MkvOpusEnc.py
+++ b/MkvOpusEnc.py
@@ -32,7 +32,7 @@ class Tee:

 def check_tools():
    """Checks if all required command-line tools are in the system's PATH."""
-    required_tools = ["ffmpeg", "ffprobe", "mkvmerge", "sox_ng", "opusenc", "mediainfo"]
+    required_tools = ["ffmpeg", "ffprobe", "mkvmerge", "opusenc", "mediainfo"]
    print("--- Prerequisite Check ---")
    all_found = True
    for tool in required_tools:
@@ -50,7 +50,7 @@ def run_cmd(args, capture_output=False, check=True):

 def convert_audio_track(stream_index, channels, temp_dir, source_file, should_downmix, bitrate_info):
    """Extracts, normalizes, and encodes a single audio track to Opus."""
-    temp_extracted = temp_dir / f"track_{stream_index}_extracted.flac"
+    temp_extracted = temp_dir / f"track_{stream_index}_extracted.flac" # This will be the input for loudnorm pass 1
    temp_normalized = temp_dir / f"track_{stream_index}_normalized.flac"
    final_opus = temp_dir / f"track_{stream_index}_final.opus"

@@ -78,9 +78,43 @@ def convert_audio_track(stream_index, channels, temp_dir, source_file, should_do
    ffmpeg_args.extend(["-c:a", "flac", str(temp_extracted)])
    run_cmd(ffmpeg_args)

-    # Step 2: Normalize the track with SoX NG
-    print("    - Normalizing with SoX NG...")
-    run_cmd(["sox_ng", str(temp_extracted), str(temp_normalized), "-S", "--temp", str(temp_dir), "--guard", "gain", "-n"])
+    # Step 2: Normalize the track with ffmpeg (loudnorm 2-pass)
+    print("    - Normalizing Audio Track with ffmpeg (loudnorm 2-pass)...")
+    # First pass: Analyze the audio to get loudnorm stats
+    # The stats are printed to stderr, so we must use subprocess.run directly to capture it.
+    print("      - Pass 1: Analyzing...")
+    result = subprocess.run(
+        ["ffmpeg", "-v", "info", "-i", str(temp_extracted), "-af", "loudnorm=I=-18:LRA=7:tp=-1:print_format=json", "-f", "null", "-"],
+        capture_output=True, text=True, check=True)
+    
+    # Locate the JSON block that loudnorm prints to stderr and parse it.
+    # Scanning from the first '{' to its matching '}' is more robust than
+    # slicing the last N lines, and avoids parsing any surrounding log text.
+    stderr_output = result.stderr
+    json_start_index = stderr_output.find('{')
+    if json_start_index == -1:
+        raise ValueError("Could not find start of JSON block in ffmpeg output for loudnorm analysis.")
+
+    brace_level = 0
+    json_end_index = -1
+    for i, char in enumerate(stderr_output[json_start_index:]):
+        if char == '{':
+            brace_level += 1
+        elif char == '}':
+            brace_level -= 1
+            if brace_level == 0:
+                json_end_index = json_start_index + i + 1
+                break
+    
+    stats = json.loads(stderr_output[json_start_index:json_end_index])
+
+    # Second pass: Apply the normalization using the stats from the first pass
+    print("      - Pass 2: Applying normalization...")
+    run_cmd([
+        "ffmpeg", "-v", "quiet", "-stats", "-y", "-i", str(temp_extracted), "-af",
+        f"loudnorm=I=-18:LRA=7:tp=-1:measured_i={stats['input_i']}:measured_lra={stats['input_lra']}:measured_tp={stats['input_tp']}:measured_thresh={stats['input_thresh']}:offset={stats['target_offset']}",
+        "-c:a", "flac", str(temp_normalized)
+    ])

    # Step 3: Encode to Opus with the correct bitrate
    bitrate = "192k"  # Fallback