# Files
# encoding-scripts/tv_audio_encoder.py

# 579 lines
# 25 KiB
# Python

#!/usr/bin/env python3
import os
import sys
import subprocess
import shutil
import tempfile
import json
from datetime import datetime
from pathlib import Path
class Tee:
    """Write-only text stream that duplicates everything to several targets.

    ``main`` installs instances of this class as ``sys.stdout`` and
    ``sys.stderr`` so that console output is mirrored into a log file.
    """

    def __init__(self, *files):
        # Targets are written to in the order they were passed in.
        self.files = files

    def write(self, obj):
        """Write *obj* to every target and flush each one immediately.

        Returns the number of characters written, as text streams are
        expected to do, so callers that inspect the result keep working.
        """
        for f in self.files:
            f.write(obj)
            f.flush()
        return len(obj)

    def flush(self):
        """Flush every target."""
        for f in self.files:
            f.flush()
# Several executables ship in the same package, so the package names are
# spelled out once and reused below.
_FFMPEG_PACKAGE = "extra/ffmpeg"
_MKVTOOLNIX_PACKAGE = "extra/mkvtoolnix-cli"

# Executable name -> package (or install command) suggested when it is missing.
REQUIRED_TOOLS_MAP = {
    "ffmpeg": _FFMPEG_PACKAGE,
    "ffprobe": _FFMPEG_PACKAGE,
    "mkvmerge": _MKVTOOLNIX_PACKAGE,
    "mkvpropedit": _MKVTOOLNIX_PACKAGE,
    "sox": "extra/sox",
    "opusenc": "extra/opus-tools",
    "mediainfo": "extra/mediainfo",
    "alabamaEncoder": "pipx install alabamaEncoder",
}

# Working directories, all relative to the current directory.
DIR_COMPLETED, DIR_ORIGINAL, DIR_LOGS = (
    Path(dir_name) for dir_name in ("completed", "original", "conv_logs")
)

# Audio codecs that are copied into the output untouched. Every other codec
# is converted; there is no separate "convert" list.
REMUX_CODECS = {"aac", "opus"}
def check_tools():
    """Exit with status 1 unless the platform and external tools are usable.

    The script refuses to run on Windows. On other platforms every
    executable named in ``REQUIRED_TOOLS_MAP`` must be found on ``PATH``.
    All missing tools are reported (on stderr) before exiting, so the user
    can install them in one go instead of re-running once per tool.
    """
    if sys.platform == "win32":
        print("ERROR: This script is not supported on Windows due to alabamaEncoder compatibility.", file=sys.stderr)
        print("Please run this script on Linux or macOS.", file=sys.stderr)
        sys.exit(1)

    missing = [
        (tool_exe, package_name)
        for tool_exe, package_name in REQUIRED_TOOLS_MAP.items()
        if shutil.which(tool_exe) is None
    ]
    for tool_exe, package_name in missing:
        print(
            f"Required tool '{tool_exe}' not found. On Arch Linux, try installing '{package_name}'.",
            file=sys.stderr,
        )
    if missing:
        sys.exit(1)
def run_cmd(cmd, capture_output=False, check=True):
    """Run an external command given as an argument list.

    Args:
        cmd: Sequence of program name and arguments (no shell involved).
        capture_output: When true, stdout and stderr are captured and the
            decoded stdout is returned. When false, the child inherits the
            parent's streams and ``None`` is returned.
        check: When true, a non-zero exit status raises
            ``subprocess.CalledProcessError``.

    Returns:
        The command's stdout as ``str`` when ``capture_output`` is true,
        otherwise ``None``.

    Raises:
        subprocess.CalledProcessError: The command failed and ``check`` is true.
    """
    if not capture_output:
        subprocess.run(cmd, check=check)
        return None

    try:
        result = subprocess.run(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=check, text=True
        )
    except subprocess.CalledProcessError as exc:
        # The captured stderr would otherwise vanish with the exception,
        # leaving only "returned non-zero exit status N" for the user.
        if exc.stderr:
            sys.stderr.write(exc.stderr)
        raise
    return result.stdout
# Opus target bitrate in kbit/s, keyed by the channel count of the encoded
# track (1 = mono, 2 = stereo, 6 = 5.1, 8 = 7.1).
_OPUS_BITRATE_KBPS_BY_CHANNELS = {1: 64, 2: 128, 6: 256, 8: 384}
# Used for any channel count not listed above.
_OPUS_BITRATE_KBPS_DEFAULT = 96
# A downmixed track is always stereo.
_OPUS_BITRATE_KBPS_DOWNMIXED = 128


def convert_audio_track(index, ch, lang, audio_temp_dir, source_file, should_downmix):
    """Extract one audio stream, normalise it and encode it to Opus.

    The stream is extracted to FLAC with ffmpeg (optionally downmixed to
    stereo), normalised with SoX, then encoded with opusenc. All three files
    are written into ``audio_temp_dir``.

    Args:
        index: Stream index inside ``source_file`` (used as ``-map 0:<index>``).
        ch: Channel count of the source stream.
        lang: Language tag of the stream. Accepted for interface
            compatibility; this function does not use it.
        audio_temp_dir: Directory that receives the intermediate and final files.
        source_file: Path of the input media file.
        should_downmix: When true, streams with six or more channels are
            downmixed to stereo.

    Returns:
        ``Path`` of the encoded ``.opus`` file.

    Raises:
        subprocess.CalledProcessError: Propagated from ``run_cmd`` when any
            of the external tools fails.
    """
    audio_temp_path = Path(audio_temp_dir)
    temp_extracted = audio_temp_path / f"track_{index}_extracted.flac"
    temp_normalized = audio_temp_path / f"track_{index}_normalized.flac"
    final_opus = audio_temp_path / f"track_{index}_final.opus"

    # Decided once: it drives both the ffmpeg filter and the bitrate choice.
    is_being_downmixed = should_downmix and ch >= 6

    print(f" - Extracting Audio Track #{index} to FLAC...")
    ffmpeg_args = [
        "ffmpeg", "-v", "quiet", "-stats", "-y", "-i", str(source_file), "-map", f"0:{index}"
    ]
    if is_being_downmixed:
        # Custom pan matrices for 6- and 8-channel sources: c2 at full level
        # plus the remaining listed channels at 0.30 (presumably c2 is the
        # centre/dialogue channel -- TODO confirm against the source layout).
        if ch == 6:
            ffmpeg_args += ["-af", "pan=stereo|c0=c2+0.30*c0+0.30*c4|c1=c2+0.30*c1+0.30*c5"]
        elif ch == 8:
            ffmpeg_args += ["-af", "pan=stereo|c0=c2+0.30*c0+0.30*c4+0.30*c6|c1=c2+0.30*c1+0.30*c5+0.30*c7"]
        else:
            # Other multi-channel layouts (e.g. 7ch, 10ch): ffmpeg's default downmix.
            ffmpeg_args += ["-ac", "2"]
    ffmpeg_args += ["-c:a", "flac", str(temp_extracted)]
    run_cmd(ffmpeg_args)

    print(f" - Normalizing Audio Track #{index} with SoX...")
    run_cmd([
        "sox", str(temp_extracted), str(temp_normalized), "-S", "--temp", str(audio_temp_path), "--guard", "gain", "-n"
    ])

    # The bitrate follows the channel count of the *encoded* track.
    if is_being_downmixed:
        bitrate_kbps = _OPUS_BITRATE_KBPS_DOWNMIXED
    else:
        bitrate_kbps = _OPUS_BITRATE_KBPS_BY_CHANNELS.get(ch, _OPUS_BITRATE_KBPS_DEFAULT)

    print(f" - Encoding Audio Track #{index} to Opus at {bitrate_kbps}k...")
    # opusenc documents --bitrate as a plain number in kbit/s, so the value
    # is passed without a unit suffix.
    run_cmd([
        "opusenc", "--vbr", "--bitrate", str(bitrate_kbps), str(temp_normalized), str(final_opus)
    ])
    return final_opus
def convert_video(source_file_base, source_file_full, autocrop_filter=None):
    """Encode the first video stream of a file via a UTVideo intermediate.

    Steps: probe the source codec, write ``<base>.ut.mkv`` with ffmpeg
    (video only, metadata and chapters stripped, optional crop filter), encode
    it to ``temp-<base>.mkv`` with AlabamaEncoder, then clear tags and title
    with mkvpropedit. Both files are created in the current directory.

    Args:
        source_file_base: Stem of the original file name, e.g.
            "cheers.s01e04.der.lueckenbuesser.german.dl.fs.1080p.web.h264-cnhd".
        source_file_full: Path of the source file, as a string.
        autocrop_filter: Optional ffmpeg ``-vf`` expression (e.g. ``crop=...``).

    Returns:
        Tuple ``(ut_video_file, encoded_video_file)`` of ``Path`` objects.

    Raises:
        subprocess.CalledProcessError: An external tool failed. Both
            intermediate files are removed before the error propagates,
            because the caller only learns their paths from the return value.
    """
    print(" --- Starting Video Processing ---")
    ut_video_file = Path(f"{source_file_base}.ut.mkv")
    encoded_video_file = Path(f"temp-{source_file_base}.mkv")

    try:
        print(" - Creating UTVideo intermediate file (overwriting if exists)...")
        # Check if source is already UTVideo
        ffprobe_cmd = [
            "ffprobe", "-v", "error", "-select_streams", "v:0",
            "-show_entries", "stream=codec_name", "-of", "default=noprint_wrappers=1:nokey=1",
            source_file_full
        ]
        source_codec = run_cmd(ffprobe_cmd, capture_output=True, check=True).strip()

        video_codec_args = ["-c:v", "utvideo"]
        # Stream copy and filtering are mutually exclusive in ffmpeg, so the
        # stream may only be copied when no crop filter has to be applied.
        if source_codec == "utvideo" and not autocrop_filter:
            print(" - Source is already UTVideo. Copying video stream...")
            video_codec_args = ["-c:v", "copy"]

        ffmpeg_args = [
            "ffmpeg", "-hide_banner", "-v", "quiet", "-stats", "-y", "-i", source_file_full,
            "-map", "0:v:0", "-map_metadata", "-1", "-map_chapters", "-1", "-an", "-sn", "-dn",
        ]
        if autocrop_filter:
            ffmpeg_args += ["-vf", autocrop_filter]
        ffmpeg_args += video_codec_args + [str(ut_video_file)]
        run_cmd(ffmpeg_args)

        print(" - Starting video encode with AlabamaEncoder (this will take a long time)...")
        # Note: AlabamaEncoder options like --vmaf_target are used here.
        # You might want to adjust them based on your specific needs.
        # Resumability and specific SVT-AV1 parameters previously used with av1an
        # are not directly translated here as AlabamaEncoder handles encoding differently.
        alabama_encoder_args = [
            "alabamaEncoder", "encode",
            str(ut_video_file),  # The UTVideo file created by ffmpeg above
            str(encoded_video_file),
            "--grain", "-2",  # Example option, adjust as needed
            "--vmaf_target", "96",  # Example option, adjust as needed
            "--dont_encode_audio"  # Audio is processed separately
        ]
        run_cmd(alabama_encoder_args)

        print(" - Cleaning metadata with mkvpropedit...")
        propedit_args = [
            "mkvpropedit",
            str(encoded_video_file),
            "--tags", "global:",
            "-d", "title"
        ]
        run_cmd(propedit_args)
    except BaseException:
        # Best-effort removal of partial output; never mask the real error.
        for leftover in (ut_video_file, encoded_video_file):
            try:
                if leftover.exists():
                    leftover.unlink()
            except OSError:
                pass
        raise

    print(" --- Finished Video Processing ---")
    return ut_video_file, encoded_video_file
# --- CROPDETECT LOGIC FROM cropdetect.py ---
import multiprocessing as _multiprocessing_cropdetect
from collections import Counter as _Counter_cropdetect
# (display name, width / height) for every aspect ratio a detected crop may
# be snapped to. The order is significant: on an exact tie the earlier entry
# is chosen by the snapping code.
_ASPECT_RATIO_TABLE = (
    ("HDTV (16:9)", 16 / 9),
    ("Widescreen (Scope)", 2.39),
    ("Widescreen (Flat)", 1.85),
    ("IMAX Digital (1.90:1)", 1.90),
    ("Fullscreen (4:3)", 4 / 3),
    ("IMAX 70mm (1.43:1)", 1.43),
)

KNOWN_ASPECT_RATIOS = [
    {"name": label, "ratio": value} for label, value in _ASPECT_RATIO_TABLE
]
def _check_prerequisites_cropdetect():
    """Return True when both ``ffmpeg`` and ``ffprobe`` are found on PATH.

    The first missing tool is reported on stdout and False is returned.
    """
    first_missing = next(
        (name for name in ('ffmpeg', 'ffprobe') if not shutil.which(name)),
        None,
    )
    if first_missing is None:
        return True
    print(f"Error: '{first_missing}' command not found. Is it installed and in your PATH?")
    return False
def _analyze_segment_cropdetect(task_args):
    """Run ffmpeg's ``cropdetect`` filter on one second of video.

    Args:
        task_args: Tuple ``(seek_time, input_file, width, height)``. Only the
            first two items are used; width and height are accepted so the
            task tuples built by the caller keep their shape.

    Returns:
        A list of ``("crop=W:H:X:Y", seek_time)`` tuples, one per ``crop=``
        line ffmpeg printed. The list is empty when ffmpeg cannot be started
        or exits with a non-zero status (segment analysis is best-effort).
    """
    # Kept local: the module does not import ``re`` at top level.
    import re

    seek_time, input_file, _width, _height = task_args
    ffmpeg_args = [
        'ffmpeg', '-hide_banner',
        '-ss', str(seek_time),
        '-i', input_file, '-t', '1', '-vf', 'cropdetect',
        '-f', 'null', '-'
    ]
    try:
        result = subprocess.run(
            ffmpeg_args,
            # Several ffmpeg processes run in parallel; none of them should
            # compete for the terminal's stdin.
            stdin=subprocess.DEVNULL,
            capture_output=True,
            text=True,
            encoding='utf-8',
            # ffmpeg echoes stream metadata, which is not guaranteed to be
            # valid UTF-8; a decode error must not kill the worker.
            errors='replace',
        )
    except OSError:
        return []
    if result.returncode != 0:
        return []

    crop_detections = re.findall(r'crop=(\d+):(\d+):(\d+):(\d+)', result.stderr)
    # int() normalises the captured numbers (e.g. strips leading zeros) so
    # identical crops always produce identical strings.
    return [
        (f"crop={int(w)}:{int(h)}:{int(x)}:{int(y)}", seek_time)
        for w, h, x, y in crop_detections
    ]
def _snap_to_known_ar_cropdetect(w, h, x, y, video_w, video_h, tolerance=0.03, known_ratios=None):
    """Snap a detected crop rectangle to the closest known aspect ratio.

    Args:
        w, h, x, y: Detected crop rectangle (size and top-left offset).
        video_w, video_h: Dimensions of the full video frame.
        tolerance: Maximum relative difference between the detected ratio and
            a known ratio for snapping to happen.
        known_ratios: Iterable of ``{"name": str, "ratio": float}`` dicts.
            Defaults to the module-level ``KNOWN_ASPECT_RATIOS``.

    Returns:
        ``(crop_string, ratio_name)``. When the rectangle is snapped, the crop
        is centred, spans the full frame width (letterbox) or height
        (pillarbox), and ``ratio_name`` is the matched entry's name. Otherwise
        the original rectangle is returned together with ``None``.
    """
    unsnapped = (f"crop={w}:{h}:{x}:{y}", None)
    if h == 0:
        return unsnapped
    if known_ratios is None:
        known_ratios = KNOWN_ASPECT_RATIOS

    detected_ratio = w / h
    # min() keeps the first entry on ties, like a strict "<" scan would.
    best_match = min(
        known_ratios,
        key=lambda ar: abs(detected_ratio - ar['ratio']),
        default=None,
    )
    if best_match is None:
        return unsnapped
    smallest_diff = abs(detected_ratio - best_match['ratio'])
    if (smallest_diff / best_match['ratio']) >= tolerance:
        return unsnapped

    if abs(w - video_w) < 16:
        # Letterbox: full width, recomputed height.
        new_h = round(video_w / best_match['ratio'])
        new_h += -new_h % 8  # round up to a multiple of 8
        # Rounding up must never ask for more rows than the frame has,
        # otherwise the vertical offset below would become negative.
        new_h = min(new_h, video_h)
        new_y = round((video_h - new_h) / 2)
        new_y -= new_y % 2  # keep the offset even
        return f"crop={video_w}:{new_h}:0:{new_y}", best_match['name']

    if abs(h - video_h) < 16:
        # Pillarbox: full height, recomputed width.
        new_w = round(video_h * best_match['ratio'])
        new_w += -new_w % 8  # round up to a multiple of 8
        new_w = min(new_w, video_w)  # same clamp as above, for columns
        new_x = round((video_w - new_w) / 2)
        new_x -= new_x % 2  # keep the offset even
        return f"crop={new_w}:{video_h}:{new_x}:0", best_match['name']

    return unsnapped
def _cluster_crop_values_cropdetect(crop_counts, tolerance=8):
    """Group crop strings whose offsets lie close to each other.

    Repeatedly takes the most frequent remaining crop as a cluster centre and
    absorbs every remaining crop whose ``x`` and ``y`` offsets are both within
    ``tolerance`` of the centre's. Only the offsets are compared; width and
    height do not take part in the comparison.

    Args:
        crop_counts: Mapping of ``"crop=W:H:X:Y"`` strings to occurrence
            counts (a ``Counter`` or any other mapping). Keys that do not
            match that format are ignored.
        tolerance: Maximum absolute offset difference, in pixels.

    Returns:
        List of ``{'center': crop_string, 'count': total}`` dicts, sorted by
        ``count`` in descending order.
    """
    # Parse every key exactly once; malformed keys never join a cluster.
    remaining = {}
    for crop_str, count in crop_counts.items():
        try:
            _, values = crop_str.split('=')
            _w, _h, x, y = map(int, values.split(':'))
        except (ValueError, IndexError):
            continue
        remaining[crop_str] = (x, y, count)

    clusters = []
    while remaining:
        # max() returns the first key on ties, i.e. insertion order wins.
        center_str = max(remaining, key=lambda key: remaining[key][2])
        cx, cy, _ = remaining[center_str]
        members = [
            key
            for key, (x, y, _count) in remaining.items()
            if abs(x - cx) <= tolerance and abs(y - cy) <= tolerance
        ]
        cluster_total_count = sum(remaining[key][2] for key in members)
        if cluster_total_count > 0:
            clusters.append({'center': center_str, 'count': cluster_total_count})
        for key in members:
            del remaining[key]
        # Guarantees progress even if a negative tolerance excluded the centre.
        remaining.pop(center_str, None)

    clusters.sort(key=lambda c: c['count'], reverse=True)
    return clusters
def _parse_crop_string_cropdetect(crop_str):
    """Parse a ``"crop=W:H:X:Y"`` string.

    Returns:
        ``{'w': int, 'h': int, 'x': int, 'y': int}``, or ``None`` when the
        input is not a string of exactly that shape (including ``None``).
    """
    try:
        _, values = crop_str.split('=')
        w, h, x, y = map(int, values.split(':'))
    except (AttributeError, TypeError, ValueError, IndexError):
        # AttributeError/TypeError cover non-string input such as None or bytes.
        return None
    return {'w': w, 'h': h, 'x': x, 'y': y}
def _calculate_bounding_box_cropdetect(crop_keys):
    """Return the smallest crop that contains every given crop rectangle.

    Args:
        crop_keys: Iterable of ``"crop=W:H:X:Y"`` strings. Entries that
            cannot be parsed are skipped.

    Returns:
        A ``"crop=W:H:X:Y"`` string for the union of all rectangles, or
        ``None`` when nothing could be parsed or the union is at most two
        pixels in both dimensions.
    """
    boxes = [
        box
        for box in map(_parse_crop_string_cropdetect, crop_keys)
        if box
    ]
    if not boxes:
        return None

    min_x = min(box['x'] for box in boxes)
    min_y = min(box['y'] for box in boxes)
    max_x = max(box['x'] + box['w'] for box in boxes)  # right edge
    max_y = max(box['y'] + box['h'] for box in boxes)  # bottom edge

    box_w = max_x - min_x
    box_h = max_y - min_y
    if box_w <= 2 and box_h <= 2:
        return None
    return f"crop={box_w}:{box_h}:{min_x}:{min_y}"
def _finalize_crop_cropdetect(crop_str, width, height):
    """Snap ``crop_str`` to a known aspect ratio; return None for a full-frame result."""
    parsed = _parse_crop_string_cropdetect(crop_str)
    if not parsed:
        return None
    snapped_crop, _ar_label = _snap_to_known_ar_cropdetect(
        parsed['w'], parsed['h'], parsed['x'], parsed['y'], width, height
    )
    parsed_snapped = _parse_crop_string_cropdetect(snapped_crop)
    if parsed_snapped and parsed_snapped['w'] == width and parsed_snapped['h'] == height:
        # The crop covers the whole frame, so there is nothing to remove.
        return None
    return snapped_crop


def _analyze_video_cropdetect(input_file, duration, width, height, num_workers, significant_crop_threshold, min_crop, debug=False):
    """Sample the video in parallel and derive a single crop filter.

    ``num_workers * 4`` one-second segments, spaced evenly from the start of
    the file, are analysed in a process pool. The detected crops are
    clustered, and clusters holding at least ``significant_crop_threshold``
    percent of all detections are kept. One such cluster yields its centre
    crop; several yield the bounding box of their centres. The result is then
    snapped to a known aspect ratio.

    Args:
        input_file: Path of the video file.
        duration: Duration in whole seconds.
        width, height: Frame dimensions in pixels.
        num_workers: Number of worker processes (values below 1 are treated as 1).
        significant_crop_threshold: Minimum share of detections, in percent.
        min_crop: Accepted for interface compatibility; not used here.
        debug: Accepted for interface compatibility; not used here.

    Returns:
        A ``"crop=W:H:X:Y"`` string, or ``None`` when nothing was detected or
        the resulting crop would cover the full frame.
    """
    num_workers = max(1, num_workers)
    num_tasks = num_workers * 4
    segment_duration = max(1, duration // num_tasks)
    tasks = [(i * segment_duration, input_file, width, height) for i in range(num_tasks)]

    with _multiprocessing_cropdetect.Pool(processes=num_workers) as pool:
        crop_results = list(pool.imap_unordered(_analyze_segment_cropdetect, tasks))

    all_crop_strings = [
        crop_str for segment in crop_results for crop_str, _seek_time in segment
    ]
    if not all_crop_strings:
        return None

    clusters = _cluster_crop_values_cropdetect(_Counter_cropdetect(all_crop_strings))
    total_detections = sum(c['count'] for c in clusters)
    if total_detections <= 0:
        return None

    significant_clusters = [
        cluster
        for cluster in clusters
        if (cluster['count'] / total_detections) * 100 >= significant_crop_threshold
    ]
    if not significant_clusters:
        return None

    if len(significant_clusters) == 1:
        return _finalize_crop_cropdetect(significant_clusters[0]['center'], width, height)

    # Several competing crops: keep everything any of them would keep.
    bounding_box_crop = _calculate_bounding_box_cropdetect(
        [c['center'] for c in significant_clusters]
    )
    if not bounding_box_crop:
        return None
    return _finalize_crop_cropdetect(bounding_box_crop, width, height)
def detect_autocrop_filter(input_file, significant_crop_threshold=5.0, min_crop=10, debug=False):
    """Probe a video file and return an ffmpeg crop filter for it, if any.

    ffprobe supplies the duration and the dimensions of the first video stream
    that is not an attached picture; the actual detection is delegated to
    ``_analyze_video_cropdetect`` using half of the available CPUs (at least
    one worker).

    Args:
        input_file: Path of the video file, as a string.
        significant_crop_threshold: Forwarded to the analysis (percent).
        min_crop: Forwarded to the analysis.
        debug: Forwarded to the analysis.

    Returns:
        A ``"crop=W:H:X:Y"`` string, or ``None`` when the tools are missing,
        probing fails, or no crop is needed.
    """
    if not _check_prerequisites_cropdetect():
        return None

    try:
        probe_duration_args = [
            'ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1',
            input_file
        ]
        # stderr is kept out of the captured text: a diagnostic line mixed
        # into stdout would break the float()/JSON parsing below.
        duration_str = subprocess.check_output(probe_duration_args, stderr=subprocess.DEVNULL, text=True)
        duration = int(float(duration_str))

        probe_res_args = [
            'ffprobe', '-v', 'error',
            '-select_streams', 'v',
            '-show_entries', 'stream=width,height,disposition',
            '-of', 'json',
            input_file
        ]
        probe_output = subprocess.check_output(probe_res_args, stderr=subprocess.DEVNULL, text=True)
        streams_data = json.loads(probe_output)

        # Skip cover art and similar attached pictures.
        video_stream = next(
            (
                stream
                for stream in streams_data.get('streams', [])
                if stream.get('disposition', {}).get('attached_pic', 0) == 0
            ),
            None,
        )
        if not video_stream or 'width' not in video_stream or 'height' not in video_stream:
            return None
        width = int(video_stream['width'])
        height = int(video_stream['height'])
    except Exception as exc:
        # Autocrop is optional, so a failed probe only disables cropping,
        # but the reason is reported instead of being dropped silently.
        print(f" - Autocrop probing failed: {exc}")
        return None

    # os.cpu_count() may return None when the count cannot be determined.
    num_workers = max(1, (os.cpu_count() or 1) // 2)
    return _analyze_video_cropdetect(input_file, duration, width, height, num_workers, significant_crop_threshold, min_crop, debug)
def main(no_downmix=False, autocrop=False):
    """Process every eligible ``*.mkv`` file in the current directory.

    Files ending in ``.ut.mkv`` or starting with ``temp-`` / ``output-`` are
    this script's own intermediates and are skipped. For each remaining file
    the video is re-encoded, audio streams whose codec is in ``REMUX_CODECS``
    are copied and all others are converted to Opus, everything is muxed with
    mkvmerge, and finally the source is moved to ``DIR_ORIGINAL`` and the
    result to ``DIR_COMPLETED``. Console output of this process is mirrored to
    a per-file log in ``DIR_LOGS``.

    A file whose processing fails stays where it is; it is remembered and
    skipped for the rest of the run so the loop cannot retry it forever.

    Args:
        no_downmix: When true, multi-channel audio keeps its channel layout.
        autocrop: When true, black bars are detected and cropped.
    """
    check_tools()
    current_dir = Path(".")
    # Names of files that failed during this run (see docstring).
    failed_names = set()

    def pending_files():
        """Return the sorted list of files that still need processing."""
        return sorted(
            f for f in current_dir.glob("*.mkv")
            if not (f.name.endswith(".ut.mkv") or f.name.startswith("temp-") or f.name.startswith("output-"))
            and f.name not in failed_names
        )

    if not pending_files():
        print("No MKV files found to process. Exiting.")
        return

    DIR_COMPLETED.mkdir(exist_ok=True, parents=True)
    DIR_ORIGINAL.mkdir(exist_ok=True, parents=True)
    DIR_LOGS.mkdir(exist_ok=True, parents=True)

    while True:
        # Re-scan on every pass so files added while encoding are picked up.
        files_to_process = pending_files()
        if not files_to_process:
            print("No more .mkv files found to process in the current directory. The script will now exit.")
            break

        file_path = files_to_process[0]
        log_file_path = DIR_LOGS / f"{file_path.name}.log"
        log_file = open(log_file_path, 'w', encoding='utf-8')
        original_stdout = sys.stdout
        original_stderr = sys.stderr
        sys.stdout = Tee(original_stdout, log_file)
        sys.stderr = Tee(original_stderr, log_file)
        try:
            print("-" * shutil.get_terminal_size(fallback=(80, 24)).columns)
            print(f"Starting full processing for: {file_path.name}")
            date = datetime.now()
            input_file_abs = file_path.resolve()
            intermediate_output_file = current_dir / f"output-{file_path.name}"
            audio_temp_dir = None
            created_ut_video_path = None
            created_encoded_video_path = None
            try:
                audio_temp_dir = tempfile.mkdtemp(prefix="tv_audio_")
                print(f"Audio temporary directory created at: {audio_temp_dir}")
                print(f"Analyzing file: {input_file_abs}")
                ffprobe_info_json = run_cmd([
                    "ffprobe", "-v", "quiet", "-print_format", "json", "-show_streams", "-show_format", str(input_file_abs)
                ], capture_output=True)
                ffprobe_info = json.loads(ffprobe_info_json)
                mkvmerge_info_json = run_cmd([
                    "mkvmerge", "-J", str(input_file_abs)
                ], capture_output=True)
                mkv_info = json.loads(mkvmerge_info_json)
                mediainfo_json = run_cmd([
                    "mediainfo", "--Output=JSON", "-f", str(input_file_abs)
                ], capture_output=True)
                media_info = json.loads(mediainfo_json)

                autocrop_filter = None
                if autocrop:
                    print("--- Running autocrop detection ---")
                    autocrop_filter = detect_autocrop_filter(str(input_file_abs))
                    if autocrop_filter:
                        print(f" - Autocrop filter detected: {autocrop_filter}")
                    else:
                        print(" - No crop needed or detected.")

                created_ut_video_path, created_encoded_video_path = convert_video(file_path.stem, str(input_file_abs), autocrop_filter=autocrop_filter)

                print("--- Starting Audio Processing ---")
                processed_audio_files = []
                audio_tracks_to_remux = []
                audio_streams = [s for s in ffprobe_info.get("streams", []) if s.get("codec_type") == "audio"]
                # Build mkvmerge audio track list
                mkv_audio_tracks_list = [t for t in mkv_info.get("tracks", []) if t.get("type") == "audio"]
                # Build mediainfo track mapping by StreamOrder
                media_tracks_data = media_info.get("media", {}).get("track", [])
                mediainfo_audio_tracks = {}
                for media_track in media_tracks_data:
                    if media_track.get("@type") != "Audio":
                        continue
                    try:
                        stream_order = int(media_track.get("StreamOrder", -1))
                    except (TypeError, ValueError):
                        # A non-integer StreamOrder only means this track's
                        # delay cannot be looked up; it must not abort the file.
                        continue
                    mediainfo_audio_tracks[stream_order] = media_track

                for audio_idx, stream in enumerate(audio_streams):
                    stream_index = stream["index"]
                    codec = stream.get("codec_name")
                    channels = stream.get("channels", 2)
                    language = stream.get("tags", {}).get("language", "und")
                    # Prefer matching ffprobe's stream index against the
                    # 'stream_id' property reported by mkvmerge.
                    mkv_track = next((t for t in mkv_info.get("tracks", []) if t.get("properties", {}).get("stream_id") == stream_index), None)
                    if not mkv_track:
                        # Fallback to the less reliable index-based method if stream_id isn't found
                        mkv_track = mkv_audio_tracks_list[audio_idx] if audio_idx < len(mkv_audio_tracks_list) else {}
                    track_id = mkv_track.get("id", -1)
                    track_title = mkv_track.get("properties", {}).get("track_name", "")

                    track_delay = 0
                    audio_track_info = mediainfo_audio_tracks.get(stream_index)
                    delay_raw = audio_track_info.get("Video_Delay") if audio_track_info else None
                    if delay_raw is not None:
                        # NOTE(review): values below 1 are scaled by 1000,
                        # larger values are used as-is (presumably seconds vs.
                        # milliseconds) -- confirm against mediainfo's output.
                        try:
                            delay_val = float(delay_raw)
                            if delay_val < 1:
                                track_delay = int(round(delay_val * 1000))
                            else:
                                track_delay = int(round(delay_val))
                        except Exception:
                            track_delay = 0

                    print(f"Processing Audio Stream #{stream_index} (TID: {track_id}, Codec: {codec}, Channels: {channels})")
                    if codec in REMUX_CODECS:
                        audio_tracks_to_remux.append(str(track_id))
                    else:
                        opus_file = convert_audio_track(
                            stream_index, channels, language, audio_temp_dir, str(input_file_abs), not no_downmix
                        )
                        processed_audio_files.append({
                            "Path": opus_file,
                            "Language": language,
                            "Title": track_title,
                            "Delay": track_delay
                        })
                print("--- Finished Audio Processing ---")

                # Final mux
                print("Assembling final file with mkvmerge...")
                mkvmerge_args = ["mkvmerge", "-o", str(intermediate_output_file), str(created_encoded_video_path)]
                for file_info in processed_audio_files:
                    mkvmerge_args.extend(["--language", f"0:{file_info['Language']}"])
                    if file_info['Title']:  # Only add track name if it exists
                        mkvmerge_args.extend(["--track-name", f"0:{file_info['Title']}"])
                    if file_info['Delay']:
                        mkvmerge_args.extend(["--sync", f"0:{file_info['Delay']}"])
                    mkvmerge_args.append(str(file_info["Path"]))
                # The source contributes everything except its video; its
                # audio is limited to the tracks selected for remuxing.
                source_copy_args = ["--no-video"]
                if audio_tracks_to_remux:
                    source_copy_args += ["--audio-tracks", ",".join(audio_tracks_to_remux)]
                else:
                    source_copy_args += ["--no-audio"]
                mkvmerge_args += source_copy_args + [str(input_file_abs)]
                run_cmd(mkvmerge_args)

                # Move files
                print("Moving files to final destinations...")
                shutil.move(str(file_path), DIR_ORIGINAL / file_path.name)
                shutil.move(str(intermediate_output_file), DIR_COMPLETED / file_path.name)
            except Exception as e:
                # Remember the failure so the next scan does not select this
                # file again; it is still sitting in the working directory.
                failed_names.add(file_path.name)
                print(f"An error occurred while processing '{file_path.name}': {e}", file=sys.stderr)
            finally:
                print("--- Starting Cleanup ---")
                if audio_temp_dir and Path(audio_temp_dir).exists():
                    print(" - Cleaning up disposable audio temporary directory...")
                    shutil.rmtree(audio_temp_dir, ignore_errors=True)
                if intermediate_output_file.exists():
                    print(" - Cleaning up intermediate output file...")
                    intermediate_output_file.unlink()
                print(" - Cleaning up persistent video temporary files...")
                if created_ut_video_path and created_ut_video_path.exists():
                    print(f" - Deleting UT video file: {created_ut_video_path}")
                    created_ut_video_path.unlink()
                if created_encoded_video_path and created_encoded_video_path.exists():
                    print(f" - Deleting encoded video temp file: {created_encoded_video_path}")
                    created_encoded_video_path.unlink()
                alabama_dirs = list(current_dir.glob('.alabamatemp-*'))
                if alabama_dirs:
                    print(" - Cleaning up AlabamaEncoder temporary directories...")
                    for temp_dir_alabama in alabama_dirs:
                        if temp_dir_alabama.is_dir():
                            shutil.rmtree(temp_dir_alabama, ignore_errors=True)
                print("--- Finished Cleanup ---")
                runtime = datetime.now() - date
                runtime_str = str(runtime).split('.')[0]  # Drop the fractional seconds
                print(f"Total runtime for {file_path.name}: {runtime_str}")
        finally:
            # Restore stdout/stderr and close log file
            sys.stdout = original_stdout
            sys.stderr = original_stderr
            log_file.close()

    if failed_names:
        print(
            f"{len(failed_names)} file(s) could not be processed and were left in place: "
            f"{', '.join(sorted(failed_names))}",
            file=sys.stderr,
        )
def _build_cli_parser():
    """Create the command-line parser for this script."""
    import argparse

    cli_parser = argparse.ArgumentParser(description="Batch-process MKV files with resumable video encoding and audio downmixing, with optional autocrop.")
    for flag, help_text in (
        ("--no-downmix", "Preserve original audio channel layout."),
        ("--autocrop", "Automatically detect and crop black bars from video using cropdetect."),
    ):
        cli_parser.add_argument(flag, action="store_true", help=help_text)
    return cli_parser


# Script entry point: translate the command-line flags into main()'s arguments.
if __name__ == "__main__":
    cli_options = _build_cli_parser().parse_args()
    main(no_downmix=cli_options.no_downmix, autocrop=cli_options.autocrop)