147 lines
4.1 KiB
Python
147 lines
4.1 KiB
Python
import os
|
|
|
|
import click
|
|
import stable_whisper
|
|
|
|
from output import (
|
|
extract_metadata,
|
|
file_is_audio,
|
|
merge_metadata,
|
|
parse_lrc_file,
|
|
result_to_lrc,
|
|
result_to_lyricsfile,
|
|
)
|
|
|
|
|
|
# Maps each supported --format choice to the file extension (without the
# leading dot) given to the generated lyric file.
OUTPUT_EXTENSIONS = dict(yaml="yaml", lrc="lrc")
|
|
|
|
|
|
@click.command()
@click.help_option("--help", "-h")
@click.option(
    "--model",
    "-m",
    default="small",
    help="Which whisper model to use (choices are those of whisper.available_models)",
)
@click.option(
    "--language",
    "-l",
    required=True,
    help="What language the lyrics are in",
)
@click.option(
    "--format",
    "-f",
    "output_format",
    type=click.Choice(["yaml", "lrc"], case_sensitive=False),
    default="yaml",
    show_default=True,
    help="Output format. 'yaml' writes a Lyricsfile (.yaml) with word-level timings; 'lrc' writes classic LRC (.lrc).",
)
@click.argument("directory")
def main(model, language, output_format, directory):
    # NOTE: documented with comments instead of a docstring on purpose; click
    # would pick a docstring up as the command's --help description.
    #
    # Loads the requested model once, then walks `directory` recursively. For
    # every audio file that has no output file yet, an existing .lrc sidecar
    # is refined, otherwise a .txt sidecar is aligned; files with neither are
    # reported and left alone.
    format_key = output_format.lower()
    output_ext = OUTPUT_EXTENSIONS[format_key]

    print(f"Loading {model} model for use with language {language}...")
    whisper_model = stable_whisper.load_model(model)

    for dirpath, _dirnames, filenames in os.walk(directory):
        # Directories without any audio file simply yield an empty list here.
        audio_names = [name for name in filenames if file_is_audio(name)]

        for name in audio_names:
            audio_path = os.path.join(dirpath, name)
            base = os.path.splitext(audio_path)[0]
            output_path = f"{base}.{output_ext}"

            # Never overwrite a lyric file produced earlier (or by hand).
            if os.path.exists(output_path):
                print(f"Aligned lyric file already exists for {audio_path}: {output_path}")
                continue

            lrc_path = f"{base}.lrc"
            txt_path = f"{base}.txt"

            # An .lrc sidecar takes precedence over a .txt one.
            if os.path.exists(lrc_path):
                _refine_from_lrc(
                    whisper_model, audio_path, lrc_path, output_path, format_key, language
                )
                continue

            if os.path.exists(txt_path):
                _align_from_txt(
                    whisper_model, audio_path, txt_path, output_path, format_key, language
                )
                continue

            print(f"No .lrc or .txt sidecar found for {audio_path}")
|
|
|
|
|
|
def _align_from_txt(model, audio_path, txt_path, output_path, output_format, language):
    """Full alignment from plain text: determines line boundaries and word timings.

    Reads the lyrics from ``txt_path``, aligns them against ``audio_path`` with
    ``model.align`` and writes the rendered result to ``output_path``.

    Args:
        model: Loaded stable_whisper model; only its ``align`` method is used.
        audio_path: Path of the audio file to align against.
        txt_path: Path of the plain-text lyrics sidecar (read as UTF-8, an
            optional leading BOM is ignored).
        output_path: Destination file. It is created exclusively, so it must
            not exist yet.
        output_format: ``"yaml"`` renders a Lyricsfile with metadata; any other
            value renders LRC.
        language: Language handed to the model and, for YAML output, to the
            metadata extraction.

    Returns:
        None. Nothing is written when the lyrics file is empty or the
        alignment yields no segments.

    Raises:
        FileExistsError: If ``output_path`` already exists.
    """
    print(f"Aligning lyrics for {audio_path} (from .txt)")
    # Explicit encoding: the locale default is not UTF-8 everywhere, and
    # lyrics routinely contain non-ASCII text. "utf-8-sig" also drops a BOM.
    with open(txt_path, encoding="utf-8-sig") as fh:
        unaligned_text = fh.read()

    # Nothing to align; skip instead of handing empty text to the model.
    if not unaligned_text.strip():
        print(f" No lyrics text found in {txt_path}; skipping")
        return

    result = model.align(
        audio_path,
        unaligned_text,
        language=language,
        original_split=True,
        regroup=False,
        vad=True,
    )

    if not result or not list(result.segments):
        print(" Alignment produced no segments; skipping")
        return

    result.adjust_gaps(one_section=True)

    if output_format == "yaml":
        metadata = extract_metadata(audio_path, language=language)
        content = result_to_lyricsfile(result, metadata)
    else:
        content = result_to_lrc(result)

    print(f" Writing aligned lyrics at: {output_path}")
    # "x" refuses to clobber an existing file; UTF-8 keeps the output
    # independent of the machine's locale.
    with open(output_path, "x", encoding="utf-8") as out_fh:
        out_fh.write(content)
|
|
|
|
|
|
def _refine_from_lrc(model, audio_path, lrc_path, output_path, output_format, language):
    """Word-level refinement of existing LRC: keeps line boundaries, adds word timings.

    Parses ``lrc_path`` with ``parse_lrc_file``, passes the parsed segments to
    ``model.align_words`` together with ``audio_path`` and writes the rendered
    result to ``output_path``.

    Args:
        model: Loaded stable_whisper model; only its ``align_words`` method is
            used.
        audio_path: Path of the audio file to align against.
        lrc_path: Path of the existing LRC sidecar (read as UTF-8, an optional
            leading BOM is ignored).
        output_path: Destination file. It is created exclusively, so it must
            not exist yet.
        output_format: ``"yaml"`` renders a Lyricsfile with merged metadata;
            any other value renders LRC.
        language: Language handed to the model and, for YAML output, to the
            metadata merge as ``cli_language``.

    Returns:
        None. Nothing is written when the LRC has no segments or the word
        alignment yields no segments.

    Raises:
        FileExistsError: If ``output_path`` already exists.
    """
    print(f"Refining lyrics for {audio_path} (from .lrc)")
    # Explicit encoding: the locale default is not UTF-8 everywhere.
    # "utf-8-sig" also strips a BOM so it cannot end up in front of the first
    # line of the LRC content.
    with open(lrc_path, encoding="utf-8-sig") as fh:
        lrc_content = fh.read()

    parsed = parse_lrc_file(lrc_content)

    if not parsed.segments:
        print(f" No timed lines found in {lrc_path}; skipping")
        return

    result = model.align_words(
        audio_path,
        parsed.segments,
        language=language,
        vad=True,
        regroup=False,
    )

    if not result or not list(result.segments):
        print(" Word alignment produced no segments; skipping")
        return

    if output_format == "yaml":
        metadata = merge_metadata(audio_path, parsed, cli_language=language)
        content = result_to_lyricsfile(result, metadata)
    else:
        content = result_to_lrc(result)

    print(f" Writing refined lyrics at: {output_path}")
    # "x" refuses to clobber an existing file; UTF-8 keeps the output
    # independent of the machine's locale.
    with open(output_path, "x", encoding="utf-8") as out_fh:
        out_fh.write(content)
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
|