Files
txtlyric-to-lrc/txtlyric_to_lrc/main.py
T

147 lines
4.1 KiB
Python
Raw Normal View History

2025-06-17 01:27:39 -07:00
import os
import click
import stable_whisper
from output import (
extract_metadata,
file_is_audio,
merge_metadata,
parse_lrc_file,
result_to_lrc,
result_to_lyricsfile,
)
# Maps each supported output format name to the file extension (without the
# leading dot) that is appended to the audio file's base name.
OUTPUT_EXTENSIONS = dict(yaml="yaml", lrc="lrc")
2025-06-17 01:27:39 -07:00
# CLI entry point. Walks DIRECTORY recursively and, for every audio file that
# does not yet have an output file next to it, produces one from a sidecar
# file sharing the same base name: an existing .lrc is refined, otherwise a
# .txt is aligned from scratch.
@click.command()
@click.help_option("--help", "-h")
@click.option(
    "--model",
    "-m",
    default="small",
    help="Which whisper model to use (choices are those of whisper.available_models)",
)
@click.option(
    "--language",
    "-l",
    required=True,
    help="What language the lyrics are in",
)
@click.option(
    "--format",
    "-f",
    "output_format",
    type=click.Choice(["yaml", "lrc"], case_sensitive=False),
    default="yaml",
    show_default=True,
    help="Output format. 'yaml' writes a Lyricsfile (.yaml) with word-level timings; 'lrc' writes classic LRC (.lrc).",
)
@click.argument("directory")
def main(model, language, output_format, directory):
    # NOTE: deliberately documented with comments rather than a docstring;
    # click shows a command's docstring as its --help text.

    # Lower-case the chosen format before using it as a lookup key and
    # before handing it to the helpers, which compare against "yaml".
    fmt = output_format.lower()
    extension = OUTPUT_EXTENSIONS[fmt]

    print(f"Loading {model} model for use with language {language}...")
    whisper_model = stable_whisper.load_model(model)

    for folder, _subdirs, names in os.walk(directory):
        # Non-audio entries are skipped; folders without audio do nothing.
        for name in filter(file_is_audio, names):
            audio_path = os.path.join(folder, name)
            stem = os.path.splitext(audio_path)[0]

            output_path = f"{stem}.{extension}"
            if os.path.exists(output_path):
                print(f"Aligned lyric file already exists for {audio_path}: {output_path}")
                continue

            # An .lrc sidecar takes precedence over a .txt one.
            lrc_path = f"{stem}.lrc"
            txt_path = f"{stem}.txt"
            if os.path.exists(lrc_path):
                _refine_from_lrc(
                    whisper_model, audio_path, lrc_path, output_path, fmt, language
                )
                continue
            if os.path.exists(txt_path):
                _align_from_txt(
                    whisper_model, audio_path, txt_path, output_path, fmt, language
                )
                continue

            print(f"No .lrc or .txt sidecar found for {audio_path}")
def _align_from_txt(model, audio_path, txt_path, output_path, output_format, language):
    """Align plain-text lyrics against the audio and write the result.

    Full alignment from plain text: determines line boundaries and word
    timings.

    Args:
        model: Object exposing ``align(audio, text, **options)``; the caller
            in this file passes the model returned by
            ``stable_whisper.load_model``.
        audio_path: Path of the audio file to align against.
        txt_path: Path of the plain-text lyrics. Read as UTF-8; a leading
            byte-order mark is dropped.
        output_path: Destination file. It must not exist yet.
        output_format: ``"yaml"`` writes the ``result_to_lyricsfile``
            rendering (with metadata from ``extract_metadata``); any other
            value writes the ``result_to_lrc`` rendering.
        language: Language passed to the aligner and, for YAML output, to
            ``extract_metadata``.

    Returns:
        None. Nothing is written when the aligner yields no segments.

    Raises:
        FileExistsError: If ``output_path`` already exists (exclusive create).
        UnicodeDecodeError: If ``txt_path`` is not valid UTF-8.
    """
    print(f"Aligning lyrics for {audio_path} (from .txt)")

    # Decode explicitly instead of relying on the platform default encoding,
    # which mangles non-ASCII lyrics on non-UTF-8 locales. "utf-8-sig" also
    # strips a BOM so it does not end up inside the first lyric line.
    with open(txt_path, encoding="utf-8-sig") as fh:
        unaligned_text = fh.read()

    result = model.align(
        audio_path,
        unaligned_text,
        language=language,
        original_split=True,
        regroup=False,
        vad=True,
    )
    if not result or not list(result.segments):
        print(" Alignment produced no segments; skipping")
        return

    result.adjust_gaps(one_section=True)

    if output_format == "yaml":
        metadata = extract_metadata(audio_path, language=language)
        content = result_to_lyricsfile(result, metadata)
    else:
        content = result_to_lrc(result)

    print(f" Writing aligned lyrics at: {output_path}")
    # Mode "x" refuses to overwrite; the encoding is pinned so the output is
    # UTF-8 regardless of the platform locale.
    with open(output_path, "x", encoding="utf-8") as out_fh:
        out_fh.write(content)
def _refine_from_lrc(model, audio_path, lrc_path, output_path, output_format, language):
    """Add word timings to an existing LRC file and write the result.

    Word-level refinement of existing LRC: keeps line boundaries, adds word
    timings.

    Args:
        model: Object exposing ``align_words(audio, segments, **options)``;
            the caller in this file passes the model returned by
            ``stable_whisper.load_model``.
        audio_path: Path of the audio file to align against.
        lrc_path: Path of the existing LRC file. Read as UTF-8; a leading
            byte-order mark is dropped.
        output_path: Destination file. It must not exist yet.
        output_format: ``"yaml"`` writes the ``result_to_lyricsfile``
            rendering (with metadata from ``merge_metadata``); any other
            value writes the ``result_to_lrc`` rendering.
        language: Language passed to the aligner and, for YAML output, to
            ``merge_metadata`` as ``cli_language``.

    Returns:
        None. Nothing is written when the LRC has no timed lines or the
        aligner yields no segments.

    Raises:
        FileExistsError: If ``output_path`` already exists (exclusive create).
        UnicodeDecodeError: If ``lrc_path`` is not valid UTF-8.
    """
    print(f"Refining lyrics for {audio_path} (from .lrc)")

    # Decode explicitly instead of relying on the platform default encoding.
    # "utf-8-sig" strips a BOM, which would otherwise sit in front of the
    # first timestamp tag of the file.
    with open(lrc_path, encoding="utf-8-sig") as fh:
        lrc_content = fh.read()

    parsed = parse_lrc_file(lrc_content)
    if not parsed.segments:
        print(f" No timed lines found in {lrc_path}; skipping")
        return

    result = model.align_words(
        audio_path,
        parsed.segments,
        language=language,
        vad=True,
        regroup=False,
    )
    if not result or not list(result.segments):
        print(" Word alignment produced no segments; skipping")
        return

    if output_format == "yaml":
        metadata = merge_metadata(audio_path, parsed, cli_language=language)
        content = result_to_lyricsfile(result, metadata)
    else:
        content = result_to_lrc(result)

    print(f" Writing refined lyrics at: {output_path}")
    # Mode "x" refuses to overwrite; the encoding is pinned so the output is
    # UTF-8 regardless of the platform locale.
    with open(output_path, "x", encoding="utf-8") as out_fh:
        out_fh.write(content)
2025-06-17 01:27:39 -07:00
# Invoke the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()