From f3f9767b0dc284eee9ff47e184ecc5dae1e2804b Mon Sep 17 00:00:00 2001 From: Rufei Zhou Date: Sun, 31 May 2026 10:52:46 -0700 Subject: [PATCH] update readme --- README.md | 23 ++++++++++++++--------- txtlyric_to_lrc/main.py | 4 +++- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 15addcd..7b0bade 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,13 @@ txtlyric-to-lrc aligns lyrics to songs in the [lrc](https://en.wikipedia.org/wiki/LRC_(file_format)) synced lyric format. -This is a wrapper around [stable-ts](https://github.com/jianfch/stable-ts) ([whisper](https://github.com/openai/whisper)) that is designed to be used with known-good transcriptions like untimed song lyrics. +This is a simple wrapper around [stable-ts](https://github.com/jianfch/stable-ts) ([whisper](https://github.com/openai/whisper)) that is designed to be used with known-good transcriptions like untimed song lyrics. -Lyrics are expected to be placed alongside media files in `txt` files, and lines of lyrics (segments) should be separated by newlines. Output `lrc` files will be written out alongside the original `txt` and media files. +Untimed lyrics are expected to be placed alongside media files in `txt` files, and lines of lyrics (segments) should be separated by newlines. Output files will be written alongside the original `txt` and media files, using the same base name. + +If an lrc file with line-level timings already exists, specifying the `yaml` format will generate word-level timings that fit inside each line's timing bounds. + +Note: The suggested workflow is to first convert plain lyrics to lrc files, adjust the line-level timings, and then add word-level timings by outputting to yaml files. ## How to Use ```sh @@ -12,10 +16,13 @@ $ python ./txtlyric_to_lrc/main.py -h Usage: main.py [OPTIONS] DIRECTORY Options: - -h, --help Show this message and exit. 
- -m, --model TEXT Which whisper model to use (choices are those of - whisper.available_models) - -l, --language TEXT What language the lyrics are in [required] + -h, --help Show this message and exit. + -m, --model TEXT Which whisper model to use (choices are those of + whisper.available_models) + -l, --language TEXT What language the lyrics are in [required] + -f, --format [yaml|lrc] Output format. 'yaml' writes a Lyricsfile (.yaml) + with word-level timings; 'lrc' writes classic LRC + (.lrc). [default: yaml] ``` ## How to Develop @@ -26,6 +33,4 @@ poetry install ``` ## TODO -* detect musical interludes (might already be possible in stable-ts) -* connect with lyric retrieval services -* support elrc word timings (need to check navidrome and subsonic clients won't throw a fit) \ No newline at end of file +* connect with lyric retrieval services \ No newline at end of file diff --git a/txtlyric_to_lrc/main.py b/txtlyric_to_lrc/main.py index 416e3b1..1f59ebe 100644 --- a/txtlyric_to_lrc/main.py +++ b/txtlyric_to_lrc/main.py @@ -88,6 +88,7 @@ def _align_from_txt(model, audio_path, txt_path, output_path, output_format, lan original_split=True, regroup=False, vad=True, + nonspeech_skip=None, # use vad to trim line timings but not skip internal non-speech sections ) if not result or not list(result.segments): @@ -123,7 +124,8 @@ def _refine_from_lrc(model, audio_path, lrc_path, output_path, output_format, la audio_path, parsed.segments, language=language, - vad=True, + vad=False, # don't trim existing line timings + suppress_silence=False, # don't adjust word timestamps on detected silence regroup=False, )