From f3f9767b0dc284eee9ff47e184ecc5dae1e2804b Mon Sep 17 00:00:00 2001
From: Rufei Zhou <rufei.z@gmail.com>
Date: Sun, 31 May 2026 10:52:46 -0700
Subject: [PATCH] update readme and adjust vad/silence alignment options

---
 README.md               | 23 ++++++++++++++---------
 txtlyric_to_lrc/main.py |  4 +++-
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 15addcd..7b0bade 100644
--- a/README.md
+++ b/README.md
@@ -2,9 +2,13 @@
 
 txtlyric-to-lrc aligns lyrics to songs in the [lrc](https://en.wikipedia.org/wiki/LRC_(file_format)) synced lyric format.
 
-This is a wrapper around [stable-ts](https://github.com/jianfch/stable-ts) ([whisper](https://github.com/openai/whisper)) that is designed to be used with known-good transcriptions like untimed song lyrics. 
+This is a simple wrapper around [stable-ts](https://github.com/jianfch/stable-ts) ([whisper](https://github.com/openai/whisper)) that is designed to be used with known-good transcriptions like untimed song lyrics. 
 
-Lyrics are expected to be placed alongside media files in `txt` files, and lines of lyrics (segments) should be separated by newlines. Output `lrc` files will be written out alongside the original `txt` and media files.
+Untimed lyrics are expected in `txt` files placed alongside the media files, with lines of lyrics (segments) separated by newlines. Output files are written alongside the original `txt` and media files, using the same file name with the output format's extension (`.yaml` or `.lrc`).
+
+If an `lrc` file with line-level timings already exists, specifying the `yaml` format generates word-level timings that fit within each line's existing timing bounds.
+
+Note: The suggested workflow is to first convert plain lyrics to `lrc` files, adjust the line-level timings as needed, and then add word-level timings by outputting to `yaml` files.
 
 ## How to Use
 ```sh
@@ -12,10 +16,13 @@ $ python ./txtlyric_to_lrc/main.py -h
 Usage: main.py [OPTIONS] DIRECTORY
 
 Options:
-  -h, --help           Show this message and exit.
-  -m, --model TEXT     Which whisper model to use (choices are those of
-                       whisper.available_models)
-  -l, --language TEXT  What language the lyrics are in  [required]
+  -h, --help               Show this message and exit.
+  -m, --model TEXT         Which whisper model to use (choices are those of
+                           whisper.available_models)
+  -l, --language TEXT      What language the lyrics are in  [required]
+  -f, --format [yaml|lrc]  Output format. 'yaml' writes a Lyricsfile (.yaml)
+                           with word-level timings; 'lrc' writes classic LRC
+                           (.lrc).  [default: yaml]
 ```
 
 ## How to Develop
@@ -26,6 +33,4 @@ poetry install
 ```
 
 ## TODO
-* detect musical interludes (might already be possible in stable-ts)
-* connect with lyric retrieval services
-* support elrc word timings (need to check navidrome and subsonic clients won't throw a fit)
\ No newline at end of file
+* connect with lyric retrieval services
\ No newline at end of file
diff --git a/txtlyric_to_lrc/main.py b/txtlyric_to_lrc/main.py
index 416e3b1..1f59ebe 100644
--- a/txtlyric_to_lrc/main.py
+++ b/txtlyric_to_lrc/main.py
@@ -88,6 +88,7 @@ def _align_from_txt(model, audio_path, txt_path, output_path, output_format, lan
         original_split=True,
         regroup=False,
         vad=True,
+        nonspeech_skip=None, # use vad to trim line timings but not skip internal non-speech sections
     )
 
     if not result or not list(result.segments):
@@ -123,7 +124,8 @@ def _refine_from_lrc(model, audio_path, lrc_path, output_path, output_format, la
         audio_path,
         parsed.segments,
         language=language,
-        vad=True,
+        vad=False, # don't trim existing line timings
+        suppress_silence=False, # don't adjust word timestamps on detected silence
         regroup=False,
     )