Compare commits
4 Commits
feb7fd6eca
..
master
| Author | SHA1 | Date | |
|---|---|---|---|
| 17af31dfaf | |||
| f3f9767b0d | |||
| cc7928a532 | |||
| 9161ab0b24 |
@@ -0,0 +1,2 @@
|
|||||||
|
__pycache__/
|
||||||
|
.venv/
|
||||||
@@ -1,10 +1,14 @@
|
|||||||
# txtlyric-to-lrc
|
# txtlyric-to-lrc
|
||||||
|
|
||||||
txtlyric-to-lrc aligns lyrics to songs in the [lrc](https://en.wikipedia.org/wiki/LRC_(file_format)) synced lyric format.
|
txtlyric-to-lrc aligns lyrics to songs in the [lyricsfile](https://github.com/wilywyrm/lrcget/blob/main/LYRICSFILE_CONCEPT.md) and [lrc](https://en.wikipedia.org/wiki/LRC_(file_format)) synced lyric formats.
|
||||||
|
|
||||||
This is a wrapper around [stable-ts](https://github.com/jianfch/stable-ts) ([whisper](https://github.com/openai/whisper)) that is designed to be used with known-good transcriptions like untimed song lyrics.
|
This is a simple wrapper around [stable-ts](https://github.com/jianfch/stable-ts) ([whisper](https://github.com/openai/whisper)) that is designed to be used with known-good transcriptions like untimed song lyrics.
|
||||||
|
|
||||||
Lyrics are expected to be placed alongside media files in `txt` files, and lines of lyrics (segments) should be separated by newlines. Output `lrc` files will be written out alongside the original `txt` and media files.
|
Untimed lyrics are expected to be placed alongside media files in `txt` files, with each line of lyrics (segment) on its own line. Output files are written alongside the original `txt` and media files, using the same base name with the extension of the chosen output format.
|
||||||
|
|
||||||
|
Given a pre-existing lrc file with line-level timings, specifying the `yaml` format will generate word-level timings that fit inside each line's timing bounds.
|
||||||
|
|
||||||
|
Note: The suggested workflow is to first convert plain lyrics to `lrc` files, adjust the line-level timings, and then run again with the `yaml` format to add word-level timings.
|
||||||
|
|
||||||
## How to Use
|
## How to Use
|
||||||
```sh
|
```sh
|
||||||
@@ -12,10 +16,13 @@ $ python ./txtlyric_to_lrc/main.py -h
|
|||||||
Usage: main.py [OPTIONS] DIRECTORY
|
Usage: main.py [OPTIONS] DIRECTORY
|
||||||
|
|
||||||
Options:
|
Options:
|
||||||
-h, --help Show this message and exit.
|
-h, --help Show this message and exit.
|
||||||
-m, --model TEXT Which whisper model to use (choices are those of
|
-m, --model TEXT Which whisper model to use (choices are those of
|
||||||
whisper.available_models)
|
whisper.available_models)
|
||||||
-l, --language TEXT What language the lyrics are in [required]
|
-l, --language TEXT What language the lyrics are in [required]
|
||||||
|
-f, --format [yaml|lrc] Output format. 'yaml' writes a Lyricsfile (.yaml)
|
||||||
|
with word-level timings; 'lrc' writes classic LRC
|
||||||
|
(.lrc). [default: yaml]
|
||||||
```
|
```
|
||||||
|
|
||||||
## How to Develop
|
## How to Develop
|
||||||
@@ -26,6 +33,4 @@ poetry install
|
|||||||
```
|
```
|
||||||
|
|
||||||
## TODO
|
## TODO
|
||||||
* detect musical interludes (might already be possible in stable-ts)
|
|
||||||
* connect with lyric retrieval services
|
* connect with lyric retrieval services
|
||||||
* support elrc word timings (need to check navidrome and subsonic clients won't throw a fit)
|
|
||||||
Generated
+108
-12
@@ -364,6 +364,18 @@ docs = ["sphinx"]
|
|||||||
gmpy = ["gmpy2 (>=2.1.0a4) ; platform_python_implementation != \"PyPy\""]
|
gmpy = ["gmpy2 (>=2.1.0a4) ; platform_python_implementation != \"PyPy\""]
|
||||||
tests = ["pytest (>=4.6)"]
|
tests = ["pytest (>=4.6)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mutagen"
|
||||||
|
version = "1.47.0"
|
||||||
|
description = "read and write audio tags for many formats"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
groups = ["main"]
|
||||||
|
files = [
|
||||||
|
{file = "mutagen-1.47.0-py3-none-any.whl", hash = "sha256:edd96f50c5907a9539d8e5bba7245f62c9f520aef333d13392a79a4f70aca719"},
|
||||||
|
{file = "mutagen-1.47.0.tar.gz", hash = "sha256:719fadef0a978c31b4cf3c956261b3c58b6948b32023078a2117b1de09f0fc99"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "networkx"
|
name = "networkx"
|
||||||
version = "3.4.2"
|
version = "3.4.2"
|
||||||
@@ -729,6 +741,101 @@ triton = {version = ">=2.0.0", markers = "platform_machine == \"x86_64\" and sys
|
|||||||
[package.extras]
|
[package.extras]
|
||||||
dev = ["black", "flake8", "isort", "pytest", "scipy"]
|
dev = ["black", "flake8", "isort", "pytest", "scipy"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "packaging"
|
||||||
|
version = "26.2"
|
||||||
|
description = "Core utilities for Python packages"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
groups = ["main"]
|
||||||
|
files = [
|
||||||
|
{file = "packaging-26.2-py3-none-any.whl", hash = "sha256:5fc45236b9446107ff2415ce77c807cee2862cb6fac22b8a73826d0693b0980e"},
|
||||||
|
{file = "packaging-26.2.tar.gz", hash = "sha256:ff452ff5a3e828ce110190feff1178bb1f2ea2281fa2075aadb987c2fb221661"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pyyaml"
|
||||||
|
version = "6.0.3"
|
||||||
|
description = "YAML parser and emitter for Python"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
groups = ["main"]
|
||||||
|
files = [
|
||||||
|
{file = "PyYAML-6.0.3-cp38-cp38-macosx_10_13_x86_64.whl", hash = "sha256:c2514fceb77bc5e7a2f7adfaa1feb2fb311607c9cb518dbc378688ec73d8292f"},
|
||||||
|
{file = "PyYAML-6.0.3-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c57bb8c96f6d1808c030b1687b9b5fb476abaa47f0db9c0101f5e9f394e97f4"},
|
||||||
|
{file = "PyYAML-6.0.3-cp38-cp38-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:efd7b85f94a6f21e4932043973a7ba2613b059c4a000551892ac9f1d11f5baf3"},
|
||||||
|
{file = "PyYAML-6.0.3-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:22ba7cfcad58ef3ecddc7ed1db3409af68d023b7f940da23c6c2a1890976eda6"},
|
||||||
|
{file = "PyYAML-6.0.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:6344df0d5755a2c9a276d4473ae6b90647e216ab4757f8426893b5dd2ac3f369"},
|
||||||
|
{file = "PyYAML-6.0.3-cp38-cp38-win32.whl", hash = "sha256:3ff07ec89bae51176c0549bc4c63aa6202991da2d9a6129d7aef7f1407d3f295"},
|
||||||
|
{file = "PyYAML-6.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:5cf4e27da7e3fbed4d6c3d8e797387aaad68102272f8f9752883bc32d61cb87b"},
|
||||||
|
{file = "pyyaml-6.0.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:214ed4befebe12df36bcc8bc2b64b396ca31be9304b8f59e25c11cf94a4c033b"},
|
||||||
|
{file = "pyyaml-6.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02ea2dfa234451bbb8772601d7b8e426c2bfa197136796224e50e35a78777956"},
|
||||||
|
{file = "pyyaml-6.0.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b30236e45cf30d2b8e7b3e85881719e98507abed1011bf463a8fa23e9c3e98a8"},
|
||||||
|
{file = "pyyaml-6.0.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:66291b10affd76d76f54fad28e22e51719ef9ba22b29e1d7d03d6777a9174198"},
|
||||||
|
{file = "pyyaml-6.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c7708761fccb9397fe64bbc0395abcae8c4bf7b0eac081e12b809bf47700d0b"},
|
||||||
|
{file = "pyyaml-6.0.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:418cf3f2111bc80e0933b2cd8cd04f286338bb88bdc7bc8e6dd775ebde60b5e0"},
|
||||||
|
{file = "pyyaml-6.0.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5e0b74767e5f8c593e8c9b5912019159ed0533c70051e9cce3e8b6aa699fcd69"},
|
||||||
|
{file = "pyyaml-6.0.3-cp310-cp310-win32.whl", hash = "sha256:28c8d926f98f432f88adc23edf2e6d4921ac26fb084b028c733d01868d19007e"},
|
||||||
|
{file = "pyyaml-6.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:bdb2c67c6c1390b63c6ff89f210c8fd09d9a1217a465701eac7316313c915e4c"},
|
||||||
|
{file = "pyyaml-6.0.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e"},
|
||||||
|
{file = "pyyaml-6.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824"},
|
||||||
|
{file = "pyyaml-6.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c"},
|
||||||
|
{file = "pyyaml-6.0.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00"},
|
||||||
|
{file = "pyyaml-6.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d"},
|
||||||
|
{file = "pyyaml-6.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a"},
|
||||||
|
{file = "pyyaml-6.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4"},
|
||||||
|
{file = "pyyaml-6.0.3-cp311-cp311-win32.whl", hash = "sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b"},
|
||||||
|
{file = "pyyaml-6.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf"},
|
||||||
|
{file = "pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196"},
|
||||||
|
{file = "pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0"},
|
||||||
|
{file = "pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28"},
|
||||||
|
{file = "pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c"},
|
||||||
|
{file = "pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc"},
|
||||||
|
{file = "pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e"},
|
||||||
|
{file = "pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea"},
|
||||||
|
{file = "pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5"},
|
||||||
|
{file = "pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b"},
|
||||||
|
{file = "pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd"},
|
||||||
|
{file = "pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8"},
|
||||||
|
{file = "pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1"},
|
||||||
|
{file = "pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c"},
|
||||||
|
{file = "pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5"},
|
||||||
|
{file = "pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6"},
|
||||||
|
{file = "pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6"},
|
||||||
|
{file = "pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be"},
|
||||||
|
{file = "pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26"},
|
||||||
|
{file = "pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c"},
|
||||||
|
{file = "pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb"},
|
||||||
|
{file = "pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac"},
|
||||||
|
{file = "pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310"},
|
||||||
|
{file = "pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7"},
|
||||||
|
{file = "pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788"},
|
||||||
|
{file = "pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5"},
|
||||||
|
{file = "pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764"},
|
||||||
|
{file = "pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35"},
|
||||||
|
{file = "pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac"},
|
||||||
|
{file = "pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3"},
|
||||||
|
{file = "pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3"},
|
||||||
|
{file = "pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba"},
|
||||||
|
{file = "pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c"},
|
||||||
|
{file = "pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702"},
|
||||||
|
{file = "pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c"},
|
||||||
|
{file = "pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065"},
|
||||||
|
{file = "pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65"},
|
||||||
|
{file = "pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9"},
|
||||||
|
{file = "pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b"},
|
||||||
|
{file = "pyyaml-6.0.3-cp39-cp39-macosx_10_13_x86_64.whl", hash = "sha256:b865addae83924361678b652338317d1bd7e79b1f4596f96b96c77a5a34b34da"},
|
||||||
|
{file = "pyyaml-6.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c3355370a2c156cffb25e876646f149d5d68f5e0a3ce86a5084dd0b64a994917"},
|
||||||
|
{file = "pyyaml-6.0.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3c5677e12444c15717b902a5798264fa7909e41153cdf9ef7ad571b704a63dd9"},
|
||||||
|
{file = "pyyaml-6.0.3-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5ed875a24292240029e4483f9d4a4b8a1ae08843b9c54f43fcc11e404532a8a5"},
|
||||||
|
{file = "pyyaml-6.0.3-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0150219816b6a1fa26fb4699fb7daa9caf09eb1999f3b70fb6e786805e80375a"},
|
||||||
|
{file = "pyyaml-6.0.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:fa160448684b4e94d80416c0fa4aac48967a969efe22931448d853ada8baf926"},
|
||||||
|
{file = "pyyaml-6.0.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:27c0abcb4a5dac13684a37f76e701e054692a9b2d3064b70f5e4eb54810553d7"},
|
||||||
|
{file = "pyyaml-6.0.3-cp39-cp39-win32.whl", hash = "sha256:1ebe39cb5fc479422b83de611d14e2c0d3bb2a18bbcb01f229ab3cfbd8fee7a0"},
|
||||||
|
{file = "pyyaml-6.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:2e71d11abed7344e42a8849600193d15b6def118602c4c176f748e4583246007"},
|
||||||
|
{file = "pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "regex"
|
name = "regex"
|
||||||
version = "2024.11.6"
|
version = "2024.11.6"
|
||||||
@@ -877,17 +984,6 @@ enabler = ["pytest-enabler (>=2.2)"]
|
|||||||
test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.7.2)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf ; sys_platform != \"cygwin\"", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"]
|
test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.7.2)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf ; sys_platform != \"cygwin\"", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"]
|
||||||
type = ["importlib_metadata (>=7.0.2) ; python_version < \"3.10\"", "jaraco.develop (>=7.21) ; sys_platform != \"cygwin\"", "mypy (==1.14.*)", "pytest-mypy"]
|
type = ["importlib_metadata (>=7.0.2) ; python_version < \"3.10\"", "jaraco.develop (>=7.21) ; sys_platform != \"cygwin\"", "mypy (==1.14.*)", "pytest-mypy"]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "srt"
|
|
||||||
version = "3.5.3"
|
|
||||||
description = "A tiny library for parsing, modifying, and composing SRT files."
|
|
||||||
optional = false
|
|
||||||
python-versions = ">=2.7"
|
|
||||||
groups = ["main"]
|
|
||||||
files = [
|
|
||||||
{file = "srt-3.5.3.tar.gz", hash = "sha256:4884315043a4f0740fd1f878ed6caa376ac06d70e135f306a6dc44632eed0cc0"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "stable-ts"
|
name = "stable-ts"
|
||||||
version = "2.19.0"
|
version = "2.19.0"
|
||||||
@@ -1155,4 +1251,4 @@ zstd = ["zstandard (>=0.18.0)"]
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.1"
|
lock-version = "2.1"
|
||||||
python-versions = ">=3.10"
|
python-versions = ">=3.10"
|
||||||
content-hash = "509d58adcee14dbd57cc252e3610fedb15e5a302767f6f8f190507250b1ff542"
|
content-hash = "218374baa7491f748761fd66d8ccfbc23957d1fd02d810f65f2be27cfb8cfc0d"
|
||||||
|
|||||||
+4
-1
@@ -10,7 +10,10 @@ requires-python = ">=3.10"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"stable-ts (>=2.19.0,<3.0.0)",
|
"stable-ts (>=2.19.0,<3.0.0)",
|
||||||
"click (>=8.2.1,<9.0.0)",
|
"click (>=8.2.1,<9.0.0)",
|
||||||
"srt (>=3.5.3,<4.0.0)"
|
"mutagen (>=1.47.0,<2.0.0)",
|
||||||
|
"pyyaml (>=6.0.0,<7.0.0)",
|
||||||
|
# Required at runtime by silero-vad
|
||||||
|
"packaging (>=21.0)"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,451 @@
|
|||||||
|
"""Unit tests for ``txtlyric_to_lrc.output``.
|
||||||
|
|
||||||
|
Run from the repo root with::
|
||||||
|
|
||||||
|
python -m unittest discover tests
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import unittest
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
# The package is laid out as a script-style module
|
||||||
|
# (``txtlyric_to_lrc/main.py`` does ``from output import ...``), so we
|
||||||
|
# add the package directory to ``sys.path`` to import ``output`` directly.
|
||||||
|
_PKG_DIR = os.path.normpath(
|
||||||
|
os.path.join(os.path.dirname(__file__), os.pardir, "txtlyric_to_lrc")
|
||||||
|
)
|
||||||
|
if _PKG_DIR not in sys.path:
|
||||||
|
sys.path.insert(0, _PKG_DIR)
|
||||||
|
|
||||||
|
import output # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeWord:
|
||||||
|
def __init__(self, word: str, start: float, end: float):
|
||||||
|
self.word = word
|
||||||
|
self.start = start
|
||||||
|
self.end = end
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeSegment:
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
text: str,
|
||||||
|
start: float,
|
||||||
|
end: float,
|
||||||
|
words: Optional[List[_FakeWord]] = None,
|
||||||
|
):
|
||||||
|
self.text = text
|
||||||
|
self.start = start
|
||||||
|
self.end = end
|
||||||
|
self.words = words or []
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeResult:
|
||||||
|
def __init__(self, segments: List[_FakeSegment]):
|
||||||
|
self.segments = segments
|
||||||
|
|
||||||
|
|
||||||
|
class ResultToLrcTests(unittest.TestCase):
    """Tests for ``output.result_to_lrc`` and ``output._format_lrc_timestamp``."""

    @staticmethod
    def _render_lines(result, **kwargs):
        """Render *result* to LRC and return the stripped text as lines.

        Keyword arguments are forwarded unchanged to
        ``output.result_to_lrc``.
        """
        return output.result_to_lrc(result, **kwargs).strip().splitlines()

    def test_inserts_clear_marker_when_gap_exceeds_threshold(self):
        """A gap above ``gap_threshold`` produces a text-less timestamp line."""
        result = _FakeResult([
            _FakeSegment(" Hello world ", 1.0, 3.5),
            _FakeSegment(" After the silence", 8.5, 11.0),  # 5.0s gap
        ])
        self.assertEqual(
            self._render_lines(result, gap_threshold=1.5),
            [
                "[00:01.00]Hello world",
                "[00:03.50]",
                "[00:08.50]After the silence",
                "[00:11.00]",
            ],
        )

    def test_omits_clear_marker_when_gap_is_within_threshold(self):
        """A short gap between lines produces no intermediate marker."""
        result = _FakeResult([
            _FakeSegment(" One", 1.0, 3.5),
            _FakeSegment(" Two", 4.4, 6.0),  # 0.9s gap, well under default
        ])
        self.assertEqual(
            self._render_lines(result),
            [
                "[00:01.00]One",
                "[00:04.40]Two",
                "[00:06.00]",
            ],
        )

    def test_gap_threshold_argument_controls_marker_emission(self):
        """The same 1.7s gap is marked or not depending on ``gap_threshold``."""
        result = _FakeResult([
            _FakeSegment(" One", 0.0, 2.0),
            _FakeSegment(" Two", 3.7, 5.0),  # 1.7s gap
        ])
        lenient = self._render_lines(result, gap_threshold=2.0)
        strict = self._render_lines(result, gap_threshold=1.5)

        self.assertNotIn("[00:02.00]", lenient)
        self.assertIn("[00:02.00]", strict)

    def test_final_clear_marker_always_emitted(self):
        """The last line is always followed by a marker at its end time."""
        result = _FakeResult([
            _FakeSegment(" Only line", 1.0, 4.25),
        ])
        self.assertEqual(
            self._render_lines(result),
            [
                "[00:01.00]Only line",
                "[00:04.25]",
            ],
        )

    def test_empty_result_returns_empty_string(self):
        """A result with no segments renders to the empty string."""
        self.assertEqual(output.result_to_lrc(_FakeResult([])), "")

    def test_timestamp_handles_minutes_and_clamps_negative(self):
        """Timestamps roll over into minutes; negative input clamps to zero."""
        self.assertEqual(output._format_lrc_timestamp(75.5), "01:15.50")
        self.assertEqual(output._format_lrc_timestamp(-1.0), "00:00.00")
|
||||||
|
|
||||||
|
|
||||||
|
class ResultToLyricsfileTests(unittest.TestCase):
    """Tests for ``output.result_to_lyricsfile``, checked via its parsed YAML."""

    # Fully populated metadata shared by most tests.
    METADATA = output.TrackMetadata(
        title="Test Title",
        artist="Test Artist",
        album="Test Album",
        duration_ms=240000,
        language="en",
    )

    @staticmethod
    def _load(result, metadata):
        """Render *result* to a Lyricsfile and parse it back with ``safe_load``."""
        return yaml.safe_load(output.result_to_lyricsfile(result, metadata))

    def _english_segment(self) -> _FakeSegment:
        """Return a space-delimited line whose words carry leading spaces."""
        words = [
            _FakeWord("The", 12.45, 12.90),
            _FakeWord(" club", 12.90, 13.50),
            _FakeWord(" isn't", 13.50, 14.20),
            _FakeWord(" the", 14.20, 14.60),
            _FakeWord(" lover", 17.10, 18.20),
        ]
        return _FakeSegment("The club isn't the lover", 12.45, 18.20, words=words)

    def _cjk_segment(self) -> _FakeSegment:
        """Return a line with one word per character and no spaces."""
        words = [
            _FakeWord("你", 1.0, 1.2),
            _FakeWord("好", 1.2, 1.4),
            _FakeWord("世", 1.4, 1.6),
            _FakeWord("界", 1.6, 1.9),
        ]
        return _FakeSegment("你好世界", 1.0, 1.9, words=words)

    def test_english_words_use_trailing_spaces_except_last(self):
        """Leading spaces on input words become trailing spaces on output."""
        doc = self._load(_FakeResult([self._english_segment()]), self.METADATA)
        words = doc["lines"][0]["words"]
        self.assertEqual(
            [w["text"] for w in words],
            ["The ", "club ", "isn't ", "the ", "lover"],
        )

    def test_english_words_concatenate_to_line_text(self):
        """Joining the word texts reproduces the line text exactly."""
        doc = self._load(_FakeResult([self._english_segment()]), self.METADATA)
        line = doc["lines"][0]
        self.assertEqual("".join(w["text"] for w in line["words"]), line["text"])

    def test_cjk_words_have_no_spurious_spaces(self):
        """Words without spaces in the input gain none in the output."""
        doc = self._load(_FakeResult([self._cjk_segment()]), self.METADATA)
        words = doc["lines"][0]["words"]
        self.assertEqual([w["text"] for w in words], ["你", "好", "世", "界"])
        self.assertEqual(
            "".join(w["text"] for w in words), doc["lines"][0]["text"]
        )

    def test_word_timestamps_are_integer_milliseconds(self):
        """Word times in float seconds are emitted as integer milliseconds."""
        doc = self._load(_FakeResult([self._english_segment()]), self.METADATA)
        first_word = doc["lines"][0]["words"][0]
        self.assertEqual(first_word["start_ms"], 12450)
        self.assertEqual(first_word["end_ms"], 12900)
        self.assertIsInstance(first_word["start_ms"], int)
        self.assertIsInstance(first_word["end_ms"], int)

    def test_segment_without_words_omits_words_key(self):
        """A line without word timings has no ``words`` key; text is stripped."""
        seg = _FakeSegment(" Bare line ", 20.0, 22.5)
        doc = self._load(_FakeResult([seg]), self.METADATA)
        line = doc["lines"][0]
        self.assertNotIn("words", line)
        self.assertEqual(line["text"], "Bare line")
        self.assertEqual(line["start_ms"], 20000)
        self.assertEqual(line["end_ms"], 22500)

    def test_metadata_optional_fields_omitted_when_unset(self):
        """Unset optional metadata fields are left out of the document."""
        meta = output.TrackMetadata(title="T", artist="A")
        doc = self._load(_FakeResult([]), meta)
        self.assertEqual(doc["metadata"]["title"], "T")
        self.assertEqual(doc["metadata"]["artist"], "A")
        self.assertEqual(doc["metadata"]["instrumental"], False)
        for absent in ("album", "duration_ms", "language"):
            self.assertNotIn(absent, doc["metadata"])

    def test_metadata_optional_fields_included_when_set(self):
        """Optional metadata fields that are set appear in the document."""
        doc = self._load(_FakeResult([]), self.METADATA)
        self.assertEqual(doc["metadata"]["album"], "Test Album")
        self.assertEqual(doc["metadata"]["duration_ms"], 240000)
        self.assertEqual(doc["metadata"]["language"], "en")

    def test_plain_block_is_omitted(self):
        """The rendered document has no top-level ``plain`` key."""
        doc = self._load(_FakeResult([self._english_segment()]), self.METADATA)
        self.assertNotIn("plain", doc)

    def test_version_is_one_dot_zero(self):
        """The ``version`` field parses as the string ``"1.0"``, not a float."""
        doc = self._load(_FakeResult([]), self.METADATA)
        self.assertEqual(doc["version"], "1.0")

    def test_apostrophe_round_trips_through_yaml(self):
        """A word containing an apostrophe survives the YAML round trip."""
        result = _FakeResult([self._english_segment()])
        rendered = output.result_to_lyricsfile(result, self.METADATA)
        doc = yaml.safe_load(rendered)
        self.assertEqual(doc["lines"][0]["words"][2]["text"], "isn't ")
|
||||||
|
|
||||||
|
|
||||||
|
class ParseLrcFileTests(unittest.TestCase):
|
||||||
|
def test_basic_timed_lines(self):
|
||||||
|
lrc = (
|
||||||
|
"[00:01.00]Hello world\n"
|
||||||
|
"[00:04.50]Second line\n"
|
||||||
|
"[00:08.00]Third line\n"
|
||||||
|
)
|
||||||
|
parsed = output.parse_lrc_file(lrc)
|
||||||
|
self.assertEqual(len(parsed.segments), 3)
|
||||||
|
self.assertAlmostEqual(parsed.segments[0]["start"], 1.0)
|
||||||
|
self.assertAlmostEqual(parsed.segments[0]["end"], 4.5)
|
||||||
|
self.assertEqual(parsed.segments[0]["text"], "Hello world")
|
||||||
|
self.assertAlmostEqual(parsed.segments[1]["start"], 4.5)
|
||||||
|
self.assertAlmostEqual(parsed.segments[1]["end"], 8.0)
|
||||||
|
self.assertEqual(parsed.segments[1]["text"], "Second line")
|
||||||
|
|
||||||
|
def test_gap_markers_set_end_on_preceding_segment(self):
|
||||||
|
lrc = (
|
||||||
|
"[00:01.00]First line\n"
|
||||||
|
"[00:03.00]\n"
|
||||||
|
"[00:10.00]After gap\n"
|
||||||
|
)
|
||||||
|
parsed = output.parse_lrc_file(lrc)
|
||||||
|
self.assertEqual(len(parsed.segments), 2)
|
||||||
|
self.assertAlmostEqual(parsed.segments[0]["end"], 3.0)
|
||||||
|
self.assertAlmostEqual(parsed.segments[1]["start"], 10.0)
|
||||||
|
|
||||||
|
def test_last_segment_gets_default_end_when_no_trailing_marker(self):
|
||||||
|
lrc = "[00:05.00]Only line\n"
|
||||||
|
parsed = output.parse_lrc_file(lrc)
|
||||||
|
self.assertEqual(len(parsed.segments), 1)
|
||||||
|
self.assertAlmostEqual(parsed.segments[0]["end"], 10.0)
|
||||||
|
|
||||||
|
def test_last_segment_uses_trailing_gap_marker(self):
|
||||||
|
lrc = (
|
||||||
|
"[00:05.00]Last line\n"
|
||||||
|
"[00:09.50]\n"
|
||||||
|
)
|
||||||
|
parsed = output.parse_lrc_file(lrc)
|
||||||
|
self.assertEqual(len(parsed.segments), 1)
|
||||||
|
self.assertAlmostEqual(parsed.segments[0]["end"], 9.5)
|
||||||
|
|
||||||
|
def test_metadata_tags_are_extracted(self):
|
||||||
|
lrc = (
|
||||||
|
"[ti:My Song]\n"
|
||||||
|
"[ar:My Artist]\n"
|
||||||
|
"[al:My Album]\n"
|
||||||
|
"[00:01.00]Hello\n"
|
||||||
|
)
|
||||||
|
parsed = output.parse_lrc_file(lrc)
|
||||||
|
self.assertEqual(parsed.metadata_tags["title"], "My Song")
|
||||||
|
self.assertEqual(parsed.metadata_tags["artist"], "My Artist")
|
||||||
|
self.assertEqual(parsed.metadata_tags["album"], "My Album")
|
||||||
|
|
||||||
|
def test_language_tag_variants(self):
    """Each accepted spelling of the language tag maps to the ``language`` key."""
    spellings = ("la", "lang", "language")
    for tag in spellings:
        result = output.parse_lrc_file(f"[{tag}:ja]\n[00:01.00]Hello\n")
        found = result.metadata_tags.get("language")
        self.assertEqual(found, "ja", f"tag [{tag}:ja] should map to language=ja")
|
||||||
|
|
||||||
|
def test_length_tag_parsed_to_duration_ms(self):
    """A ``[length:m:ss]`` tag is stored as a millisecond count in string form."""
    result = output.parse_lrc_file("[length:3:40]\n[00:01.00]Hello\n")
    self.assertEqual(result.metadata_tags["duration_ms"], "220000")
|
||||||
|
|
||||||
|
def test_length_tag_with_fraction(self):
    """A two-digit fraction in ``[length:...]`` is read as hundredths of a second."""
    result = output.parse_lrc_file("[length:1:30.50]\n[00:01.00]Hello\n")
    self.assertEqual(result.metadata_tags["duration_ms"], "90500")
|
||||||
|
|
||||||
|
def test_offset_applied_to_timestamps(self):
    """A positive ``[offset:...]`` in milliseconds is added to every line start."""
    lrc_text = "[offset:+500]\n[00:01.00]Hello\n[00:04.00]World\n"
    timed = output.parse_lrc_file(lrc_text).segments
    self.assertAlmostEqual(timed[0]["start"], 1.5)
    self.assertAlmostEqual(timed[1]["start"], 4.5)
|
||||||
|
|
||||||
|
def test_negative_offset_clamps_to_zero(self):
    """An offset that would move a line before zero leaves it at exactly zero."""
    lrc_text = "[offset:-2000]\n[00:01.00]Hello\n"
    timed = output.parse_lrc_file(lrc_text).segments
    self.assertAlmostEqual(timed[0]["start"], 0.0)
|
||||||
|
|
||||||
|
def test_unknown_tags_go_to_dropped(self):
    """ID tags without a Lyricsfile counterpart land in ``dropped_tags`` only."""
    lrc_text = "[re:LRC Editor v3.0]\n[by:Some Person]\n[ve:1.0]\n[00:01.00]Hello\n"
    result = output.parse_lrc_file(lrc_text)
    dropped = result.dropped_tags
    self.assertIn("re", dropped)
    self.assertIn("by", dropped)
    self.assertIn("ve", dropped)
    self.assertEqual(len(result.metadata_tags), 0)
|
||||||
|
|
||||||
|
def test_empty_content_returns_empty(self):
    """Parsing an empty string yields neither segments nor metadata."""
    result = output.parse_lrc_file("")
    self.assertEqual(len(result.segments), 0)
    self.assertEqual(len(result.metadata_tags), 0)
|
||||||
|
|
||||||
|
def test_metadata_only_returns_no_segments(self):
    """A file with only ID tags produces metadata but no timed segments."""
    result = output.parse_lrc_file("[ti:Title]\n[ar:Artist]\n")
    self.assertEqual(len(result.segments), 0)
    self.assertEqual(result.metadata_tags["title"], "Title")
|
||||||
|
|
||||||
|
def test_two_digit_fraction(self):
    """A two-digit fraction is interpreted as hundredths of a second."""
    timed = output.parse_lrc_file("[01:23.45]Line\n").segments
    self.assertAlmostEqual(timed[0]["start"], 83.45)
|
||||||
|
|
||||||
|
def test_three_digit_fraction(self):
    """A three-digit fraction is interpreted as milliseconds."""
    timed = output.parse_lrc_file("[01:23.456]Line\n").segments
    self.assertAlmostEqual(timed[0]["start"], 83.456)
|
||||||
|
|
||||||
|
def test_multiple_gap_markers_only_last_wins(self):
    """With consecutive gap markers the latest one sets the preceding line's end."""
    lrc_text = "[00:01.00]Line\n[00:03.00]\n[00:05.00]\n[00:10.00]Next\n"
    timed = output.parse_lrc_file(lrc_text).segments
    self.assertEqual(len(timed), 2)
    self.assertAlmostEqual(timed[0]["end"], 5.0)
|
||||||
|
|
||||||
|
def test_segments_sorted_by_timestamp(self):
    """Lines listed out of order come back ordered by their timestamps."""
    lrc_text = "[00:10.00]Second\n[00:01.00]First\n"
    timed = output.parse_lrc_file(lrc_text).segments
    self.assertEqual(timed[0]["text"], "First")
    self.assertEqual(timed[1]["text"], "Second")
|
||||||
|
|
||||||
|
|
||||||
|
class MergeMetadataTests(unittest.TestCase):
    """Tests for ``output.merge_metadata`` and the tags it consumes.

    The merge tests call the real ``merge_metadata`` so that a regression in
    its precedence rules is caught; only the audio-file lookup is stubbed.
    """

    @staticmethod
    def _merge(parsed, cli_language=None):
        """Run ``merge_metadata`` with ``extract_metadata`` stubbed out.

        ``merge_metadata`` reads tags from the audio file through
        ``output.extract_metadata``. No audio file exists in the test
        environment, so that lookup is replaced with a stand-in returning the
        same placeholder title/artist the real function falls back to, and
        echoing the ``language`` argument as the real function does.
        """
        from unittest import mock

        def fake_extract(audio_path, language=None):
            return output.TrackMetadata(
                title="Unknown",
                artist="Unknown Artist",
                language=language,
            )

        with mock.patch.object(output, "extract_metadata", side_effect=fake_extract):
            return output.merge_metadata("unused.mp3", parsed, cli_language=cli_language)

    def test_lrc_tags_override_placeholders(self):
        """LRC title/artist/album win over the placeholder audio metadata."""
        parsed = output.ParsedLrc(
            metadata_tags={"title": "LRC Title", "artist": "LRC Artist", "album": "LRC Album"},
        )
        meta = self._merge(parsed)
        self.assertEqual(meta.title, "LRC Title")
        self.assertEqual(meta.artist, "LRC Artist")
        self.assertEqual(meta.album, "LRC Album")

    def test_cli_language_overrides_lrc_tag(self):
        """An explicit CLI language takes precedence over the LRC language tag."""
        parsed = output.ParsedLrc(
            metadata_tags={"language": "ja"},
        )
        meta = self._merge(parsed, cli_language="en")
        self.assertEqual(meta.language, "en")

    def test_lrc_language_used_when_cli_absent(self):
        """Without a CLI language the LRC language tag is used."""
        parsed = output.ParsedLrc(
            metadata_tags={"language": "ja"},
        )
        meta = self._merge(parsed, cli_language=None)
        self.assertEqual(meta.language, "ja")

    def test_dropped_tags_populated(self):
        """Unmapped tags are kept with their values; mapped tags are not dropped."""
        lrc = (
            "[re:SomeEditor]\n"
            "[by:Author]\n"
            "[ti:Title]\n"
            "[00:01.00]Hello\n"
        )
        parsed = output.parse_lrc_file(lrc)
        self.assertEqual(parsed.dropped_tags["re"], "SomeEditor")
        self.assertEqual(parsed.dropped_tags["by"], "Author")
        self.assertNotIn("ti", parsed.dropped_tags)
|
||||||
|
|
||||||
|
|
||||||
|
class FileIsAudioTests(unittest.TestCase):
    """Checks for ``output.file_is_audio`` extension matching."""

    def test_recognises_known_extensions(self):
        """Known audio extensions are accepted (names are lower-cased before the call)."""
        candidates = ("song.mp3", "track.flac", "tune.OPUS")
        for name in candidates:
            recognised = output.file_is_audio(name.lower())
            self.assertTrue(recognised, f"expected {name!r} to be recognised as audio")

    def test_rejects_unknown_extensions(self):
        """Lyric, image and text files are not treated as audio."""
        non_audio = ("notes.txt", "cover.jpg", "lyrics.lrc", "lyrics.yaml")
        for name in non_audio:
            self.assertFalse(output.file_is_audio(name))
|
||||||
|
|
||||||
|
|
||||||
|
# Run the test cases when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||||
+124
-56
@@ -2,79 +2,147 @@ import os
|
|||||||
|
|
||||||
import click
|
import click
|
||||||
import stable_whisper
|
import stable_whisper
|
||||||
import srt
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
song_file_extensions = set([
|
from output import (
|
||||||
"mp3", # explicitly supported by whisper
|
extract_metadata,
|
||||||
"wav",
|
file_is_audio,
|
||||||
"mp4",
|
merge_metadata,
|
||||||
"mpeg",
|
parse_lrc_file,
|
||||||
"mpga",
|
result_to_lrc,
|
||||||
"m4a",
|
result_to_lyricsfile,
|
||||||
"webm",
|
)
|
||||||
"flac", # stable-ts likely uses ffmpeg to convert this for whisper
|
|
||||||
])
|
|
||||||
|
OUTPUT_EXTENSIONS = {
|
||||||
|
"yaml": "yaml",
|
||||||
|
"lrc": "lrc",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@click.command()
|
@click.command()
|
||||||
@click.help_option("--help", "-h")
|
@click.help_option("--help", "-h")
|
||||||
@click.option('--model', '-m', default='small', help='Which whisper model to use (choices are those of whisper.available_models)')
|
@click.option(
|
||||||
@click.option('--language', '-l', required=True, help='What language the lyrics are in')
|
"--model",
|
||||||
@click.argument('directory')
|
"-m",
|
||||||
def main(model, language, directory):
|
default="small",
|
||||||
|
help="Which whisper model to use (choices are those of whisper.available_models)",
|
||||||
|
)
|
||||||
|
@click.option(
|
||||||
|
"--language",
|
||||||
|
"-l",
|
||||||
|
required=True,
|
||||||
|
help="What language the lyrics are in",
|
||||||
|
)
|
||||||
|
@click.option(
|
||||||
|
"--format",
|
||||||
|
"-f",
|
||||||
|
"output_format",
|
||||||
|
type=click.Choice(["yaml", "lrc"], case_sensitive=False),
|
||||||
|
default="yaml",
|
||||||
|
show_default=True,
|
||||||
|
help="Output format. 'yaml' writes a Lyricsfile (.yaml) with word-level timings; 'lrc' writes classic LRC (.lrc).",
|
||||||
|
)
|
||||||
|
@click.argument("directory")
|
||||||
|
def main(model, language, output_format, directory):
|
||||||
|
output_format = output_format.lower()
|
||||||
|
output_ext = OUTPUT_EXTENSIONS[output_format]
|
||||||
|
|
||||||
print(f"Loading {model} model for use with language {language}...")
|
print(f"Loading {model} model for use with language {language}...")
|
||||||
model = stable_whisper.load_model(model)
|
model = stable_whisper.load_model(model)
|
||||||
|
|
||||||
for (dirpath, _, filenames) in os.walk(directory):
|
for (dirpath, _, filenames) in os.walk(directory):
|
||||||
if any([ file_is_audio(f) for f in filenames ]):
|
if not any(file_is_audio(f) for f in filenames):
|
||||||
# audio files exist here, so let's process this folder
|
continue
|
||||||
for f in filenames:
|
for f in filenames:
|
||||||
if file_is_audio(f):
|
if not file_is_audio(f):
|
||||||
filename = os.path.join(dirpath, f)
|
continue
|
||||||
# get corresponding txt file name
|
audio_path = os.path.join(dirpath, f)
|
||||||
# (we expect unaligned files to have the same filename as the song, except for the extension)
|
base, _ = os.path.splitext(audio_path)
|
||||||
unaligned_lyrics_filename = os.path.join(dirpath, os.path.splitext(f)[0] + ".txt")
|
output_path = base + "." + output_ext
|
||||||
# (we also expect existing .lrc files to contain aligned lyrics)
|
|
||||||
aligned_lyrics_filename = os.path.join(dirpath, os.path.splitext(f)[0] + ".lrc")
|
|
||||||
|
|
||||||
# run model to get aligned srt
|
if os.path.exists(output_path):
|
||||||
if not os.path.exists(unaligned_lyrics_filename):
|
print(f"Aligned lyric file already exists for {audio_path}: {output_path}")
|
||||||
print(f"No corresponding unaligned lyric txt exists for {filename}")
|
continue
|
||||||
continue
|
|
||||||
if os.path.exists(aligned_lyrics_filename):
|
|
||||||
print(f"Corresponding aligned lyric txt already exists for {filename}")
|
|
||||||
continue
|
|
||||||
print(f"Aligning lyrics for {filename}")
|
|
||||||
|
|
||||||
result: stable_whisper.WhisperResult = model.align(filename, open(unaligned_lyrics_filename).read(), language=language, original_split=True, regroup=False)
|
lrc_path = base + ".lrc"
|
||||||
|
txt_path = base + ".txt"
|
||||||
|
|
||||||
# turn srt to lrc
|
if os.path.exists(lrc_path):
|
||||||
srt = result.to_srt_vtt(filepath=None, word_level=False)
|
_refine_from_lrc(model, audio_path, lrc_path, output_path, output_format, language)
|
||||||
lrc = srt_to_lrc(srt)
|
elif os.path.exists(txt_path):
|
||||||
print(f"Writing aligned lyrics at: {aligned_lyrics_filename}")
|
_align_from_txt(model, audio_path, txt_path, output_path, output_format, language)
|
||||||
open(aligned_lyrics_filename, "x").write(lrc)
|
else:
|
||||||
|
print(f"No .lrc or .txt sidecar found for {audio_path}")
|
||||||
|
|
||||||
def get_file_extension(filename: str) -> str:
|
|
||||||
# "asdf.omg.lol" -> [asdf.omg, .lol] -> lol
|
|
||||||
return os.path.splitext(filename)[1][1:]
|
|
||||||
|
|
||||||
def file_is_audio(filename: str) -> bool:
|
def _align_from_txt(model, audio_path, txt_path, output_path, output_format, language):
|
||||||
return get_file_extension(filename) in song_file_extensions
|
"""Full alignment from plain text: determines line boundaries and word timings."""
|
||||||
|
print(f"Aligning lyrics for {audio_path} (from .txt)")
|
||||||
|
with open(txt_path) as fh:
|
||||||
|
unaligned_text = fh.read()
|
||||||
|
|
||||||
def timedelta_to_hhmmssss(td: datetime.timedelta) -> str:
|
result = model.align(
|
||||||
dt = datetime.datetime(1969, 1, 1) + td
|
audio_path,
|
||||||
return dt.strftime('%M:%S.%f')[:-4]
|
unaligned_text,
|
||||||
|
language=language,
|
||||||
|
original_split=True,
|
||||||
|
regroup=False,
|
||||||
|
vad=True,
|
||||||
|
nonspeech_skip=None, # use vad to trim line timings but not skip internal non-speech sections
|
||||||
|
)
|
||||||
|
|
||||||
def srt_to_lrc(srt_text: str) -> str:
|
if not result or not list(result.segments):
|
||||||
subs = list(srt.parse(srt_text))
|
print(f" Alignment produced no segments; skipping")
|
||||||
lines = [f"[{timedelta_to_hhmmssss(s.start)}]{s.content}" for s in subs]
|
return
|
||||||
|
|
||||||
# add the end of lyrics marker
|
result.adjust_gaps(one_section=True)
|
||||||
end_time = subs[-1].end
|
|
||||||
lines.append(f"[{timedelta_to_hhmmssss(end_time)}]")
|
if output_format == "yaml":
|
||||||
return "\n".join(lines)
|
metadata = extract_metadata(audio_path, language=language)
|
||||||
|
content = result_to_lyricsfile(result, metadata)
|
||||||
|
else:
|
||||||
|
content = result_to_lrc(result)
|
||||||
|
|
||||||
|
print(f" Writing aligned lyrics at: {output_path}")
|
||||||
|
with open(output_path, "x") as out_fh:
|
||||||
|
out_fh.write(content)
|
||||||
|
|
||||||
|
|
||||||
|
def _refine_from_lrc(model, audio_path, lrc_path, output_path, output_format, language):
    """Add word-level timings to an existing LRC file, keeping its line boundaries.

    The LRC at ``lrc_path`` is parsed into timed segments, which are passed
    to ``model.align_words`` together with the audio. The result is written
    to ``output_path`` as a Lyricsfile when ``output_format`` is ``"yaml"``
    and as LRC otherwise. Nothing is written when the LRC has no timed lines
    or the alignment returns no segments.

    Raises:
        FileExistsError: if ``output_path`` already exists (exclusive create).
    """
    print(f"Refining lyrics for {audio_path} (from .lrc)")

    # Decode explicitly instead of relying on the platform default encoding.
    # "utf-8-sig" also removes a leading byte-order mark, which would
    # otherwise stay glued to the first "[" of the file.
    with open(lrc_path, encoding="utf-8-sig") as fh:
        lrc_content = fh.read()

    parsed = parse_lrc_file(lrc_content)
    if not parsed.segments:
        print(f" No timed lines found in {lrc_path}; skipping")
        return

    result = model.align_words(
        audio_path,
        parsed.segments,
        language=language,
        vad=False,  # don't trim existing line timings
        suppress_silence=False,  # don't adjust word timestamps on detected silence
        regroup=False,
    )
    if not result or not list(result.segments):
        print(" Word alignment produced no segments; skipping")
        return

    if output_format == "yaml":
        metadata = merge_metadata(audio_path, parsed, cli_language=language)
        content = result_to_lyricsfile(result, metadata)
    else:
        content = result_to_lrc(result)

    print(f" Writing refined lyrics at: {output_path}")
    # Lyrics are frequently non-ASCII; write UTF-8 regardless of locale so the
    # output does not depend on (or fail under) the platform default encoding.
    with open(output_path, "x", encoding="utf-8") as out_fh:
        out_fh.write(content)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,431 @@
|
|||||||
|
"""Output writers, LRC parsing, and metadata extraction for txtlyric-to-lrc.
|
||||||
|
|
||||||
|
Writers:
|
||||||
|
|
||||||
|
- :func:`result_to_lrc` -- emits classic LRC, inserting a clear-display
|
||||||
|
marker whenever the gap between consecutive segments exceeds a threshold
|
||||||
|
so that lines do not visually persist through instrumental pauses.
|
||||||
|
- :func:`result_to_lyricsfile` -- emits the YAML-based Lyricsfile format
|
||||||
|
used by lrcget/lrclib, with word-level timings.
|
||||||
|
|
||||||
|
LRC input:
|
||||||
|
|
||||||
|
- :func:`parse_lrc_file` -- parses an LRC file into metadata tags and
|
||||||
|
timed segments (with ``start``/``end``/``text``) suitable for
|
||||||
|
``stable_whisper.align_words``.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
import mutagen
|
||||||
|
import stable_whisper
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
|
||||||
|
# File extensions (lower-case, without the dot) that are treated as audio.
song_file_extensions = {
    "mp3",
    "wav",
    "mp4",
    "mpeg",
    "mpga",
    "m4a",
    "webm",
    "flac",
    "opus",
}


def get_file_extension(filename: str) -> str:
    """Return the extension of ``filename`` without its leading dot.

    Only the last dotted component counts (``"a.b.lrc"`` -> ``"lrc"``); a
    name without an extension yields an empty string. Case is preserved.
    """
    return os.path.splitext(filename)[1][1:]


def file_is_audio(filename: str) -> bool:
    """Return True when ``filename`` ends in one of ``song_file_extensions``.

    The comparison ignores case, so ``"tune.OPUS"`` is recognised just like
    ``"tune.opus"``; directory listings are passed in unmodified and may
    contain upper-case extensions.
    """
    return get_file_extension(filename).lower() in song_file_extensions
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Metadata
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@dataclass
class TrackMetadata:
    """Track-level values written into the ``metadata`` mapping of a Lyricsfile."""

    # Always present; extract_metadata substitutes "Unknown" when the tag is missing.
    title: str
    # Always present; extract_metadata substitutes "Unknown Artist" when missing.
    artist: str
    # Emitted by result_to_lyricsfile only when truthy.
    album: Optional[str] = None
    # Track length in milliseconds; emitted only when not None.
    duration_ms: Optional[int] = None
    # Language value as supplied by the caller or an LRC tag; emitted only when truthy.
    language: Optional[str] = None
    # Emitted only when not None.
    offset_ms: Optional[int] = None
|
||||||
|
|
||||||
|
|
||||||
|
def extract_metadata(audio_path: str, language: Optional[str] = None) -> TrackMetadata:
    """Build a :class:`TrackMetadata` from the audio file's tags and duration.

    Tags are read through ``mutagen.File(..., easy=True)``. A missing title
    or artist is replaced by ``"Unknown"`` / ``"Unknown Artist"`` because
    Lyricsfile requires both ``metadata.title`` and ``metadata.artist``.
    ``language`` is passed through unchanged.
    """
    audio = mutagen.File(audio_path, easy=True)

    title = _first_tag(audio, "title")
    artist = _first_tag(audio, "artist")
    album = _first_tag(audio, "album")
    length_ms = _duration_ms(audio)

    return TrackMetadata(
        title=title if title else "Unknown",
        artist=artist if artist else "Unknown Artist",
        album=album,
        duration_ms=length_ms,
        language=language,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _first_tag(f, key: str) -> Optional[str]:
|
||||||
|
if f is None:
|
||||||
|
return None
|
||||||
|
val = f.get(key)
|
||||||
|
if not val:
|
||||||
|
return None
|
||||||
|
if isinstance(val, list):
|
||||||
|
val = val[0] if val else None
|
||||||
|
if val is None:
|
||||||
|
return None
|
||||||
|
val = str(val).strip()
|
||||||
|
return val or None
|
||||||
|
|
||||||
|
|
||||||
|
def _duration_ms(f) -> Optional[int]:
|
||||||
|
info = getattr(f, "info", None) if f is not None else None
|
||||||
|
length = getattr(info, "length", None) if info is not None else None
|
||||||
|
if length is None:
|
||||||
|
return None
|
||||||
|
return int(round(length * 1000))
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# LRC parsing
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Maps a lower-cased LRC ID-tag key to the Lyricsfile metadata key it fills.
# Several spellings of the language tag map to the same target key.
_LRC_METADATA_KEYS_TO_LYRICSFILE = {
    "ti": "title",
    "ar": "artist",
    "al": "album",
    "la": "language",
    "lang": "language",
    "language": "language",
}

# A whole timestamp token: 1-3 digit minutes, 2-digit seconds and an optional
# 2- or 3-digit fraction introduced by "." or ":".
_TIMESTAMP_RE = re.compile(r"^(\d{1,3}):(\d{2})(?:[.:](\d{2,3}))?$")
# A line that starts with one [bracketed] token; group 1 is the bracket
# content and group 2 is everything after the closing bracket.
_LINE_RE = re.compile(r"^\[([^\]]+)\](.*)$")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ParsedLrc:
    """Result of parsing an LRC file."""
    # Timed lyric lines as dicts with "start", "end" and "text" keys.
    segments: List[dict] = field(default_factory=list)
    # Recognised metadata keyed by Lyricsfile name; all values are strings.
    metadata_tags: Dict[str, str] = field(default_factory=dict)
    # Metadata tags with no Lyricsfile equivalent, keyed by the lower-cased LRC key.
    dropped_tags: Dict[str, str] = field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_lrc_timestamp(token: str) -> Optional[float]:
    """Convert an ``mm:ss[.xx|.xxx]`` token to seconds.

    Returns ``None`` when ``token`` (after stripping whitespace) does not
    match ``_TIMESTAMP_RE``. A two-digit fraction counts hundredths of a
    second, a three-digit fraction counts milliseconds.
    """
    match = _TIMESTAMP_RE.match(token.strip())
    if match is None:
        return None

    mins, secs, fraction = match.groups()
    if fraction is None:
        millis = 0
    elif len(fraction) == 2:
        millis = int(fraction) * 10
    else:
        millis = int(fraction)

    return int(mins) * 60 + int(secs) + millis / 1000.0
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_lrc_length(value: str) -> Optional[int]:
|
||||||
|
"""Parse an LRC [length:...] value to milliseconds."""
|
||||||
|
value = value.strip()
|
||||||
|
m = re.match(r"^(\d+):(\d{2})(?:[.:](\d{2,3}))?$", value)
|
||||||
|
if not m:
|
||||||
|
return None
|
||||||
|
minutes = int(m.group(1))
|
||||||
|
seconds = int(m.group(2))
|
||||||
|
frac_raw = m.group(3) or "0"
|
||||||
|
if len(frac_raw) == 2:
|
||||||
|
frac_ms = int(frac_raw) * 10
|
||||||
|
else:
|
||||||
|
frac_ms = int(frac_raw)
|
||||||
|
return (minutes * 60 + seconds) * 1000 + frac_ms
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_lrc_offset(value: str) -> Optional[int]:
|
||||||
|
"""Parse an LRC [offset:...] value to milliseconds (integer, may be negative)."""
|
||||||
|
value = value.strip()
|
||||||
|
m = re.match(r"^([+-]?\d+)$", value)
|
||||||
|
if not m:
|
||||||
|
return None
|
||||||
|
return int(m.group(1))
|
||||||
|
|
||||||
|
|
||||||
|
def _split_leading_time_tags(first_ts: float, remainder: str) -> Tuple[List[float], str]:
    """Collect further ``[mm:ss.xx]`` tags that prefix ``remainder``.

    Returns every timestamp found (starting with ``first_ts``) and the lyric
    text left once all leading time tags are removed. A bracket that is not
    a timestamp (e.g. ``[Chorus]``) stops the scan and stays in the text.
    """
    stamps = [first_ts]
    text = remainder.strip()
    while True:
        more = _LINE_RE.match(text)
        extra_ts = _parse_lrc_timestamp(more.group(1)) if more else None
        if extra_ts is None:
            return stamps, text
        stamps.append(extra_ts)
        text = more.group(2).strip()


def parse_lrc_file(content: str) -> ParsedLrc:
    """Parse LRC content into timed segments and metadata.

    Returns a :class:`ParsedLrc` containing:

    - ``segments``: list of ``{"start": float, "end": float, "text": str}``
      ordered by start time. Gap markers (timestamps with empty text) set the
      ``end`` of the preceding segment and are not segments themselves. A
      segment without a gap marker ends where the next one starts; the last
      segment defaults to ``start + 5.0``.
    - ``metadata_tags``: recognised LRC tags under Lyricsfile key names, plus
      ``duration_ms`` and ``offset_ms`` (as strings) when ``[length:...]`` /
      ``[offset:...]`` parse successfully.
    - ``dropped_tags``: tags with no Lyricsfile equivalent or an unparsable
      length/offset value.

    A line may carry several leading time tags
    (``[00:12.00][00:47.00]Chorus``); the text is then emitted once per
    timestamp. A leading byte-order mark is ignored. Lines that do not start
    with a bracketed token are skipped.

    If an ``[offset:...]`` tag is present its value is added to every
    timestamp (clamped at zero).
    """
    result = ParsedLrc()
    raw_timed: List[Tuple[float, str]] = []

    # A BOM left in the decoded text is not whitespace for str.strip(), so it
    # would stop the first line from matching _LINE_RE; drop it up front.
    for raw_line in content.lstrip("\ufeff").splitlines():
        raw_line = raw_line.strip()
        if not raw_line:
            continue
        m = _LINE_RE.match(raw_line)
        if not m:
            continue

        bracket_content = m.group(1)
        after_bracket = m.group(2)

        ts = _parse_lrc_timestamp(bracket_content)
        if ts is not None:
            stamps, text = _split_leading_time_tags(ts, after_bracket)
            raw_timed.extend((stamp, text) for stamp in stamps)
            continue

        # Metadata tag: [key:value]
        if ":" not in bracket_content:
            continue
        key, _, value = bracket_content.partition(":")
        key = key.strip().lower()
        value = value.strip()
        if not value:
            continue

        lyricsfile_key = _LRC_METADATA_KEYS_TO_LYRICSFILE.get(key)
        if lyricsfile_key is not None:
            result.metadata_tags[lyricsfile_key] = value
        elif key == "length":
            length_ms = _parse_lrc_length(value)
            if length_ms is not None:
                result.metadata_tags["duration_ms"] = str(length_ms)
            else:
                result.dropped_tags[key] = value
        elif key == "offset":
            offset_ms = _parse_lrc_offset(value)
            if offset_ms is not None:
                result.metadata_tags["offset_ms"] = str(offset_ms)
            else:
                result.dropped_tags[key] = value
        else:
            result.dropped_tags[key] = value

    if not raw_timed:
        return result

    # Stable sort: entries sharing a timestamp keep their file order.
    raw_timed.sort(key=lambda t: t[0])

    # Apply offset: shift all timestamps so output is absolute.
    # NOTE(review): a positive offset moves lines later here; the sign
    # convention is worth confirming against the LRC sources in use.
    offset_s = 0.0
    if "offset_ms" in result.metadata_tags:
        offset_s = int(result.metadata_tags["offset_ms"]) / 1000.0

    adjusted: List[Tuple[float, str]] = [
        (max(0.0, ts + offset_s), text) for ts, text in raw_timed
    ]

    # Build segments: non-empty text lines become segments; empty-text
    # lines (gap markers) contribute an end time to the preceding segment.
    # A gap marker before any text line has nothing to close and is ignored.
    pending_segments: List[dict] = []
    for ts, text in adjusted:
        if text:
            pending_segments.append({"start": ts, "end": None, "text": text})
        elif pending_segments:
            pending_segments[-1]["end"] = ts

    # Fill in missing end times: end of seg N = start of seg N+1.
    for current, following in zip(pending_segments, pending_segments[1:]):
        if current["end"] is None:
            current["end"] = following["start"]

    # Last segment: with no trailing gap marker, use start + 5s as an
    # upper bound for the word aligner.
    if pending_segments and pending_segments[-1]["end"] is None:
        pending_segments[-1]["end"] = pending_segments[-1]["start"] + 5.0

    result.segments = pending_segments
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def merge_metadata(
    audio_path: str,
    lrc_parsed: ParsedLrc,
    cli_language: Optional[str] = None,
) -> TrackMetadata:
    """Combine LRC tags with the audio file's own tags into a :class:`TrackMetadata`.

    For title, artist, album and duration an LRC tag wins over the value
    read from the audio file (which already carries the placeholder
    fallbacks). For language the order is: ``cli_language``, then the LRC
    tag, then the audio metadata.

    One warning per entry in ``lrc_parsed.dropped_tags`` is printed to
    stderr, since those tags are not carried into the Lyricsfile.
    """
    from_audio = extract_metadata(audio_path, language=cli_language)
    lrc_tags = lrc_parsed.metadata_tags

    merged_duration = from_audio.duration_ms
    if "duration_ms" in lrc_tags:
        try:
            merged_duration = int(lrc_tags["duration_ms"])
        except ValueError:
            # Keep the duration read from the audio file.
            pass

    merged = TrackMetadata(
        title=lrc_tags.get("title") or from_audio.title,
        artist=lrc_tags.get("artist") or from_audio.artist,
        album=lrc_tags.get("album") or from_audio.album,
        duration_ms=merged_duration,
        language=cli_language or lrc_tags.get("language") or from_audio.language,
    )

    for tag_key, tag_value in lrc_parsed.dropped_tags.items():
        print(
            f" Warning: LRC tag [{tag_key}:{tag_value}] has no Lyricsfile equivalent; skipped",
            file=sys.stderr,
        )

    return merged
|
||||||
|
|
||||||
|
|
||||||
|
def _format_lrc_timestamp(seconds: float) -> str:
|
||||||
|
if seconds < 0:
|
||||||
|
seconds = 0.0
|
||||||
|
minutes = int(seconds // 60)
|
||||||
|
remainder = seconds - minutes * 60
|
||||||
|
return f"{minutes:02d}:{remainder:05.2f}"
|
||||||
|
|
||||||
|
|
||||||
|
def result_to_lrc(
|
||||||
|
result: "stable_whisper.WhisperResult",
|
||||||
|
gap_threshold: float = 1.5,
|
||||||
|
) -> str:
|
||||||
|
"""Render ``result`` as LRC text.
|
||||||
|
|
||||||
|
Between consecutive segments, if ``next_segment.start - this_segment.end``
|
||||||
|
exceeds ``gap_threshold`` seconds, an empty timestamp is emitted at
|
||||||
|
``this_segment.end`` so consumers stop displaying the line during the
|
||||||
|
pause. A trailing empty timestamp at the end of the last segment is
|
||||||
|
always emitted.
|
||||||
|
"""
|
||||||
|
segments = list(result.segments)
|
||||||
|
if not segments:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
lines: List[str] = []
|
||||||
|
for i, seg in enumerate(segments):
|
||||||
|
text = (seg.text or "").strip()
|
||||||
|
lines.append(f"[{_format_lrc_timestamp(seg.start)}]{text}")
|
||||||
|
next_seg = segments[i + 1] if i + 1 < len(segments) else None
|
||||||
|
if next_seg is None:
|
||||||
|
lines.append(f"[{_format_lrc_timestamp(seg.end)}]")
|
||||||
|
elif next_seg.start - seg.end > gap_threshold:
|
||||||
|
lines.append(f"[{_format_lrc_timestamp(seg.end)}]")
|
||||||
|
|
||||||
|
return "\n".join(lines) + "\n"
|
||||||
|
|
||||||
|
|
||||||
|
def result_to_lyricsfile(
    result: "stable_whisper.WhisperResult",
    metadata: TrackMetadata,
) -> str:
    """Serialise ``result`` and ``metadata`` as a Lyricsfile YAML document.

    The document has ``version``, ``metadata`` and ``lines`` keys, in that
    order. Optional metadata fields are written only when set, and
    ``instrumental`` is always ``False``. Each line carries its text and
    millisecond start/end, plus a ``words`` list when the segment has word
    timings. No ``plain`` block is produced.
    """
    header = {"title": metadata.title, "artist": metadata.artist}
    if metadata.album:
        header["album"] = metadata.album
    if metadata.duration_ms is not None:
        header["duration_ms"] = metadata.duration_ms
    if metadata.language:
        header["language"] = metadata.language
    if metadata.offset_ms is not None:
        header["offset_ms"] = metadata.offset_ms
    header["instrumental"] = False

    synced_lines = []
    for segment in result.segments:
        entry = {
            "text": (segment.text or "").strip(),
            "start_ms": _to_ms(segment.start),
            "end_ms": _to_ms(segment.end),
        }
        if segment.words:
            timed_words = _words_to_lyricsfile_words(segment.words)
            if timed_words:
                entry["words"] = timed_words
        synced_lines.append(entry)

    return yaml.safe_dump(
        {"version": "1.0", "metadata": header, "lines": synced_lines},
        sort_keys=False,
        allow_unicode=True,
        default_flow_style=False,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _to_ms(seconds: float) -> int:
|
||||||
|
return int(round(seconds * 1000))
|
||||||
|
|
||||||
|
|
||||||
|
def _words_to_lyricsfile_words(word_timings) -> List[dict]:
|
||||||
|
"""Convert stable-ts word objects to Lyricsfile word objects.
|
||||||
|
|
||||||
|
Whisper's tokenization produces words with leading whitespace
|
||||||
|
(e.g. ``" club"``). Lyricsfile expects trailing whitespace except on
|
||||||
|
the final word. This re-attaches the leading space of word ``i+1``
|
||||||
|
onto the trailing edge of word ``i``. For CJK languages stable-ts
|
||||||
|
splits without spaces, in which case no spacing is injected.
|
||||||
|
"""
|
||||||
|
bodies = [(w.word or "").lstrip() for w in word_timings]
|
||||||
|
leading_spaces = [(w.word or "")[: len(w.word or "") - len((w.word or "").lstrip())] for w in word_timings]
|
||||||
|
|
||||||
|
out: List[dict] = []
|
||||||
|
for i, w in enumerate(word_timings):
|
||||||
|
text = bodies[i]
|
||||||
|
if i + 1 < len(word_timings) and leading_spaces[i + 1]:
|
||||||
|
text = text + " "
|
||||||
|
out.append(
|
||||||
|
{
|
||||||
|
"text": text,
|
||||||
|
"start_ms": _to_ms(w.start),
|
||||||
|
"end_ms": _to_ms(w.end),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return out
|
||||||
Reference in New Issue
Block a user