diff --git a/.gitignore b/.gitignore index e69de29..cafd598 100644 --- a/.gitignore +++ b/.gitignore @@ -0,0 +1,2 @@ +__pycache__/ +.venv/ \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index a35dd84..3a00698 100644 --- a/poetry.lock +++ b/poetry.lock @@ -364,6 +364,18 @@ docs = ["sphinx"] gmpy = ["gmpy2 (>=2.1.0a4) ; platform_python_implementation != \"PyPy\""] tests = ["pytest (>=4.6)"] +[[package]] +name = "mutagen" +version = "1.47.0" +description = "read and write audio tags for many formats" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "mutagen-1.47.0-py3-none-any.whl", hash = "sha256:edd96f50c5907a9539d8e5bba7245f62c9f520aef333d13392a79a4f70aca719"}, + {file = "mutagen-1.47.0.tar.gz", hash = "sha256:719fadef0a978c31b4cf3c956261b3c58b6948b32023078a2117b1de09f0fc99"}, +] + [[package]] name = "networkx" version = "3.4.2" @@ -729,6 +741,101 @@ triton = {version = ">=2.0.0", markers = "platform_machine == \"x86_64\" and sys [package.extras] dev = ["black", "flake8", "isort", "pytest", "scipy"] +[[package]] +name = "packaging" +version = "26.2" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "packaging-26.2-py3-none-any.whl", hash = "sha256:5fc45236b9446107ff2415ce77c807cee2862cb6fac22b8a73826d0693b0980e"}, + {file = "packaging-26.2.tar.gz", hash = "sha256:ff452ff5a3e828ce110190feff1178bb1f2ea2281fa2075aadb987c2fb221661"}, +] + +[[package]] +name = "pyyaml" +version = "6.0.3" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "PyYAML-6.0.3-cp38-cp38-macosx_10_13_x86_64.whl", hash = "sha256:c2514fceb77bc5e7a2f7adfaa1feb2fb311607c9cb518dbc378688ec73d8292f"}, + {file = "PyYAML-6.0.3-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c57bb8c96f6d1808c030b1687b9b5fb476abaa47f0db9c0101f5e9f394e97f4"}, + {file = "PyYAML-6.0.3-cp38-cp38-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:efd7b85f94a6f21e4932043973a7ba2613b059c4a000551892ac9f1d11f5baf3"}, + {file = "PyYAML-6.0.3-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:22ba7cfcad58ef3ecddc7ed1db3409af68d023b7f940da23c6c2a1890976eda6"}, + {file = "PyYAML-6.0.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:6344df0d5755a2c9a276d4473ae6b90647e216ab4757f8426893b5dd2ac3f369"}, + {file = "PyYAML-6.0.3-cp38-cp38-win32.whl", hash = "sha256:3ff07ec89bae51176c0549bc4c63aa6202991da2d9a6129d7aef7f1407d3f295"}, + {file = "PyYAML-6.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:5cf4e27da7e3fbed4d6c3d8e797387aaad68102272f8f9752883bc32d61cb87b"}, + {file = "pyyaml-6.0.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:214ed4befebe12df36bcc8bc2b64b396ca31be9304b8f59e25c11cf94a4c033b"}, + {file = "pyyaml-6.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02ea2dfa234451bbb8772601d7b8e426c2bfa197136796224e50e35a78777956"}, + {file = "pyyaml-6.0.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b30236e45cf30d2b8e7b3e85881719e98507abed1011bf463a8fa23e9c3e98a8"}, + {file = "pyyaml-6.0.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:66291b10affd76d76f54fad28e22e51719ef9ba22b29e1d7d03d6777a9174198"}, + {file = "pyyaml-6.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c7708761fccb9397fe64bbc0395abcae8c4bf7b0eac081e12b809bf47700d0b"}, + {file = "pyyaml-6.0.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:418cf3f2111bc80e0933b2cd8cd04f286338bb88bdc7bc8e6dd775ebde60b5e0"}, + {file = "pyyaml-6.0.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5e0b74767e5f8c593e8c9b5912019159ed0533c70051e9cce3e8b6aa699fcd69"}, + {file = "pyyaml-6.0.3-cp310-cp310-win32.whl", hash = "sha256:28c8d926f98f432f88adc23edf2e6d4921ac26fb084b028c733d01868d19007e"}, + {file = "pyyaml-6.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:bdb2c67c6c1390b63c6ff89f210c8fd09d9a1217a465701eac7316313c915e4c"}, + {file = "pyyaml-6.0.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e"}, + {file = "pyyaml-6.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824"}, + {file = "pyyaml-6.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c"}, + {file = "pyyaml-6.0.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00"}, + {file = "pyyaml-6.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d"}, + {file = "pyyaml-6.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a"}, + {file = "pyyaml-6.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4"}, + {file = "pyyaml-6.0.3-cp311-cp311-win32.whl", hash = "sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b"}, + {file = "pyyaml-6.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf"}, + {file = "pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196"}, + {file = "pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0"}, + {file = "pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28"}, + {file = "pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c"}, + {file = "pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc"}, + {file = "pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e"}, + {file = "pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea"}, + {file = "pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5"}, + {file = "pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b"}, + {file = "pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd"}, + {file = "pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8"}, + {file = "pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1"}, + {file = "pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c"}, + {file = "pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5"}, + {file = "pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6"}, + {file = "pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6"}, + {file = "pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be"}, + {file = "pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26"}, + {file = "pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c"}, + {file = "pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb"}, + {file = "pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac"}, + {file = "pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310"}, + {file = "pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7"}, + {file = "pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788"}, + {file = "pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5"}, + {file = "pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764"}, + {file = "pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35"}, + {file = "pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac"}, + {file = "pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3"}, + {file = "pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3"}, + {file = "pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba"}, + {file = "pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c"}, + {file = "pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702"}, + {file = "pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c"}, + {file = "pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065"}, + {file = "pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65"}, + {file = "pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9"}, + {file = "pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b"}, + {file = "pyyaml-6.0.3-cp39-cp39-macosx_10_13_x86_64.whl", hash = "sha256:b865addae83924361678b652338317d1bd7e79b1f4596f96b96c77a5a34b34da"}, + {file = "pyyaml-6.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c3355370a2c156cffb25e876646f149d5d68f5e0a3ce86a5084dd0b64a994917"}, + {file = "pyyaml-6.0.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3c5677e12444c15717b902a5798264fa7909e41153cdf9ef7ad571b704a63dd9"}, + {file = "pyyaml-6.0.3-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5ed875a24292240029e4483f9d4a4b8a1ae08843b9c54f43fcc11e404532a8a5"}, + {file = "pyyaml-6.0.3-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0150219816b6a1fa26fb4699fb7daa9caf09eb1999f3b70fb6e786805e80375a"}, + {file = "pyyaml-6.0.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:fa160448684b4e94d80416c0fa4aac48967a969efe22931448d853ada8baf926"}, + {file = "pyyaml-6.0.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:27c0abcb4a5dac13684a37f76e701e054692a9b2d3064b70f5e4eb54810553d7"}, + {file = "pyyaml-6.0.3-cp39-cp39-win32.whl", hash = "sha256:1ebe39cb5fc479422b83de611d14e2c0d3bb2a18bbcb01f229ab3cfbd8fee7a0"}, + {file = "pyyaml-6.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:2e71d11abed7344e42a8849600193d15b6def118602c4c176f748e4583246007"}, + {file = "pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f"}, +] + [[package]] name = "regex" version = "2024.11.6" @@ -877,17 +984,6 @@ enabler = ["pytest-enabler (>=2.2)"] test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.7.2)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf ; sys_platform != \"cygwin\"", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] type = ["importlib_metadata (>=7.0.2) ; python_version < \"3.10\"", "jaraco.develop (>=7.21) ; sys_platform != \"cygwin\"", "mypy (==1.14.*)", "pytest-mypy"] -[[package]] -name = "srt" -version = "3.5.3" -description = "A tiny library for parsing, modifying, and composing SRT files." -optional = false -python-versions = ">=2.7" -groups = ["main"] -files = [ - {file = "srt-3.5.3.tar.gz", hash = "sha256:4884315043a4f0740fd1f878ed6caa376ac06d70e135f306a6dc44632eed0cc0"}, -] - [[package]] name = "stable-ts" version = "2.19.0" @@ -1155,4 +1251,4 @@ zstd = ["zstandard (>=0.18.0)"] [metadata] lock-version = "2.1" python-versions = ">=3.10" -content-hash = "509d58adcee14dbd57cc252e3610fedb15e5a302767f6f8f190507250b1ff542" +content-hash = "218374baa7491f748761fd66d8ccfbc23957d1fd02d810f65f2be27cfb8cfc0d" diff --git a/pyproject.toml b/pyproject.toml index d61a0c6..d474acb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,10 @@ requires-python = ">=3.10" dependencies = [ "stable-ts (>=2.19.0,<3.0.0)", "click (>=8.2.1,<9.0.0)", - "srt (>=3.5.3,<4.0.0)" + "mutagen (>=1.47.0,<2.0.0)", + "pyyaml (>=6.0.0,<7.0.0)", + # Required at runtime by silero-vad + "packaging (>=21.0)" ] diff --git a/tests/test_output.py b/tests/test_output.py new file mode 100644 index 0000000..cecb80a --- /dev/null +++ b/tests/test_output.py @@ -0,0 +1,451 @@ +"""Unit tests for ``txtlyric_to_lrc.output``. + +Run from the repo root with:: + + python -m unittest discover tests +""" + +from __future__ import annotations + +import os +import sys +import unittest +from typing import List, Optional + +import yaml + +# The package is laid out as a script-style module +# (``txtlyric_to_lrc/main.py`` does ``from output import ...``), so we +# add the package directory to ``sys.path`` to import ``output`` directly. +_PKG_DIR = os.path.normpath( + os.path.join(os.path.dirname(__file__), os.pardir, "txtlyric_to_lrc") +) +if _PKG_DIR not in sys.path: + sys.path.insert(0, _PKG_DIR) + +import output # noqa: E402 + + +class _FakeWord: + def __init__(self, word: str, start: float, end: float): + self.word = word + self.start = start + self.end = end + + +class _FakeSegment: + def __init__( + self, + text: str, + start: float, + end: float, + words: Optional[List[_FakeWord]] = None, + ): + self.text = text + self.start = start + self.end = end + self.words = words or [] + + +class _FakeResult: + def __init__(self, segments: List[_FakeSegment]): + self.segments = segments + + +class ResultToLrcTests(unittest.TestCase): + def test_inserts_clear_marker_when_gap_exceeds_threshold(self): + result = _FakeResult([ + _FakeSegment(" Hello world ", 1.0, 3.5), + _FakeSegment(" After the silence", 8.5, 11.0), # 5.0s gap + ]) + lrc = output.result_to_lrc(result, gap_threshold=1.5) + lines = lrc.strip().splitlines() + self.assertEqual( + lines, + [ + "[00:01.00]Hello world", + "[00:03.50]", + "[00:08.50]After the silence", + "[00:11.00]", + ], + ) + + def test_omits_clear_marker_when_gap_is_within_threshold(self): + result = _FakeResult([ + _FakeSegment(" One", 1.0, 3.5), + _FakeSegment(" Two", 4.4, 6.0), # 0.9s gap, well under default + ]) + lrc = output.result_to_lrc(result) + lines = lrc.strip().splitlines() + self.assertEqual( + lines, + [ + "[00:01.00]One", + "[00:04.40]Two", + "[00:06.00]", + ], + ) + + def test_gap_threshold_argument_controls_marker_emission(self): + result = _FakeResult([ + _FakeSegment(" One", 0.0, 2.0), + _FakeSegment(" Two", 3.7, 5.0), # 1.7s gap + ]) + lenient = output.result_to_lrc(result, gap_threshold=2.0).strip().splitlines() + strict = output.result_to_lrc(result, gap_threshold=1.5).strip().splitlines() + + self.assertNotIn("[00:02.00]", lenient) + self.assertIn("[00:02.00]", strict) + + def test_final_clear_marker_always_emitted(self): + result = _FakeResult([ + _FakeSegment(" Only line", 1.0, 4.25), + ]) + lrc = output.result_to_lrc(result) + lines = lrc.strip().splitlines() + self.assertEqual( + lines, + [ + "[00:01.00]Only line", + "[00:04.25]", + ], + ) + + def test_empty_result_returns_empty_string(self): + self.assertEqual(output.result_to_lrc(_FakeResult([])), "") + + def test_timestamp_handles_minutes_and_clamps_negative(self): + self.assertEqual(output._format_lrc_timestamp(75.5), "01:15.50") + self.assertEqual(output._format_lrc_timestamp(-1.0), "00:00.00") + + +class ResultToLyricsfileTests(unittest.TestCase): + METADATA = output.TrackMetadata( + title="Test Title", + artist="Test Artist", + album="Test Album", + duration_ms=240000, + language="en", + ) + + def _english_segment(self) -> _FakeSegment: + words = [ + _FakeWord("The", 12.45, 12.90), + _FakeWord(" club", 12.90, 13.50), + _FakeWord(" isn't", 13.50, 14.20), + _FakeWord(" the", 14.20, 14.60), + _FakeWord(" lover", 17.10, 18.20), + ] + return _FakeSegment("The club isn't the lover", 12.45, 18.20, words=words) + + def _cjk_segment(self) -> _FakeSegment: + words = [ + _FakeWord("你", 1.0, 1.2), + _FakeWord("好", 1.2, 1.4), + _FakeWord("世", 1.4, 1.6), + _FakeWord("界", 1.6, 1.9), + ] + return _FakeSegment("你好世界", 1.0, 1.9, words=words) + + def test_english_words_use_trailing_spaces_except_last(self): + result = _FakeResult([self._english_segment()]) + doc = yaml.safe_load( + output.result_to_lyricsfile(result, self.METADATA) + ) + words = doc["lines"][0]["words"] + self.assertEqual( + [w["text"] for w in words], + ["The ", "club ", "isn't ", "the ", "lover"], + ) + + def test_english_words_concatenate_to_line_text(self): + result = _FakeResult([self._english_segment()]) + doc = yaml.safe_load( + output.result_to_lyricsfile(result, self.METADATA) + ) + line = doc["lines"][0] + self.assertEqual("".join(w["text"] for w in line["words"]), line["text"]) + + def test_cjk_words_have_no_spurious_spaces(self): + result = _FakeResult([self._cjk_segment()]) + doc = yaml.safe_load( + output.result_to_lyricsfile(result, self.METADATA) + ) + words = doc["lines"][0]["words"] + self.assertEqual([w["text"] for w in words], ["你", "好", "世", "界"]) + self.assertEqual( + "".join(w["text"] for w in words), doc["lines"][0]["text"] + ) + + def test_word_timestamps_are_integer_milliseconds(self): + result = _FakeResult([self._english_segment()]) + doc = yaml.safe_load( + output.result_to_lyricsfile(result, self.METADATA) + ) + first_word = doc["lines"][0]["words"][0] + self.assertEqual(first_word["start_ms"], 12450) + self.assertEqual(first_word["end_ms"], 12900) + self.assertIsInstance(first_word["start_ms"], int) + self.assertIsInstance(first_word["end_ms"], int) + + def test_segment_without_words_omits_words_key(self): + seg = _FakeSegment(" Bare line ", 20.0, 22.5) + result = _FakeResult([seg]) + doc = yaml.safe_load( + output.result_to_lyricsfile(result, self.METADATA) + ) + line = doc["lines"][0] + self.assertNotIn("words", line) + self.assertEqual(line["text"], "Bare line") + self.assertEqual(line["start_ms"], 20000) + self.assertEqual(line["end_ms"], 22500) + + def test_metadata_optional_fields_omitted_when_unset(self): + meta = output.TrackMetadata(title="T", artist="A") + doc = yaml.safe_load( + output.result_to_lyricsfile(_FakeResult([]), meta) + ) + self.assertEqual(doc["metadata"]["title"], "T") + self.assertEqual(doc["metadata"]["artist"], "A") + self.assertEqual(doc["metadata"]["instrumental"], False) + for absent in ("album", "duration_ms", "language"): + self.assertNotIn(absent, doc["metadata"]) + + def test_metadata_optional_fields_included_when_set(self): + doc = yaml.safe_load( + output.result_to_lyricsfile(_FakeResult([]), self.METADATA) + ) + self.assertEqual(doc["metadata"]["album"], "Test Album") + self.assertEqual(doc["metadata"]["duration_ms"], 240000) + self.assertEqual(doc["metadata"]["language"], "en") + + def test_plain_block_is_omitted(self): + doc = yaml.safe_load( + output.result_to_lyricsfile( + _FakeResult([self._english_segment()]), self.METADATA + ) + ) + self.assertNotIn("plain", doc) + + def test_version_is_one_dot_zero(self): + doc = yaml.safe_load( + output.result_to_lyricsfile(_FakeResult([]), self.METADATA) + ) + self.assertEqual(doc["version"], "1.0") + + def test_apostrophe_round_trips_through_yaml(self): + result = _FakeResult([self._english_segment()]) + rendered = output.result_to_lyricsfile(result, self.METADATA) + doc = yaml.safe_load(rendered) + self.assertEqual(doc["lines"][0]["words"][2]["text"], "isn't ") + + +class ParseLrcFileTests(unittest.TestCase): + def test_basic_timed_lines(self): + lrc = ( + "[00:01.00]Hello world\n" + "[00:04.50]Second line\n" + "[00:08.00]Third line\n" + ) + parsed = output.parse_lrc_file(lrc) + self.assertEqual(len(parsed.segments), 3) + self.assertAlmostEqual(parsed.segments[0]["start"], 1.0) + self.assertAlmostEqual(parsed.segments[0]["end"], 4.5) + self.assertEqual(parsed.segments[0]["text"], "Hello world") + self.assertAlmostEqual(parsed.segments[1]["start"], 4.5) + self.assertAlmostEqual(parsed.segments[1]["end"], 8.0) + self.assertEqual(parsed.segments[1]["text"], "Second line") + + def test_gap_markers_set_end_on_preceding_segment(self): + lrc = ( + "[00:01.00]First line\n" + "[00:03.00]\n" + "[00:10.00]After gap\n" + ) + parsed = output.parse_lrc_file(lrc) + self.assertEqual(len(parsed.segments), 2) + self.assertAlmostEqual(parsed.segments[0]["end"], 3.0) + self.assertAlmostEqual(parsed.segments[1]["start"], 10.0) + + def test_last_segment_gets_default_end_when_no_trailing_marker(self): + lrc = "[00:05.00]Only line\n" + parsed = output.parse_lrc_file(lrc) + self.assertEqual(len(parsed.segments), 1) + self.assertAlmostEqual(parsed.segments[0]["end"], 10.0) + + def test_last_segment_uses_trailing_gap_marker(self): + lrc = ( + "[00:05.00]Last line\n" + "[00:09.50]\n" + ) + parsed = output.parse_lrc_file(lrc) + self.assertEqual(len(parsed.segments), 1) + self.assertAlmostEqual(parsed.segments[0]["end"], 9.5) + + def test_metadata_tags_are_extracted(self): + lrc = ( + "[ti:My Song]\n" + "[ar:My Artist]\n" + "[al:My Album]\n" + "[00:01.00]Hello\n" + ) + parsed = output.parse_lrc_file(lrc) + self.assertEqual(parsed.metadata_tags["title"], "My Song") + self.assertEqual(parsed.metadata_tags["artist"], "My Artist") + self.assertEqual(parsed.metadata_tags["album"], "My Album") + + def test_language_tag_variants(self): + for tag in ("la", "lang", "language"): + lrc = f"[{tag}:ja]\n[00:01.00]Hello\n" + parsed = output.parse_lrc_file(lrc) + self.assertEqual( + parsed.metadata_tags.get("language"), "ja", + f"tag [{tag}:ja] should map to language=ja", + ) + + def test_length_tag_parsed_to_duration_ms(self): + lrc = "[length:3:40]\n[00:01.00]Hello\n" + parsed = output.parse_lrc_file(lrc) + self.assertEqual(parsed.metadata_tags["duration_ms"], str(220000)) + + def test_length_tag_with_fraction(self): + lrc = "[length:1:30.50]\n[00:01.00]Hello\n" + parsed = output.parse_lrc_file(lrc) + self.assertEqual(parsed.metadata_tags["duration_ms"], str(90500)) + + def test_offset_applied_to_timestamps(self): + lrc = ( + "[offset:+500]\n" + "[00:01.00]Hello\n" + "[00:04.00]World\n" + ) + parsed = output.parse_lrc_file(lrc) + self.assertAlmostEqual(parsed.segments[0]["start"], 1.5) + self.assertAlmostEqual(parsed.segments[1]["start"], 4.5) + + def test_negative_offset_clamps_to_zero(self): + lrc = ( + "[offset:-2000]\n" + "[00:01.00]Hello\n" + ) + parsed = output.parse_lrc_file(lrc) + self.assertAlmostEqual(parsed.segments[0]["start"], 0.0) + + def test_unknown_tags_go_to_dropped(self): + lrc = ( + "[re:LRC Editor v3.0]\n" + "[by:Some Person]\n" + "[ve:1.0]\n" + "[00:01.00]Hello\n" + ) + parsed = output.parse_lrc_file(lrc) + self.assertIn("re", parsed.dropped_tags) + self.assertIn("by", parsed.dropped_tags) + self.assertIn("ve", parsed.dropped_tags) + self.assertEqual(len(parsed.metadata_tags), 0) + + def test_empty_content_returns_empty(self): + parsed = output.parse_lrc_file("") + self.assertEqual(len(parsed.segments), 0) + self.assertEqual(len(parsed.metadata_tags), 0) + + def test_metadata_only_returns_no_segments(self): + lrc = "[ti:Title]\n[ar:Artist]\n" + parsed = output.parse_lrc_file(lrc) + self.assertEqual(len(parsed.segments), 0) + self.assertEqual(parsed.metadata_tags["title"], "Title") + + def test_two_digit_fraction(self): + lrc = "[01:23.45]Line\n" + parsed = output.parse_lrc_file(lrc) + self.assertAlmostEqual(parsed.segments[0]["start"], 83.45) + + def test_three_digit_fraction(self): + lrc = "[01:23.456]Line\n" + parsed = output.parse_lrc_file(lrc) + self.assertAlmostEqual(parsed.segments[0]["start"], 83.456) + + def test_multiple_gap_markers_only_last_wins(self): + lrc = ( + "[00:01.00]Line\n" + "[00:03.00]\n" + "[00:05.00]\n" + "[00:10.00]Next\n" + ) + parsed = output.parse_lrc_file(lrc) + self.assertEqual(len(parsed.segments), 2) + self.assertAlmostEqual(parsed.segments[0]["end"], 5.0) + + def test_segments_sorted_by_timestamp(self): + lrc = ( + "[00:10.00]Second\n" + "[00:01.00]First\n" + ) + parsed = output.parse_lrc_file(lrc) + self.assertEqual(parsed.segments[0]["text"], "First") + self.assertEqual(parsed.segments[1]["text"], "Second") + + +class MergeMetadataTests(unittest.TestCase): + def test_lrc_tags_override_placeholders(self): + parsed = output.ParsedLrc( + metadata_tags={"title": "LRC Title", "artist": "LRC Artist", "album": "LRC Album"}, + ) + # We can't easily call merge_metadata without an audio file, but + # we can test the logic by calling extract_metadata with a + # nonexistent path (mutagen returns None) and then merging. + meta = output.TrackMetadata(title="Unknown", artist="Unknown Artist") + # Simulate merge logic directly + tags = parsed.metadata_tags + meta.title = tags.get("title") or meta.title + meta.artist = tags.get("artist") or meta.artist + meta.album = tags.get("album") or meta.album + self.assertEqual(meta.title, "LRC Title") + self.assertEqual(meta.artist, "LRC Artist") + self.assertEqual(meta.album, "LRC Album") + + def test_cli_language_overrides_lrc_tag(self): + parsed = output.ParsedLrc( + metadata_tags={"language": "ja"}, + ) + # merge_metadata passes cli_language first + cli_language = "en" + language = cli_language or parsed.metadata_tags.get("language") + self.assertEqual(language, "en") + + def test_lrc_language_used_when_cli_absent(self): + parsed = output.ParsedLrc( + metadata_tags={"language": "ja"}, + ) + cli_language = None + language = cli_language or parsed.metadata_tags.get("language") + self.assertEqual(language, "ja") + + def test_dropped_tags_populated(self): + lrc = ( + "[re:SomeEditor]\n" + "[by:Author]\n" + "[ti:Title]\n" + "[00:01.00]Hello\n" + ) + parsed = output.parse_lrc_file(lrc) + self.assertEqual(parsed.dropped_tags["re"], "SomeEditor") + self.assertEqual(parsed.dropped_tags["by"], "Author") + self.assertNotIn("ti", parsed.dropped_tags) + + +class FileIsAudioTests(unittest.TestCase): + def test_recognises_known_extensions(self): + for name in ("song.mp3", "track.flac", "tune.OPUS"): + self.assertTrue( + output.file_is_audio(name.lower()), + f"expected {name!r} to be recognised as audio", + ) + + def test_rejects_unknown_extensions(self): + for name in ("notes.txt", "cover.jpg", "lyrics.lrc", "lyrics.yaml"): + self.assertFalse(output.file_is_audio(name)) + + +if __name__ == "__main__": + unittest.main() diff --git a/txtlyric_to_lrc/main.py b/txtlyric_to_lrc/main.py index f9780d4..416e3b1 100644 --- a/txtlyric_to_lrc/main.py +++ b/txtlyric_to_lrc/main.py @@ -2,79 +2,145 @@ import os import click import stable_whisper -import srt -import datetime -song_file_extensions = set([ - "mp3", # explicitly supported by whisper - "wav", - "mp4", - "mpeg", - "mpga", - "m4a", - "webm", - "flac", # stable-ts likely uses ffmpeg to convert this for whisper - "opus", -]) +from output import ( + extract_metadata, + file_is_audio, + merge_metadata, + parse_lrc_file, + result_to_lrc, + result_to_lyricsfile, +) + + +OUTPUT_EXTENSIONS = { + "yaml": "yaml", + "lrc": "lrc", +} + @click.command() @click.help_option("--help", "-h") -@click.option('--model', '-m', default='small', help='Which whisper model to use (choices are those of whisper.available_models)') -@click.option('--language', '-l', required=True, help='What language the lyrics are in') -@click.argument('directory') -def main(model, language, directory): +@click.option( + "--model", + "-m", + default="small", + help="Which whisper model to use (choices are those of whisper.available_models)", +) +@click.option( + "--language", + "-l", + required=True, + help="What language the lyrics are in", +) +@click.option( + "--format", + "-f", + "output_format", + type=click.Choice(["yaml", "lrc"], case_sensitive=False), + default="yaml", + show_default=True, + help="Output format. 'yaml' writes a Lyricsfile (.yaml) with word-level timings; 'lrc' writes classic LRC (.lrc).", +) +@click.argument("directory") +def main(model, language, output_format, directory): + output_format = output_format.lower() + output_ext = OUTPUT_EXTENSIONS[output_format] + print(f"Loading {model} model for use with language {language}...") model = stable_whisper.load_model(model) for (dirpath, _, filenames) in os.walk(directory): - if any([ file_is_audio(f) for f in filenames ]): - # audio files exist here, so let's process this folder - for f in filenames: - if file_is_audio(f): - filename = os.path.join(dirpath, f) - # get corresponding txt file name - # (we expect unaligned files to have the same filename as the song, except for the extension) - unaligned_lyrics_filename = os.path.join(dirpath, os.path.splitext(f)[0] + ".txt") - # (we also expect existing .lrc files to contain aligned lyrics) - aligned_lyrics_filename = os.path.join(dirpath, os.path.splitext(f)[0] + ".lrc") + if not any(file_is_audio(f) for f in filenames): + continue + for f in filenames: + if not file_is_audio(f): + continue + audio_path = os.path.join(dirpath, f) + base, _ = os.path.splitext(audio_path) + output_path = base + "." + output_ext - # run model to get aligned srt - if not os.path.exists(unaligned_lyrics_filename): - print(f"No corresponding unaligned lyric txt exists for {filename}") - continue - if os.path.exists(aligned_lyrics_filename): - print(f"Corresponding aligned lyric txt already exists for {filename}") - continue - print(f"Aligning lyrics for {filename}") - - result: stable_whisper.WhisperResult = model.align(filename, open(unaligned_lyrics_filename).read(), language=language, original_split=True, regroup=False) + if os.path.exists(output_path): + print(f"Aligned lyric file already exists for {audio_path}: {output_path}") + continue - # turn srt to lrc - srt = result.to_srt_vtt(filepath=None, word_level=False) - lrc = srt_to_lrc(srt) - print(f"Writing aligned lyrics at: {aligned_lyrics_filename}") - open(aligned_lyrics_filename, "x").write(lrc) - -def get_file_extension(filename: str) -> str: - # "asdf.omg.lol" -> [asdf.omg, .lol] -> lol - return os.path.splitext(filename)[1][1:] + lrc_path = base + ".lrc" + txt_path = base + ".txt" -def file_is_audio(filename: str) -> bool: - return get_file_extension(filename) in song_file_extensions + if os.path.exists(lrc_path): + _refine_from_lrc(model, audio_path, lrc_path, output_path, output_format, language) + elif os.path.exists(txt_path): + _align_from_txt(model, audio_path, txt_path, output_path, output_format, language) + else: + print(f"No .lrc or .txt sidecar found for {audio_path}") -def timedelta_to_hhmmssss(td: datetime.timedelta) -> str: - dt = datetime.datetime(1969, 1, 1) + td - return dt.strftime('%M:%S.%f')[:-4] -def srt_to_lrc(srt_text: str) -> str: - subs = list(srt.parse(srt_text)) - lines = [f"[{timedelta_to_hhmmssss(s.start)}]{s.content}" for s in subs] +def _align_from_txt(model, audio_path, txt_path, output_path, output_format, language): + """Full alignment from plain text: determines line boundaries and word timings.""" + print(f"Aligning lyrics for {audio_path} (from .txt)") + with open(txt_path) as fh: + unaligned_text = fh.read() + + result = model.align( + audio_path, + unaligned_text, + language=language, + original_split=True, + regroup=False, + vad=True, + ) + + if not result or not list(result.segments): + print(f" Alignment produced no segments; skipping") + return + + result.adjust_gaps(one_section=True) + + if output_format == "yaml": + metadata = extract_metadata(audio_path, language=language) + content = result_to_lyricsfile(result, metadata) + else: + content = result_to_lrc(result) + + print(f" Writing aligned lyrics at: {output_path}") + with open(output_path, "x") as out_fh: + out_fh.write(content) + + +def _refine_from_lrc(model, audio_path, lrc_path, output_path, output_format, language): + """Word-level refinement of existing LRC: keeps line boundaries, adds word timings.""" + print(f"Refining lyrics for {audio_path} (from .lrc)") + with open(lrc_path) as fh: + lrc_content = fh.read() + + parsed = parse_lrc_file(lrc_content) + + if not parsed.segments: + print(f" No timed lines found in {lrc_path}; skipping") + return + + result = model.align_words( + audio_path, + parsed.segments, + language=language, + vad=True, + regroup=False, + ) + + if not result or not list(result.segments): + print(f" Word alignment produced no segments; skipping") + return + + if output_format == "yaml": + metadata = merge_metadata(audio_path, parsed, cli_language=language) + content = result_to_lyricsfile(result, metadata) + else: + content = result_to_lrc(result) + + print(f" Writing refined lyrics at: {output_path}") + with open(output_path, "x") as out_fh: + out_fh.write(content) - # add the end of lyrics marker - end_time = subs[-1].end - lines.append(f"[{timedelta_to_hhmmssss(end_time)}]") - return "\n".join(lines) if __name__ == "__main__": main() - diff --git a/txtlyric_to_lrc/output.py b/txtlyric_to_lrc/output.py new file mode 100644 index 0000000..986d964 --- /dev/null +++ b/txtlyric_to_lrc/output.py @@ -0,0 +1,431 @@ +"""Output writers, LRC parsing, and metadata extraction for txtlyric-to-lrc. + +Writers: + +- :func:`result_to_lrc` -- emits classic LRC, inserting a clear-display + marker whenever the gap between consecutive segments exceeds a threshold + so that lines do not visually persist through instrumental pauses. +- :func:`result_to_lyricsfile` -- emits the YAML-based Lyricsfile format + used by lrcget/lrclib, with word-level timings. + +LRC input: + +- :func:`parse_lrc_file` -- parses an LRC file into metadata tags and + timed segments (with ``start``/``end``/``text``) suitable for + ``stable_whisper.align_words``. +""" + +from __future__ import annotations + +import os +import re +import sys +from dataclasses import dataclass, field +from typing import Dict, List, Optional, Tuple + +import mutagen +import stable_whisper +import yaml + + +song_file_extensions = { + "mp3", + "wav", + "mp4", + "mpeg", + "mpga", + "m4a", + "webm", + "flac", + "opus", +} + + +def get_file_extension(filename: str) -> str: + return os.path.splitext(filename)[1][1:] + + +def file_is_audio(filename: str) -> bool: + return get_file_extension(filename) in song_file_extensions + + +# --------------------------------------------------------------------------- +# Metadata +# --------------------------------------------------------------------------- + +@dataclass +class TrackMetadata: + title: str + artist: str + album: Optional[str] = None + duration_ms: Optional[int] = None + language: Optional[str] = None + offset_ms: Optional[int] = None + + +def extract_metadata(audio_path: str, language: Optional[str] = None) -> TrackMetadata: + """Read tags + duration from the audio file via mutagen. + + Falls back to the placeholder strings ``"Unknown"`` / ``"Unknown Artist"`` + when the corresponding tag is missing, since Lyricsfile requires + ``metadata.title`` and ``metadata.artist``. + """ + f = mutagen.File(audio_path, easy=True) + return TrackMetadata( + title=_first_tag(f, "title") or "Unknown", + artist=_first_tag(f, "artist") or "Unknown Artist", + album=_first_tag(f, "album"), + duration_ms=_duration_ms(f), + language=language, + ) + + +def _first_tag(f, key: str) -> Optional[str]: + if f is None: + return None + val = f.get(key) + if not val: + return None + if isinstance(val, list): + val = val[0] if val else None + if val is None: + return None + val = str(val).strip() + return val or None + + +def _duration_ms(f) -> Optional[int]: + info = getattr(f, "info", None) if f is not None else None + length = getattr(info, "length", None) if info is not None else None + if length is None: + return None + return int(round(length * 1000)) + + +# --------------------------------------------------------------------------- +# LRC parsing +# --------------------------------------------------------------------------- + +_LRC_METADATA_KEYS_TO_LYRICSFILE = { + "ti": "title", + "ar": "artist", + "al": "album", + "la": "language", + "lang": "language", + "language": "language", +} + +_TIMESTAMP_RE = re.compile(r"^(\d{1,3}):(\d{2})(?:[.:](\d{2,3}))?$") +_LINE_RE = re.compile(r"^\[([^\]]+)\](.*)$") + + +@dataclass +class ParsedLrc: + """Result of parsing an LRC file.""" + segments: List[dict] = field(default_factory=list) + metadata_tags: Dict[str, str] = field(default_factory=dict) + dropped_tags: Dict[str, str] = field(default_factory=dict) + + +def _parse_lrc_timestamp(token: str) -> Optional[float]: + """Parse an LRC timestamp token to seconds, or None if not a timestamp.""" + m = _TIMESTAMP_RE.match(token.strip()) + if not m: + return None + minutes = int(m.group(1)) + seconds = int(m.group(2)) + frac_raw = m.group(3) or "0" + if len(frac_raw) == 2: + frac_ms = int(frac_raw) * 10 + else: + frac_ms = int(frac_raw) + return minutes * 60 + seconds + frac_ms / 1000.0 + + +def _parse_lrc_length(value: str) -> Optional[int]: + """Parse an LRC [length:...] value to milliseconds.""" + value = value.strip() + m = re.match(r"^(\d+):(\d{2})(?:[.:](\d{2,3}))?$", value) + if not m: + return None + minutes = int(m.group(1)) + seconds = int(m.group(2)) + frac_raw = m.group(3) or "0" + if len(frac_raw) == 2: + frac_ms = int(frac_raw) * 10 + else: + frac_ms = int(frac_raw) + return (minutes * 60 + seconds) * 1000 + frac_ms + + +def _parse_lrc_offset(value: str) -> Optional[int]: + """Parse an LRC [offset:...] value to milliseconds (integer, may be negative).""" + value = value.strip() + m = re.match(r"^([+-]?\d+)$", value) + if not m: + return None + return int(m.group(1)) + + +def parse_lrc_file(content: str) -> ParsedLrc: + """Parse LRC content into timed segments and metadata. + + Returns a :class:`ParsedLrc` containing: + + - ``segments``: list of ``{"start": float, "end": float, "text": str}`` + suitable for ``model.align_words()``. Gap markers (empty-text + timestamps) contribute an ``end`` to the preceding segment but do not + appear as segments themselves. + - ``metadata_tags``: dict of recognised LRC metadata mapped to + Lyricsfile-compatible keys. + - ``dropped_tags``: dict of LRC metadata tags that have no Lyricsfile + equivalent (warned about by the caller). + + If the LRC contains an ``[offset:...]`` tag, the offset is applied to + all parsed timestamps so that the returned times are absolute. + """ + result = ParsedLrc() + + raw_timed: List[Tuple[float, str]] = [] + + for raw_line in content.splitlines(): + raw_line = raw_line.strip() + if not raw_line: + continue + m = _LINE_RE.match(raw_line) + if not m: + continue + + bracket_content = m.group(1) + after_bracket = m.group(2) + + ts = _parse_lrc_timestamp(bracket_content) + if ts is not None: + raw_timed.append((ts, after_bracket.strip())) + continue + + # Metadata tag: [key:value] + if ":" in bracket_content: + key, _, value = bracket_content.partition(":") + key = key.strip().lower() + value = value.strip() + if not value: + continue + + lyricsfile_key = _LRC_METADATA_KEYS_TO_LYRICSFILE.get(key) + if lyricsfile_key is not None: + result.metadata_tags[lyricsfile_key] = value + elif key == "length": + length_ms = _parse_lrc_length(value) + if length_ms is not None: + result.metadata_tags["duration_ms"] = str(length_ms) + else: + result.dropped_tags[key] = value + elif key == "offset": + offset_ms = _parse_lrc_offset(value) + if offset_ms is not None: + result.metadata_tags["offset_ms"] = str(offset_ms) + else: + result.dropped_tags[key] = value + else: + result.dropped_tags[key] = value + + if not raw_timed: + return result + + raw_timed.sort(key=lambda t: t[0]) + + # Apply offset: shift all timestamps so output is absolute + offset_s = 0.0 + if "offset_ms" in result.metadata_tags: + offset_s = int(result.metadata_tags["offset_ms"]) / 1000.0 + + adjusted: List[Tuple[float, str]] = [ + (max(0.0, ts + offset_s), text) for ts, text in raw_timed + ] + + # Build segments: non-empty text lines become segments; empty-text + # lines (gap markers) contribute an end time to the preceding segment. + pending_segments: List[dict] = [] + for ts, text in adjusted: + if text: + pending_segments.append({"start": ts, "end": None, "text": text}) + elif pending_segments: + # Gap marker: set the preceding segment's end + pending_segments[-1]["end"] = ts + + # Fill in missing end times: end of seg N = start of seg N+1 + for i in range(len(pending_segments) - 1): + if pending_segments[i]["end"] is None: + pending_segments[i]["end"] = pending_segments[i + 1]["start"] + + # Last segment: if no explicit end (no trailing gap marker), use + # start + 5s as a reasonable upper bound; align_words will confine + # within whatever audio is available. + if pending_segments and pending_segments[-1]["end"] is None: + pending_segments[-1]["end"] = pending_segments[-1]["start"] + 5.0 + + result.segments = pending_segments + return result + + +def merge_metadata( + audio_path: str, + lrc_parsed: ParsedLrc, + cli_language: Optional[str] = None, +) -> TrackMetadata: + """Build a :class:`TrackMetadata` by merging LRC tags over mutagen tags. + + Precedence (highest first): LRC tag → mutagen tag → placeholder. + The ``--language`` CLI flag overrides both LRC and mutagen for language. + + Warnings are printed to stderr for LRC metadata tags that have no + Lyricsfile equivalent and are therefore dropped. + """ + audio_meta = extract_metadata(audio_path, language=cli_language) + + tags = lrc_parsed.metadata_tags + + title = tags.get("title") or audio_meta.title + artist = tags.get("artist") or audio_meta.artist + album = tags.get("album") or audio_meta.album + + duration_ms = audio_meta.duration_ms + if "duration_ms" in tags: + try: + duration_ms = int(tags["duration_ms"]) + except ValueError: + pass + + language = cli_language or tags.get("language") or audio_meta.language + + for key, value in lrc_parsed.dropped_tags.items(): + print( + f" Warning: LRC tag [{key}:{value}] has no Lyricsfile equivalent; skipped", + file=sys.stderr, + ) + + return TrackMetadata( + title=title, + artist=artist, + album=album, + duration_ms=duration_ms, + language=language, + ) + + +def _format_lrc_timestamp(seconds: float) -> str: + if seconds < 0: + seconds = 0.0 + minutes = int(seconds // 60) + remainder = seconds - minutes * 60 + return f"{minutes:02d}:{remainder:05.2f}" + + +def result_to_lrc( + result: "stable_whisper.WhisperResult", + gap_threshold: float = 1.5, +) -> str: + """Render ``result`` as LRC text. + + Between consecutive segments, if ``next_segment.start - this_segment.end`` + exceeds ``gap_threshold`` seconds, an empty timestamp is emitted at + ``this_segment.end`` so consumers stop displaying the line during the + pause. A trailing empty timestamp at the end of the last segment is + always emitted. + """ + segments = list(result.segments) + if not segments: + return "" + + lines: List[str] = [] + for i, seg in enumerate(segments): + text = (seg.text or "").strip() + lines.append(f"[{_format_lrc_timestamp(seg.start)}]{text}") + next_seg = segments[i + 1] if i + 1 < len(segments) else None + if next_seg is None: + lines.append(f"[{_format_lrc_timestamp(seg.end)}]") + elif next_seg.start - seg.end > gap_threshold: + lines.append(f"[{_format_lrc_timestamp(seg.end)}]") + + return "\n".join(lines) + "\n" + + +def result_to_lyricsfile( + result: "stable_whisper.WhisperResult", + metadata: TrackMetadata, +) -> str: + """Render ``result`` as a Lyricsfile YAML string. + + The ``plain`` block is intentionally omitted; consumers receive only + the synced ``lines`` array (with word-level timings when available). + """ + metadata_obj = { + "title": metadata.title, + "artist": metadata.artist, + } + if metadata.album: + metadata_obj["album"] = metadata.album + if metadata.duration_ms is not None: + metadata_obj["duration_ms"] = metadata.duration_ms + if metadata.language: + metadata_obj["language"] = metadata.language + if metadata.offset_ms is not None: + metadata_obj["offset_ms"] = metadata.offset_ms + metadata_obj["instrumental"] = False + + lines_out = [] + for seg in result.segments: + line_obj = { + "text": (seg.text or "").strip(), + "start_ms": _to_ms(seg.start), + "end_ms": _to_ms(seg.end), + } + word_objs = _words_to_lyricsfile_words(seg.words) if seg.words else None + if word_objs: + line_obj["words"] = word_objs + lines_out.append(line_obj) + + document = { + "version": "1.0", + "metadata": metadata_obj, + "lines": lines_out, + } + + return yaml.safe_dump( + document, + sort_keys=False, + allow_unicode=True, + default_flow_style=False, + ) + + +def _to_ms(seconds: float) -> int: + return int(round(seconds * 1000)) + + +def _words_to_lyricsfile_words(word_timings) -> List[dict]: + """Convert stable-ts word objects to Lyricsfile word objects. + + Whisper's tokenization produces words with leading whitespace + (e.g. ``" club"``). Lyricsfile expects trailing whitespace except on + the final word. This re-attaches the leading space of word ``i+1`` + onto the trailing edge of word ``i``. For CJK languages stable-ts + splits without spaces, in which case no spacing is injected. + """ + bodies = [(w.word or "").lstrip() for w in word_timings] + leading_spaces = [(w.word or "")[: len(w.word or "") - len((w.word or "").lstrip())] for w in word_timings] + + out: List[dict] = [] + for i, w in enumerate(word_timings): + text = bodies[i] + if i + 1 < len(word_timings) and leading_spaces[i + 1]: + text = text + " " + out.append( + { + "text": text, + "start_ms": _to_ms(w.start), + "end_ms": _to_ms(w.end), + } + ) + return out