Files
txtlyric-to-lrc/tests/test_output.py
T

452 lines
16 KiB
Python
Raw Normal View History

"""Unit tests for ``txtlyric_to_lrc.output``.
Run from the repo root with::

    python -m unittest discover tests
"""
from __future__ import annotations
import os
import sys
import unittest
from typing import List, Optional
import yaml
# The package is laid out as a script-style module
# (``txtlyric_to_lrc/main.py`` does ``from output import ...``), so we
# add the package directory to ``sys.path`` to import ``output`` directly.
# The directory is located relative to this test file: one level up from
# the file's own directory, then into ``txtlyric_to_lrc``.
_PKG_DIR = os.path.normpath(
    os.path.join(os.path.dirname(__file__), os.pardir, "txtlyric_to_lrc")
)
# Skip the insert when the entry is already present so the path is not
# duplicated.
if _PKG_DIR not in sys.path:
    sys.path.insert(0, _PKG_DIR)
# This import has to follow the ``sys.path`` adjustment above, which is why
# the "module-level import not at top of file" lint (E402) is silenced.
import output  # noqa: E402
class _FakeWord:
def __init__(self, word: str, start: float, end: float):
self.word = word
self.start = start
self.end = end
class _FakeSegment:
def __init__(
self,
text: str,
start: float,
end: float,
words: Optional[List[_FakeWord]] = None,
):
self.text = text
self.start = start
self.end = end
self.words = words or []
class _FakeResult:
def __init__(self, segments: List[_FakeSegment]):
self.segments = segments
class ResultToLrcTests(unittest.TestCase):
    """Exercise ``output.result_to_lrc`` and ``output._format_lrc_timestamp``.

    Segment times are given in seconds. In the expected output a stamp with
    no text after it is the "clear" marker that blanks the displayed line.
    """

    @staticmethod
    def _lrc_lines(result, **options):
        """Render *result* to LRC, strip outer whitespace and split lines.

        Any keyword *options* are forwarded to ``output.result_to_lrc``.
        """
        return output.result_to_lrc(result, **options).strip().splitlines()

    def test_inserts_clear_marker_when_gap_exceeds_threshold(self):
        opening = _FakeSegment(" Hello world ", 1.0, 3.5)
        # Starts 5.0s after ``opening`` ends, far beyond the 1.5s threshold.
        closing = _FakeSegment(" After the silence", 8.5, 11.0)
        lines = self._lrc_lines(
            _FakeResult([opening, closing]), gap_threshold=1.5
        )
        self.assertEqual(
            lines,
            [
                "[00:01.00]Hello world",
                "[00:03.50]",
                "[00:08.50]After the silence",
                "[00:11.00]",
            ],
        )

    def test_omits_clear_marker_when_gap_is_within_threshold(self):
        # 0.9s between the two segments, well under the default threshold.
        segments = [
            _FakeSegment(" One", 1.0, 3.5),
            _FakeSegment(" Two", 4.4, 6.0),
        ]
        lines = self._lrc_lines(_FakeResult(segments))
        self.assertEqual(
            lines,
            [
                "[00:01.00]One",
                "[00:04.40]Two",
                "[00:06.00]",
            ],
        )

    def test_gap_threshold_argument_controls_marker_emission(self):
        # 1.7s gap: below a 2.0s threshold, above a 1.5s one.
        result = _FakeResult([
            _FakeSegment(" One", 0.0, 2.0),
            _FakeSegment(" Two", 3.7, 5.0),
        ])
        clear_marker = "[00:02.00]"
        lenient = self._lrc_lines(result, gap_threshold=2.0)
        strict = self._lrc_lines(result, gap_threshold=1.5)
        self.assertNotIn(clear_marker, lenient)
        self.assertIn(clear_marker, strict)

    def test_final_clear_marker_always_emitted(self):
        only = _FakeSegment(" Only line", 1.0, 4.25)
        lines = self._lrc_lines(_FakeResult([only]))
        self.assertEqual(
            lines,
            [
                "[00:01.00]Only line",
                "[00:04.25]",
            ],
        )

    def test_empty_result_returns_empty_string(self):
        rendered = output.result_to_lrc(_FakeResult([]))
        self.assertEqual(rendered, "")

    def test_timestamp_handles_minutes_and_clamps_negative(self):
        cases = (
            (75.5, "01:15.50"),   # rolls over into the minutes field
            (-1.0, "00:00.00"),   # negative input is clamped to zero
        )
        for seconds, stamp in cases:
            self.assertEqual(output._format_lrc_timestamp(seconds), stamp)
class ResultToLyricsfileTests(unittest.TestCase):
    """Exercise ``output.result_to_lyricsfile``.

    Every test renders a fake result to YAML text, parses it back with
    ``yaml.safe_load`` and inspects the resulting document (``version``,
    ``metadata`` and ``lines`` keys).
    """

    # Fully populated metadata shared by most tests.
    METADATA = output.TrackMetadata(
        title="Test Title",
        artist="Test Artist",
        album="Test Album",
        duration_ms=240000,
        language="en",
    )

    def _render(self, segments, metadata=None):
        """Render *segments* and return the parsed YAML document.

        Args:
            segments: List of ``_FakeSegment`` objects to wrap in a result.
            metadata: Metadata to render with; ``METADATA`` when omitted.
        """
        meta = self.METADATA if metadata is None else metadata
        rendered = output.result_to_lyricsfile(_FakeResult(segments), meta)
        return yaml.safe_load(rendered)

    def _english_segment(self) -> _FakeSegment:
        """Return a five-word English line whose words carry leading spaces."""
        words = [
            _FakeWord("The", 12.45, 12.90),
            _FakeWord(" club", 12.90, 13.50),
            _FakeWord(" isn't", 13.50, 14.20),
            _FakeWord(" the", 14.20, 14.60),
            _FakeWord(" lover", 17.10, 18.20),
        ]
        return _FakeSegment(
            "The club isn't the lover", 12.45, 18.20, words=words
        )

    def _cjk_segment(self) -> _FakeSegment:
        """Return a four-character CJK line with one word per character.

        The word texts must be the actual characters of the segment text:
        with empty strings the fixture would not exercise CJK handling and
        could never concatenate back to the line text.
        """
        words = [
            _FakeWord("你", 1.0, 1.2),
            _FakeWord("好", 1.2, 1.4),
            _FakeWord("世", 1.4, 1.6),
            _FakeWord("界", 1.6, 1.9),
        ]
        return _FakeSegment("你好世界", 1.0, 1.9, words=words)

    def test_english_words_use_trailing_spaces_except_last(self):
        doc = self._render([self._english_segment()])
        words = doc["lines"][0]["words"]
        self.assertEqual(
            [w["text"] for w in words],
            ["The ", "club ", "isn't ", "the ", "lover"],
        )

    def test_english_words_concatenate_to_line_text(self):
        doc = self._render([self._english_segment()])
        line = doc["lines"][0]
        self.assertEqual(
            "".join(w["text"] for w in line["words"]), line["text"]
        )

    def test_cjk_words_have_no_spurious_spaces(self):
        doc = self._render([self._cjk_segment()])
        line = doc["lines"][0]
        texts = [w["text"] for w in line["words"]]
        # No separator may be added between (or after) the characters.
        self.assertEqual(texts, ["你", "好", "世", "界"])
        self.assertEqual("".join(texts), line["text"])

    def test_word_timestamps_are_integer_milliseconds(self):
        doc = self._render([self._english_segment()])
        first_word = doc["lines"][0]["words"][0]
        self.assertEqual(first_word["start_ms"], 12450)
        self.assertEqual(first_word["end_ms"], 12900)
        self.assertIsInstance(first_word["start_ms"], int)
        self.assertIsInstance(first_word["end_ms"], int)

    def test_segment_without_words_omits_words_key(self):
        doc = self._render([_FakeSegment(" Bare line ", 20.0, 22.5)])
        line = doc["lines"][0]
        self.assertNotIn("words", line)
        self.assertEqual(line["text"], "Bare line")
        self.assertEqual(line["start_ms"], 20000)
        self.assertEqual(line["end_ms"], 22500)

    def test_metadata_optional_fields_omitted_when_unset(self):
        meta = output.TrackMetadata(title="T", artist="A")
        metadata = self._render([], metadata=meta)["metadata"]
        self.assertEqual(metadata["title"], "T")
        self.assertEqual(metadata["artist"], "A")
        self.assertEqual(metadata["instrumental"], False)
        for absent in ("album", "duration_ms", "language"):
            self.assertNotIn(absent, metadata)

    def test_metadata_optional_fields_included_when_set(self):
        metadata = self._render([])["metadata"]
        self.assertEqual(metadata["album"], "Test Album")
        self.assertEqual(metadata["duration_ms"], 240000)
        self.assertEqual(metadata["language"], "en")

    def test_plain_block_is_omitted(self):
        doc = self._render([self._english_segment()])
        self.assertNotIn("plain", doc)

    def test_version_is_one_dot_zero(self):
        doc = self._render([])
        # Compared as a string: the parsed value must be "1.0", not 1.0.
        self.assertEqual(doc["version"], "1.0")

    def test_apostrophe_round_trips_through_yaml(self):
        doc = self._render([self._english_segment()])
        self.assertEqual(doc["lines"][0]["words"][2]["text"], "isn't ")
class ParseLrcFileTests(unittest.TestCase):
    """Exercise ``output.parse_lrc_file`` on in-memory LRC text.

    The parsed object is inspected through ``segments`` (dicts with
    ``start`` / ``end`` in seconds and ``text``), ``metadata_tags`` and
    ``dropped_tags``.
    """

    def test_basic_timed_lines(self):
        parsed = output.parse_lrc_file(
            "[00:01.00]Hello world\n"
            "[00:04.50]Second line\n"
            "[00:08.00]Third line\n"
        )
        segments = parsed.segments
        self.assertEqual(len(segments), 3)
        # Each line ends where the following line starts.
        first = segments[0]
        self.assertAlmostEqual(first["start"], 1.0)
        self.assertAlmostEqual(first["end"], 4.5)
        self.assertEqual(first["text"], "Hello world")
        second = segments[1]
        self.assertAlmostEqual(second["start"], 4.5)
        self.assertAlmostEqual(second["end"], 8.0)
        self.assertEqual(second["text"], "Second line")

    def test_gap_markers_set_end_on_preceding_segment(self):
        parsed = output.parse_lrc_file(
            "[00:01.00]First line\n"
            "[00:03.00]\n"
            "[00:10.00]After gap\n"
        )
        segments = parsed.segments
        # The text-less stamp is not a segment of its own.
        self.assertEqual(len(segments), 2)
        self.assertAlmostEqual(segments[0]["end"], 3.0)
        self.assertAlmostEqual(segments[1]["start"], 10.0)

    def test_last_segment_gets_default_end_when_no_trailing_marker(self):
        parsed = output.parse_lrc_file("[00:05.00]Only line\n")
        segments = parsed.segments
        self.assertEqual(len(segments), 1)
        # Expected end is 5.0s past the 5.0s start.
        self.assertAlmostEqual(segments[0]["end"], 10.0)

    def test_last_segment_uses_trailing_gap_marker(self):
        parsed = output.parse_lrc_file(
            "[00:05.00]Last line\n"
            "[00:09.50]\n"
        )
        segments = parsed.segments
        self.assertEqual(len(segments), 1)
        self.assertAlmostEqual(segments[0]["end"], 9.5)

    def test_metadata_tags_are_extracted(self):
        parsed = output.parse_lrc_file(
            "[ti:My Song]\n"
            "[ar:My Artist]\n"
            "[al:My Album]\n"
            "[00:01.00]Hello\n"
        )
        tags = parsed.metadata_tags
        self.assertEqual(tags["title"], "My Song")
        self.assertEqual(tags["artist"], "My Artist")
        self.assertEqual(tags["album"], "My Album")

    def test_language_tag_variants(self):
        for tag in ("la", "lang", "language"):
            parsed = output.parse_lrc_file(f"[{tag}:ja]\n[00:01.00]Hello\n")
            found = parsed.metadata_tags.get("language")
            self.assertEqual(
                found, "ja",
                f"tag [{tag}:ja] should map to language=ja",
            )

    def test_length_tag_parsed_to_duration_ms(self):
        parsed = output.parse_lrc_file("[length:3:40]\n[00:01.00]Hello\n")
        # 3 min 40 s = 220 s; the tag value is stored as a string.
        self.assertEqual(parsed.metadata_tags["duration_ms"], str(220000))

    def test_length_tag_with_fraction(self):
        parsed = output.parse_lrc_file("[length:1:30.50]\n[00:01.00]Hello\n")
        # 1 min 30.50 s = 90.5 s.
        self.assertEqual(parsed.metadata_tags["duration_ms"], str(90500))

    def test_offset_applied_to_timestamps(self):
        parsed = output.parse_lrc_file(
            "[offset:+500]\n"
            "[00:01.00]Hello\n"
            "[00:04.00]World\n"
        )
        segments = parsed.segments
        # A +500 offset moves both starts half a second later.
        self.assertAlmostEqual(segments[0]["start"], 1.5)
        self.assertAlmostEqual(segments[1]["start"], 4.5)

    def test_negative_offset_clamps_to_zero(self):
        parsed = output.parse_lrc_file(
            "[offset:-2000]\n"
            "[00:01.00]Hello\n"
        )
        # 1.0s shifted by -2000 would be negative; expect a floor of zero.
        self.assertAlmostEqual(parsed.segments[0]["start"], 0.0)

    def test_unknown_tags_go_to_dropped(self):
        parsed = output.parse_lrc_file(
            "[re:LRC Editor v3.0]\n"
            "[by:Some Person]\n"
            "[ve:1.0]\n"
            "[00:01.00]Hello\n"
        )
        dropped = parsed.dropped_tags
        self.assertIn("re", dropped)
        self.assertIn("by", dropped)
        self.assertIn("ve", dropped)
        self.assertEqual(len(parsed.metadata_tags), 0)

    def test_empty_content_returns_empty(self):
        parsed = output.parse_lrc_file("")
        self.assertEqual(len(parsed.segments), 0)
        self.assertEqual(len(parsed.metadata_tags), 0)

    def test_metadata_only_returns_no_segments(self):
        parsed = output.parse_lrc_file("[ti:Title]\n[ar:Artist]\n")
        self.assertEqual(len(parsed.segments), 0)
        self.assertEqual(parsed.metadata_tags["title"], "Title")

    def test_two_digit_fraction(self):
        parsed = output.parse_lrc_file("[01:23.45]Line\n")
        self.assertAlmostEqual(parsed.segments[0]["start"], 83.45)

    def test_three_digit_fraction(self):
        parsed = output.parse_lrc_file("[01:23.456]Line\n")
        self.assertAlmostEqual(parsed.segments[0]["start"], 83.456)

    def test_multiple_gap_markers_only_last_wins(self):
        parsed = output.parse_lrc_file(
            "[00:01.00]Line\n"
            "[00:03.00]\n"
            "[00:05.00]\n"
            "[00:10.00]Next\n"
        )
        segments = parsed.segments
        self.assertEqual(len(segments), 2)
        # Of the two consecutive markers, the later one (5.0s) sets the end.
        self.assertAlmostEqual(segments[0]["end"], 5.0)

    def test_segments_sorted_by_timestamp(self):
        parsed = output.parse_lrc_file(
            "[00:10.00]Second\n"
            "[00:01.00]First\n"
        )
        segments = parsed.segments
        # Input order is reversed; output must be in time order.
        self.assertEqual(segments[0]["text"], "First")
        self.assertEqual(segments[1]["text"], "Second")
class MergeMetadataTests(unittest.TestCase):
    """Precedence rules for combining LRC tags with other metadata.

    NOTE(review): the first three tests spell out the merge expressions
    inline rather than calling a merge function in ``output``. They pin the
    intended precedence but cannot detect a regression in the production
    merge code — consider replacing them once that function can be called
    without an audio file.
    """

    def test_lrc_tags_override_placeholders(self):
        lrc_tags = {
            "title": "LRC Title",
            "artist": "LRC Artist",
            "album": "LRC Album",
        }
        parsed = output.ParsedLrc(metadata_tags=lrc_tags)
        # Start from placeholder metadata and apply the rule by hand:
        # a non-empty LRC tag wins, otherwise the existing value is kept.
        meta = output.TrackMetadata(title="Unknown", artist="Unknown Artist")
        for field in ("title", "artist", "album"):
            merged = parsed.metadata_tags.get(field) or getattr(meta, field)
            setattr(meta, field, merged)
        self.assertEqual(meta.title, "LRC Title")
        self.assertEqual(meta.artist, "LRC Artist")
        self.assertEqual(meta.album, "LRC Album")

    def test_cli_language_overrides_lrc_tag(self):
        parsed = output.ParsedLrc(metadata_tags={"language": "ja"})
        # The CLI value is consulted first, so it wins when present.
        cli_language = "en"
        chosen = cli_language or parsed.metadata_tags.get("language")
        self.assertEqual(chosen, "en")

    def test_lrc_language_used_when_cli_absent(self):
        parsed = output.ParsedLrc(metadata_tags={"language": "ja"})
        # With no CLI value the LRC tag is the fallback.
        cli_language = None
        chosen = cli_language or parsed.metadata_tags.get("language")
        self.assertEqual(chosen, "ja")

    def test_dropped_tags_populated(self):
        parsed = output.parse_lrc_file(
            "[re:SomeEditor]\n"
            "[by:Author]\n"
            "[ti:Title]\n"
            "[00:01.00]Hello\n"
        )
        dropped = parsed.dropped_tags
        self.assertEqual(dropped["re"], "SomeEditor")
        self.assertEqual(dropped["by"], "Author")
        # A recognised tag must not also be reported as dropped.
        self.assertNotIn("ti", dropped)
class FileIsAudioTests(unittest.TestCase):
    """Extension-based checks for ``output.file_is_audio``."""

    def test_recognises_known_extensions(self):
        # NOTE(review): every name is lower-cased before the call, so the
        # upper-case ``.OPUS`` sample does not exercise case handling inside
        # ``file_is_audio`` itself — confirm whether that is intended.
        samples = ("song.mp3", "track.flac", "tune.OPUS")
        for name in samples:
            lowered = name.lower()
            self.assertTrue(
                output.file_is_audio(lowered),
                f"expected {name!r} to be recognised as audio",
            )

    def test_rejects_unknown_extensions(self):
        non_audio = ("notes.txt", "cover.jpg", "lyrics.lrc", "lyrics.yaml")
        for name in non_audio:
            self.assertFalse(output.file_is_audio(name))
# Allow running this file directly as a script, in addition to
# ``python -m unittest discover``.
if __name__ == "__main__":
    unittest.main()