452 lines
16 KiB
Python
452 lines
16 KiB
Python
|
|
"""Unit tests for ``txtlyric_to_lrc.output``.

Run from the repo root with::

    python -m unittest discover tests
"""
|
||
|
|
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import os
|
||
|
|
import sys
|
||
|
|
import unittest
|
||
|
|
from typing import List, Optional
|
||
|
|
|
||
|
|
import yaml
|
||
|
|
|
||
|
|
# ``txtlyric_to_lrc/main.py`` imports its sibling with ``from output import ...``
# (script-style layout), so ``output`` is importable only when the package
# directory itself is on ``sys.path``.  Resolve that directory relative to
# this test file and put it at the front of the search path once.
_TESTS_DIR = os.path.dirname(__file__)
_PKG_DIR = os.path.normpath(
    os.path.join(_TESTS_DIR, os.pardir, "txtlyric_to_lrc")
)
if _PKG_DIR not in sys.path:
    sys.path.insert(0, _PKG_DIR)
|
||
|
|
|
||
|
|
import output # noqa: E402
|
||
|
|
|
||
|
|
|
||
|
|
class _FakeWord:
    """Plain test double exposing ``word``, ``start`` and ``end`` attributes.

    The tests in this module pass instances as the ``words`` of a
    ``_FakeSegment``.
    """

    def __init__(self, word: str, start: float, end: float):
        # Values are stored as given; nothing is validated or converted.
        self.word, self.start, self.end = word, start, end
|
||
|
|
|
||
|
|
|
||
|
|
class _FakeSegment:
    """Plain test double exposing ``text``, ``start``, ``end`` and ``words``.

    ``words`` is optional; when it is omitted (or otherwise falsy) the
    attribute is set to a new empty list.
    """

    def __init__(
        self,
        text: str,
        start: float,
        end: float,
        words: Optional[List[_FakeWord]] = None,
    ):
        self.text, self.start, self.end = text, start, end
        # A fresh list per instance, so segments never share a default.
        self.words = words if words else []
|
||
|
|
|
||
|
|
|
||
|
|
class _FakeResult:
    """Plain test double whose only attribute is ``segments``.

    The tests in this module pass instances to ``output.result_to_lrc``
    and ``output.result_to_lyricsfile``.
    """

    def __init__(self, segments: List[_FakeSegment]):
        # The list is stored by reference, not copied.
        self.segments = segments
|
||
|
|
|
||
|
|
|
||
|
|
class ResultToLrcTests(unittest.TestCase):
    """Tests for ``output.result_to_lrc`` and ``output._format_lrc_timestamp``."""

    @staticmethod
    def _rendered_lines(result, **options):
        """Render *result* with ``result_to_lrc``; return stripped output as lines."""
        return output.result_to_lrc(result, **options).strip().splitlines()

    def test_inserts_clear_marker_when_gap_exceeds_threshold(self):
        """A gap above ``gap_threshold`` adds a bare timestamp at the earlier end."""
        segments = [
            _FakeSegment(" Hello world ", 1.0, 3.5),
            # Starts 5.0s after the previous segment ends.
            _FakeSegment(" After the silence", 8.5, 11.0),
        ]
        rendered = self._rendered_lines(_FakeResult(segments), gap_threshold=1.5)
        expected = [
            "[00:01.00]Hello world",
            "[00:03.50]",
            "[00:08.50]After the silence",
            "[00:11.00]",
        ]
        self.assertEqual(rendered, expected)

    def test_omits_clear_marker_when_gap_is_within_threshold(self):
        """With no ``gap_threshold`` argument a 0.9s gap produces no extra marker."""
        segments = [
            _FakeSegment(" One", 1.0, 3.5),
            # Starts 0.9s after the previous segment ends.
            _FakeSegment(" Two", 4.4, 6.0),
        ]
        rendered = self._rendered_lines(_FakeResult(segments))
        expected = [
            "[00:01.00]One",
            "[00:04.40]Two",
            "[00:06.00]",
        ]
        self.assertEqual(rendered, expected)

    def test_gap_threshold_argument_controls_marker_emission(self):
        """The same 1.7s gap is marked at threshold 1.5 but not at 2.0."""
        result = _FakeResult([
            _FakeSegment(" One", 0.0, 2.0),
            _FakeSegment(" Two", 3.7, 5.0),
        ])
        lenient = self._rendered_lines(result, gap_threshold=2.0)
        strict = self._rendered_lines(result, gap_threshold=1.5)

        marker = "[00:02.00]"
        self.assertNotIn(marker, lenient)
        self.assertIn(marker, strict)

    def test_final_clear_marker_always_emitted(self):
        """A single segment is still followed by a bare timestamp at its end."""
        only = _FakeSegment(" Only line", 1.0, 4.25)
        rendered = self._rendered_lines(_FakeResult([only]))
        expected = [
            "[00:01.00]Only line",
            "[00:04.25]",
        ]
        self.assertEqual(rendered, expected)

    def test_empty_result_returns_empty_string(self):
        """A result with no segments renders to the empty string."""
        rendered = output.result_to_lrc(_FakeResult([]))
        self.assertEqual(rendered, "")

    def test_timestamp_handles_minutes_and_clamps_negative(self):
        """75.5s formats as ``01:15.50``; a negative input formats as zero."""
        cases = (
            (75.5, "01:15.50"),
            (-1.0, "00:00.00"),
        )
        for seconds, formatted in cases:
            self.assertEqual(output._format_lrc_timestamp(seconds), formatted)
|
||
|
|
|
||
|
|
|
||
|
|
class ResultToLyricsfileTests(unittest.TestCase):
    """Tests for ``output.result_to_lyricsfile``.

    Each test parses the rendered document back with ``yaml.safe_load`` and
    inspects the resulting mapping.
    """

    # Metadata with album, duration and language all set.
    METADATA = output.TrackMetadata(
        title="Test Title",
        artist="Test Artist",
        album="Test Album",
        duration_ms=240000,
        language="en",
    )

    @staticmethod
    def _load_doc(result, metadata):
        """Render *result* with *metadata* and return the parsed YAML document."""
        return yaml.safe_load(output.result_to_lyricsfile(result, metadata))

    def _english_segment(self) -> _FakeSegment:
        """Build a five-word English segment; words after the first have a leading space."""
        timed_words = (
            ("The", 12.45, 12.90),
            (" club", 12.90, 13.50),
            (" isn't", 13.50, 14.20),
            (" the", 14.20, 14.60),
            (" lover", 17.10, 18.20),
        )
        return _FakeSegment(
            "The club isn't the lover",
            12.45,
            18.20,
            words=[_FakeWord(*item) for item in timed_words],
        )

    def _cjk_segment(self) -> _FakeSegment:
        """Build a four-character CJK segment with one word per character."""
        timed_words = (
            ("你", 1.0, 1.2),
            ("好", 1.2, 1.4),
            ("世", 1.4, 1.6),
            ("界", 1.6, 1.9),
        )
        return _FakeSegment(
            "你好世界",
            1.0,
            1.9,
            words=[_FakeWord(*item) for item in timed_words],
        )

    def test_english_words_use_trailing_spaces_except_last(self):
        """Leading spaces on input words become trailing spaces on output words."""
        doc = self._load_doc(_FakeResult([self._english_segment()]), self.METADATA)
        texts = [entry["text"] for entry in doc["lines"][0]["words"]]
        self.assertEqual(texts, ["The ", "club ", "isn't ", "the ", "lover"])

    def test_english_words_concatenate_to_line_text(self):
        """Joining the word texts reproduces the line text exactly."""
        doc = self._load_doc(_FakeResult([self._english_segment()]), self.METADATA)
        line = doc["lines"][0]
        joined = "".join(entry["text"] for entry in line["words"])
        self.assertEqual(joined, line["text"])

    def test_cjk_words_have_no_spurious_spaces(self):
        """CJK words come through without added spaces and still join to the line."""
        doc = self._load_doc(_FakeResult([self._cjk_segment()]), self.METADATA)
        words = doc["lines"][0]["words"]
        texts = [entry["text"] for entry in words]
        self.assertEqual(texts, ["你", "好", "世", "界"])
        joined = "".join(entry["text"] for entry in words)
        self.assertEqual(joined, doc["lines"][0]["text"])

    def test_word_timestamps_are_integer_milliseconds(self):
        """Word start/end are emitted as ``int`` milliseconds."""
        doc = self._load_doc(_FakeResult([self._english_segment()]), self.METADATA)
        first_word = doc["lines"][0]["words"][0]
        for key, millis in (("start_ms", 12450), ("end_ms", 12900)):
            self.assertEqual(first_word[key], millis)
        for key in ("start_ms", "end_ms"):
            self.assertIsInstance(first_word[key], int)

    def test_segment_without_words_omits_words_key(self):
        """A segment with no words yields a line without a ``words`` key."""
        bare = _FakeSegment(" Bare line ", 20.0, 22.5)
        doc = self._load_doc(_FakeResult([bare]), self.METADATA)
        line = doc["lines"][0]
        self.assertNotIn("words", line)
        self.assertEqual(line["text"], "Bare line")
        self.assertEqual(line["start_ms"], 20000)
        self.assertEqual(line["end_ms"], 22500)

    def test_metadata_optional_fields_omitted_when_unset(self):
        """Only title/artist given: album, duration_ms and language are absent."""
        minimal = output.TrackMetadata(title="T", artist="A")
        metadata = self._load_doc(_FakeResult([]), minimal)["metadata"]
        self.assertEqual(metadata["title"], "T")
        self.assertEqual(metadata["artist"], "A")
        self.assertEqual(metadata["instrumental"], False)
        for absent in ("album", "duration_ms", "language"):
            self.assertNotIn(absent, metadata)

    def test_metadata_optional_fields_included_when_set(self):
        """Album, duration_ms and language appear when set on the metadata."""
        metadata = self._load_doc(_FakeResult([]), self.METADATA)["metadata"]
        expected = (
            ("album", "Test Album"),
            ("duration_ms", 240000),
            ("language", "en"),
        )
        for key, value in expected:
            self.assertEqual(metadata[key], value)

    def test_plain_block_is_omitted(self):
        """The document has no top-level ``plain`` key."""
        result = _FakeResult([self._english_segment()])
        doc = self._load_doc(result, self.METADATA)
        self.assertNotIn("plain", doc)

    def test_version_is_one_dot_zero(self):
        """``version`` loads back as the string ``"1.0"``."""
        doc = self._load_doc(_FakeResult([]), self.METADATA)
        self.assertEqual(doc["version"], "1.0")

    def test_apostrophe_round_trips_through_yaml(self):
        """A word containing an apostrophe survives rendering and re-parsing."""
        doc = self._load_doc(_FakeResult([self._english_segment()]), self.METADATA)
        third_word = doc["lines"][0]["words"][2]
        self.assertEqual(third_word["text"], "isn't ")
|
||
|
|
|
||
|
|
|
||
|
|
class ParseLrcFileTests(unittest.TestCase):
    """Tests for ``output.parse_lrc_file``.

    The parser is given LRC text and the tests inspect ``segments`` (items
    indexed by ``"start"``, ``"end"`` and ``"text"``), ``metadata_tags`` and
    ``dropped_tags`` on the returned object.
    """

    def test_basic_timed_lines(self):
        """Each timed line ends where the next timed line starts."""
        lrc = (
            "[00:01.00]Hello world\n"
            "[00:04.50]Second line\n"
            "[00:08.00]Third line\n"
        )
        parsed = output.parse_lrc_file(lrc)
        self.assertEqual(len(parsed.segments), 3)

        first = parsed.segments[0]
        self.assertAlmostEqual(first["start"], 1.0)
        self.assertAlmostEqual(first["end"], 4.5)
        self.assertEqual(first["text"], "Hello world")

        second = parsed.segments[1]
        self.assertAlmostEqual(second["start"], 4.5)
        self.assertAlmostEqual(second["end"], 8.0)
        self.assertEqual(second["text"], "Second line")

    def test_gap_markers_set_end_on_preceding_segment(self):
        """A bare timestamp ends the preceding segment and adds no segment."""
        lrc = (
            "[00:01.00]First line\n"
            "[00:03.00]\n"
            "[00:10.00]After gap\n"
        )
        parsed = output.parse_lrc_file(lrc)
        self.assertEqual(len(parsed.segments), 2)
        self.assertAlmostEqual(parsed.segments[0]["end"], 3.0)
        self.assertAlmostEqual(parsed.segments[1]["start"], 10.0)

    def test_last_segment_gets_default_end_when_no_trailing_marker(self):
        """With nothing after it, a line starting at 5.0s is given end 10.0s."""
        parsed = output.parse_lrc_file("[00:05.00]Only line\n")
        self.assertEqual(len(parsed.segments), 1)
        self.assertAlmostEqual(parsed.segments[0]["end"], 10.0)

    def test_last_segment_uses_trailing_gap_marker(self):
        """A trailing bare timestamp supplies the last segment's end."""
        lrc = (
            "[00:05.00]Last line\n"
            "[00:09.50]\n"
        )
        parsed = output.parse_lrc_file(lrc)
        self.assertEqual(len(parsed.segments), 1)
        self.assertAlmostEqual(parsed.segments[0]["end"], 9.5)

    def test_metadata_tags_are_extracted(self):
        """``ti``/``ar``/``al`` map to ``title``/``artist``/``album``."""
        lrc = (
            "[ti:My Song]\n"
            "[ar:My Artist]\n"
            "[al:My Album]\n"
            "[00:01.00]Hello\n"
        )
        tags = output.parse_lrc_file(lrc).metadata_tags
        self.assertEqual(tags["title"], "My Song")
        self.assertEqual(tags["artist"], "My Artist")
        self.assertEqual(tags["album"], "My Album")

    def test_language_tag_variants(self):
        """``la``, ``lang`` and ``language`` all populate ``language``."""
        for tag in ("la", "lang", "language"):
            # subTest reports every failing variant instead of stopping at
            # the first one.
            with self.subTest(tag=tag):
                parsed = output.parse_lrc_file(f"[{tag}:ja]\n[00:01.00]Hello\n")
                self.assertEqual(
                    parsed.metadata_tags.get("language"),
                    "ja",
                    f"tag [{tag}:ja] should map to language=ja",
                )

    def test_length_tag_parsed_to_duration_ms(self):
        """``[length:3:40]`` becomes ``duration_ms`` as the string ``"220000"``."""
        parsed = output.parse_lrc_file("[length:3:40]\n[00:01.00]Hello\n")
        # The expected value is a string, not an int.
        self.assertEqual(parsed.metadata_tags["duration_ms"], "220000")

    def test_length_tag_with_fraction(self):
        """``[length:1:30.50]`` becomes ``duration_ms`` as the string ``"90500"``."""
        parsed = output.parse_lrc_file("[length:1:30.50]\n[00:01.00]Hello\n")
        self.assertEqual(parsed.metadata_tags["duration_ms"], "90500")

    def test_offset_applied_to_timestamps(self):
        """``[offset:+500]`` moves every start 0.5s later."""
        lrc = (
            "[offset:+500]\n"
            "[00:01.00]Hello\n"
            "[00:04.00]World\n"
        )
        parsed = output.parse_lrc_file(lrc)
        self.assertAlmostEqual(parsed.segments[0]["start"], 1.5)
        self.assertAlmostEqual(parsed.segments[1]["start"], 4.5)

    def test_negative_offset_clamps_to_zero(self):
        """An offset that would push a start below zero yields 0.0."""
        lrc = (
            "[offset:-2000]\n"
            "[00:01.00]Hello\n"
        )
        parsed = output.parse_lrc_file(lrc)
        self.assertAlmostEqual(parsed.segments[0]["start"], 0.0)

    def test_unknown_tags_go_to_dropped(self):
        """``re``/``by``/``ve`` land in ``dropped_tags``, not ``metadata_tags``."""
        lrc = (
            "[re:LRC Editor v3.0]\n"
            "[by:Some Person]\n"
            "[ve:1.0]\n"
            "[00:01.00]Hello\n"
        )
        parsed = output.parse_lrc_file(lrc)
        self.assertIn("re", parsed.dropped_tags)
        self.assertIn("by", parsed.dropped_tags)
        self.assertIn("ve", parsed.dropped_tags)
        self.assertEqual(len(parsed.metadata_tags), 0)

    def test_empty_content_returns_empty(self):
        """Empty input gives no segments and no metadata tags."""
        parsed = output.parse_lrc_file("")
        self.assertEqual(len(parsed.segments), 0)
        self.assertEqual(len(parsed.metadata_tags), 0)

    def test_metadata_only_returns_no_segments(self):
        """Tags without timed lines give metadata but no segments."""
        parsed = output.parse_lrc_file("[ti:Title]\n[ar:Artist]\n")
        self.assertEqual(len(parsed.segments), 0)
        self.assertEqual(parsed.metadata_tags["title"], "Title")

    def test_two_digit_fraction(self):
        """``[01:23.45]`` parses to 83.45 seconds."""
        parsed = output.parse_lrc_file("[01:23.45]Line\n")
        self.assertAlmostEqual(parsed.segments[0]["start"], 83.45)

    def test_three_digit_fraction(self):
        """``[01:23.456]`` parses to 83.456 seconds."""
        parsed = output.parse_lrc_file("[01:23.456]Line\n")
        self.assertAlmostEqual(parsed.segments[0]["start"], 83.456)

    def test_multiple_gap_markers_only_last_wins(self):
        """With consecutive bare timestamps, the later one sets the end."""
        lrc = (
            "[00:01.00]Line\n"
            "[00:03.00]\n"
            "[00:05.00]\n"
            "[00:10.00]Next\n"
        )
        parsed = output.parse_lrc_file(lrc)
        self.assertEqual(len(parsed.segments), 2)
        self.assertAlmostEqual(parsed.segments[0]["end"], 5.0)

    def test_segments_sorted_by_timestamp(self):
        """Lines given out of order come back ordered by timestamp."""
        lrc = (
            "[00:10.00]Second\n"
            "[00:01.00]First\n"
        )
        parsed = output.parse_lrc_file(lrc)
        self.assertEqual(parsed.segments[0]["text"], "First")
        self.assertEqual(parsed.segments[1]["text"], "Second")
|
||
|
|
|
||
|
|
|
||
|
|
class MergeMetadataTests(unittest.TestCase):
    """Tests around combining LRC tags with other metadata sources.

    NOTE(review): the first three tests do not call ``merge_metadata``; they
    re-apply a "first truthy value wins" rule by hand, so they only stay
    meaningful while that rule matches the real implementation -- confirm.
    """

    def test_lrc_tags_override_placeholders(self):
        """Truthy LRC tags replace placeholder title/artist/album values."""
        lrc_values = {
            "title": "LRC Title",
            "artist": "LRC Artist",
            "album": "LRC Album",
        }
        parsed = output.ParsedLrc(
            metadata_tags={"title": "LRC Title", "artist": "LRC Artist", "album": "LRC Album"},
        )
        # Start from placeholder metadata and apply the override rule
        # directly, field by field: a truthy tag wins, otherwise the
        # existing attribute is kept.
        meta = output.TrackMetadata(title="Unknown", artist="Unknown Artist")
        tags = parsed.metadata_tags
        for field in ("title", "artist", "album"):
            setattr(meta, field, tags.get(field) or getattr(meta, field))

        for field in ("title", "artist", "album"):
            self.assertEqual(getattr(meta, field), lrc_values[field])

    def test_cli_language_overrides_lrc_tag(self):
        """A truthy CLI language takes precedence over the LRC ``language`` tag."""
        parsed = output.ParsedLrc(
            metadata_tags={"language": "ja"},
        )
        cli_language = "en"
        # The CLI value is consulted first; the tag is only a fallback.
        language = (
            cli_language if cli_language else parsed.metadata_tags.get("language")
        )
        self.assertEqual(language, "en")

    def test_lrc_language_used_when_cli_absent(self):
        """With no CLI language, the LRC ``language`` tag is used."""
        parsed = output.ParsedLrc(
            metadata_tags={"language": "ja"},
        )
        cli_language = None
        language = (
            cli_language if cli_language else parsed.metadata_tags.get("language")
        )
        self.assertEqual(language, "ja")

    def test_dropped_tags_populated(self):
        """``re``/``by`` values are kept in ``dropped_tags``; ``ti`` is not dropped."""
        lrc = (
            "[re:SomeEditor]\n"
            "[by:Author]\n"
            "[ti:Title]\n"
            "[00:01.00]Hello\n"
        )
        dropped = output.parse_lrc_file(lrc).dropped_tags
        self.assertEqual(dropped["re"], "SomeEditor")
        self.assertEqual(dropped["by"], "Author")
        self.assertNotIn("ti", dropped)
|
||
|
|
|
||
|
|
|
||
|
|
class FileIsAudioTests(unittest.TestCase):
    """Tests for ``output.file_is_audio``.

    Each filename runs in its own ``subTest`` so a failure names the
    offending file and the remaining names are still checked.
    """

    def test_recognises_known_extensions(self):
        """``.mp3``, ``.flac`` and ``.opus`` names are reported as audio."""
        for name in ("song.mp3", "track.flac", "tune.OPUS"):
            with self.subTest(name=name):
                # NOTE(review): the name is lower-cased before the call, so
                # "tune.OPUS" only exercises "tune.opus"; confirm whether
                # ``file_is_audio`` itself is meant to be case-insensitive.
                self.assertTrue(
                    output.file_is_audio(name.lower()),
                    f"expected {name!r} to be recognised as audio",
                )

    def test_rejects_unknown_extensions(self):
        """Text, image, LRC and YAML names are not reported as audio."""
        for name in ("notes.txt", "cover.jpg", "lyrics.lrc", "lyrics.yaml"):
            with self.subTest(name=name):
                self.assertFalse(output.file_is_audio(name))
|
||
|
|
|
||
|
|
|
||
|
|
# Allow running this file directly as a script, in addition to discovery.
if __name__ == "__main__":
    unittest.main()
|