initial commit
This commit is contained in:
Generated
+1158
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,19 @@
|
|||||||
|
[project]
|
||||||
|
name = "txtlyric-to-lrc"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = ""
|
||||||
|
authors = [
|
||||||
|
{name = "Your Name",email = "you@example.com"}
|
||||||
|
]
|
||||||
|
readme = "README.md"
|
||||||
|
requires-python = ">=3.10"
|
||||||
|
dependencies = [
|
||||||
|
"stable-ts (>=2.19.0,<3.0.0)",
|
||||||
|
"click (>=8.2.1,<9.0.0)",
|
||||||
|
"srt (>=3.5.3,<4.0.0)"
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
[build-system]
|
||||||
|
requires = ["poetry-core>=2.0.0,<3.0.0"]
|
||||||
|
build-backend = "poetry.core.masonry.api"
|
||||||
@@ -0,0 +1,79 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
import click
|
||||||
|
import stable_whisper
|
||||||
|
import srt
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
# File extensions (lower-case, without the leading dot) treated as songs.
song_file_extensions = {
    # explicitly supported by whisper
    "mp3",
    "wav",
    "mp4",
    "mpeg",
    "mpga",
    "m4a",
    "webm",
    # stable-ts likely uses ffmpeg to convert this for whisper
    "flac",
}
|
||||||
|
|
||||||
|
@click.command()
@click.option('--model', '-m', default='small', help='Which whisper model to use (choices are those of whisper.available_models)')
@click.option('--language', '-l', required=True)
@click.argument('directory')
def main(model, language, directory):
    """Align plain-text lyrics with the songs found under DIRECTORY.

    DIRECTORY is walked recursively. Every audio file that has a .txt file
    of the same base name next to it, and no .lrc file yet, is aligned and
    the result is written to that .lrc file.
    """
    print(f"Loading {model} model for use with language {language}...")
    whisper_model = stable_whisper.load_model(model)

    for dirpath, _, filenames in os.walk(directory):
        for f in filenames:
            if not file_is_audio(f):
                continue

            filename = os.path.join(dirpath, f)
            base_name = os.path.splitext(f)[0]
            # We expect unaligned lyrics to share the song's file name,
            # except for the extension ...
            unaligned_lyrics_filename = os.path.join(dirpath, base_name + ".txt")
            # ... and an existing .lrc file to already hold aligned lyrics.
            aligned_lyrics_filename = os.path.join(dirpath, base_name + ".lrc")

            if not os.path.exists(unaligned_lyrics_filename):
                print(f"No corresponding unaligned lyric txt exists for {filename}")
                continue
            if os.path.exists(aligned_lyrics_filename):
                print(f"Corresponding aligned lyric lrc already exists for {filename}")
                continue

            print(f"Aligning lyrics for {filename}")
            # Read explicitly as UTF-8 so non-ASCII lyrics do not depend on
            # the platform's default locale encoding.
            with open(unaligned_lyrics_filename, encoding="utf-8") as lyrics_file:
                lyrics = lyrics_file.read()

            # Run the model to get the lyrics aligned to the audio.
            result: stable_whisper.WhisperResult = whisper_model.align(
                filename,
                lyrics,
                language=language,
                original_split=True,
                regroup=False,
            )

            # Turn the alignment into SRT text first, then into LRC.
            srt_text = result.to_srt_vtt(filepath=None, word_level=False)
            lrc = srt_to_lrc(srt_text)

            print(f"Writing aligned lyrics at: {aligned_lyrics_filename}")
            # Mode "x" refuses to overwrite a file that appeared in the meantime.
            with open(aligned_lyrics_filename, "x", encoding="utf-8") as lrc_file:
                lrc_file.write(lrc)
|
||||||
|
|
||||||
|
def get_file_extension(filename: str) -> str:
    """Return the lower-cased extension of *filename* without the leading dot.

    Only the last suffix counts ("asdf.omg.lol" -> "lol"). A name without an
    extension yields the empty string.
    """
    # splitext("asdf.omg.LOL") -> ("asdf.omg", ".LOL"); drop the dot and
    # normalise the case so "Song.MP3" matches the lower-case entries of
    # song_file_extensions.
    return os.path.splitext(filename)[1][1:].lower()
|
||||||
|
|
||||||
|
def file_is_audio(filename: str) -> bool:
    """Tell whether *filename* carries one of the known song extensions."""
    extension = get_file_extension(filename)
    return extension in song_file_extensions
|
||||||
|
|
||||||
|
def timedelta_to_hhmmssss(td: datetime.timedelta) -> str:
    """Format *td* as an LRC time tag body, ``mm:ss.xx``.

    Despite the function's name there is no hours field: minutes keep
    counting past 59, so one hour and two minutes becomes ``62:00.00``.
    Hundredths of a second are truncated, not rounded. Negative offsets are
    clamped to zero.
    """
    # Work in whole microseconds to avoid float rounding.
    total_microseconds = max(td, datetime.timedelta(0)) // datetime.timedelta(microseconds=1)
    minutes, remainder = divmod(total_microseconds, 60_000_000)
    seconds, microseconds = divmod(remainder, 1_000_000)
    hundredths = microseconds // 10_000
    return f"{minutes:02d}:{seconds:02d}.{hundredths:02d}"
|
||||||
|
|
||||||
|
def srt_to_lrc(srt_text: str) -> str:
    """Convert SRT subtitle text into LRC lyrics text.

    Each subtitle becomes one ``[mm:ss.xx]content`` line stamped with the
    subtitle's start time. A final line holding only a time tag, taken from
    the last subtitle's end time, marks the end of the lyrics. Returns an
    empty string when *srt_text* contains no subtitles.
    """
    subs = list(srt.parse(srt_text))
    if not subs:
        # Without any subtitle there is no end time to build the marker from.
        return ""

    lines = [f"[{timedelta_to_hhmmssss(s.start)}]{s.content}" for s in subs]

    # add the end of lyrics marker
    end_time = subs[-1].end
    lines.append(f"[{timedelta_to_hhmmssss(end_time)}]")
    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
|
||||||
|
|
||||||
Reference in New Issue
Block a user