From f2d0b77b6f859a0abf3b3264fd3b026960dc3b71 Mon Sep 17 00:00:00 2001 From: "Nicholas (MD3 )" Date: Thu, 11 Jul 2024 19:20:41 +0400 Subject: [PATCH] Refactor lyrics code --- .../gramophone/logic/utils/LrcUtils.kt | 268 +++++++++--------- 1 file changed, 138 insertions(+), 130 deletions(-) diff --git a/app/src/main/kotlin/org/akanework/gramophone/logic/utils/LrcUtils.kt b/app/src/main/kotlin/org/akanework/gramophone/logic/utils/LrcUtils.kt index 1078a7779..272e7f85f 100644 --- a/app/src/main/kotlin/org/akanework/gramophone/logic/utils/LrcUtils.kt +++ b/app/src/main/kotlin/org/akanework/gramophone/logic/utils/LrcUtils.kt @@ -18,7 +18,11 @@ object LrcUtils { private const val TAG = "LrcUtils" @OptIn(UnstableApi::class) - fun extractAndParseLyrics(metadata: Metadata, trim: Boolean, multilineEnable: Boolean): MutableList? { + fun extractAndParseLyrics( + metadata: Metadata, + trim: Boolean, + multilineEnable: Boolean + ): MutableList? { for (i in 0..? { + fun loadAndParseLyricsFile( + musicFile: File?, + trim: Boolean, + multilineEnable: Boolean + ): MutableList? { val lrcFile = musicFile?.let { File(it.parentFile, it.nameWithoutExtension + ".lrc") } return loadLrcFile(lrcFile)?.let { try { @@ -51,7 +59,8 @@ object LrcUtils { } catch (e: Exception) { Log.e(TAG, Log.getStackTraceString(e)) null - } } + } + } } private fun loadLrcFile(lrcFile: File?): String? { @@ -88,75 +97,77 @@ object LrcUtils { * We completely ignore all ID3 tags from the header as MediaStore is our source of truth. */ @VisibleForTesting - fun parseLrcString(lrcContent: String, trim: Boolean, multilineEnable: Boolean): MutableList? { + fun parseLrcString( + lrcContent: String, + trim: Boolean, + multilineEnable: Boolean + ): MutableList? { if (lrcContent.isBlank()) return null + + // Regex to match time tags in various formats val timeMarksRegex = "\\[(\\d{2}:\\d{2})([.:]\\d+)?]".toRegex() - val list = mutableListOf() + val lyricsList = mutableListOf() var foundNonNull = false var lyricsText: StringBuilder? = StringBuilder() - //val measureTime = measureTimeMillis { - // Add all lines found on LRC (probably will be unordered because of "compression" or translation type) + lrcContent.lines().forEach { line -> - timeMarksRegex.findAll(line).let { sequence -> - if (sequence.count() == 0) { - return@let - } - var lyricLine : String - sequence.forEach { match -> - val firstSync = match.groupValues.subList(1, match.groupValues.size) - .joinToString("") - - val ts = parseTime(firstSync) - if (!foundNonNull && ts > 0) { - foundNonNull = true - lyricsText = null - } + val matches = timeMarksRegex.findAll(line).toList() + if (matches.isEmpty()) return@forEach - if (multilineEnable) { - val startIndex = lrcContent.indexOf(line) + firstSync.length+1 - var endIndex = lrcContent.length // default to end - var nextSync = "" - - // track next sync point if found - if (timeMarksRegex.find(lrcContent, startIndex)?.value != null) { - nextSync = timeMarksRegex.find(lrcContent, startIndex)?.value!! - endIndex = lrcContent.indexOf(nextSync) - 1 // delete \n at end - } - - // read as single line *IF* this is a single line lyric - lyricLine = if (nextSync == "[$firstSync]") { - line.substring(sequence.last().range.last + 1) - .let { if (trim) it.trim() else it } - } else { - lrcContent.substring(startIndex + 1, endIndex) - .let { if (trim) it.trim() else it } - } - } - else { - lyricLine = line.substring(sequence.last().range.last + 1) - .let { if (trim) it.trim() else it } - } + matches.forEach { match -> + val timeString = match.groupValues[1] + (match.groupValues[2] ?: "") + val timestamp = parseTime(timeString) - lyricsText?.append(lyricLine + "\n") - list.add(MediaStoreUtils.Lyric(ts, lyricLine)) + if (!foundNonNull && timestamp > 0) { + foundNonNull = true + lyricsText = null } + + // Extract the lyric line based on whether multiline is enabled + val lyricLine = if (multilineEnable) { + val startIndex = lrcContent.indexOf(line) + match.value.length + val endIndex = findEndIndex(lrcContent, startIndex, timeMarksRegex) + lrcContent.substring(startIndex, endIndex).let { if (trim) it.trim() else it } + } else { + line.substring(match.range.last + 1).let { if (trim) it.trim() else it } + } + + // Append the lyric line to lyricsText if necessary and add to list + lyricsText?.append("$lyricLine\n") + lyricsList.add(MediaStoreUtils.Lyric(timestamp, lyricLine)) } } - // Sort and mark as translations all found duplicated timestamps (usually one) - list.sortBy { it.timeStamp } - var previousTs = -1L - list.forEach { - it.isTranslation = (it.timeStamp!! == previousTs) - previousTs = it.timeStamp - } - //} - if (list.isEmpty() && lrcContent.isNotEmpty()) { - list.add(MediaStoreUtils.Lyric(null, lrcContent, false)) + + // Mark translations and sort by timestamp + markTranslations(lyricsList) + + // Handle cases where no valid timestamps were found + if (lyricsList.isEmpty() && lrcContent.isNotEmpty()) { + lyricsList.add(MediaStoreUtils.Lyric(null, lrcContent, false)) } else if (!foundNonNull) { - list.clear() - list.add(MediaStoreUtils.Lyric(null, lyricsText!!.toString(), false)) + lyricsList.clear() + lyricsList.add(MediaStoreUtils.Lyric(null, lyricsText.toString(), false)) + } + + return lyricsList + } + + private fun findEndIndex( + lrcContent: String, + startIndex: Int, + timeMarksRegex: Regex + ): Int { + val nextSyncMatch = timeMarksRegex.find(lrcContent, startIndex) + return nextSyncMatch?.range?.first?.minus(1) ?: lrcContent.length + } + + private fun markTranslations(lyricsList: MutableList) { + lyricsList.sortBy { it.timeStamp } + var previousTimestamp: Long? = null + lyricsList.forEach { + it.isTranslation = (it.timeStamp == previousTimestamp) + previousTimestamp = it.timeStamp } - return list } private fun parseTime(timeString: String): Long { @@ -166,96 +177,93 @@ object LrcUtils { val minutes = matchResult?.groupValues?.get(1)?.toLongOrNull() ?: 0 val seconds = matchResult?.groupValues?.get(2)?.toLongOrNull() ?: 0 val millisecondsString = matchResult?.groupValues?.get(3) - // if one specifies micro/pico/nano/whatever seconds for some insane reason, - // scrap the extra information - val milliseconds = (millisecondsString?.substring(0, - millisecondsString.length.coerceAtMost(3)) ?.toLongOrNull() ?: 0) * - 10f.pow(3 - (millisecondsString?.length ?: 0)).toLong() + val milliseconds = millisecondsString?.padEnd(3, '0')?.take(3)?.toLongOrNull() ?: 0 return minutes * 60000 + seconds * 1000 + milliseconds } } -// Class heavily based on MIT-licensed https://github.com/yoheimuta/ExoPlayerMusic/blob/77cfb989b59f6906b1170c9b2d565f9b8447db41/app/src/main/java/com/github/yoheimuta/amplayer/playback/UsltFrameDecoder.kt + + // Class heavily based on MIT-licensed https://github.com/yoheimuta/ExoPlayerMusic/blob/77cfb989b59f6906b1170c9b2d565f9b8447db41/app/src/main/java/com/github/yoheimuta/amplayer/playback/UsltFrameDecoder.kt // See http://id3.org/id3v2.4.0-frames -@OptIn(UnstableApi::class) -private class UsltFrameDecoder { - companion object { - private const val ID3_TEXT_ENCODING_ISO_8859_1 = 0 - private const val ID3_TEXT_ENCODING_UTF_16 = 1 - private const val ID3_TEXT_ENCODING_UTF_16BE = 2 - private const val ID3_TEXT_ENCODING_UTF_8 = 3 - - fun decode(id3Data: ParsableByteArray): String? { - if (id3Data.limit() < 4) { - // Frame is malformed. - return null - } + @OptIn(UnstableApi::class) + private class UsltFrameDecoder { + companion object { + private const val ID3_TEXT_ENCODING_ISO_8859_1 = 0 + private const val ID3_TEXT_ENCODING_UTF_16 = 1 + private const val ID3_TEXT_ENCODING_UTF_16BE = 2 + private const val ID3_TEXT_ENCODING_UTF_8 = 3 - val encoding = id3Data.readUnsignedByte() - val charset = getCharsetName(encoding) + fun decode(id3Data: ParsableByteArray): String? { + if (id3Data.limit() < 4) { + // Frame is malformed. + return null + } - val lang = ByteArray(3) - id3Data.readBytes(lang, 0, 3) // language - val rest = ByteArray(id3Data.limit() - 4) - id3Data.readBytes(rest, 0, id3Data.limit() - 4) + val encoding = id3Data.readUnsignedByte() + val charset = getCharsetName(encoding) - val descriptionEndIndex = indexOfEos(rest, 0, encoding) - val textStartIndex = descriptionEndIndex + delimiterLength(encoding) - val textEndIndex = indexOfEos(rest, textStartIndex, encoding) - return decodeStringIfValid(rest, textStartIndex, textEndIndex, charset) - } + val lang = ByteArray(3) + id3Data.readBytes(lang, 0, 3) // language + val rest = ByteArray(id3Data.limit() - 4) + id3Data.readBytes(rest, 0, id3Data.limit() - 4) - private fun getCharsetName(encodingByte: Int): Charset { - val name = when (encodingByte) { - ID3_TEXT_ENCODING_UTF_16 -> "UTF-16" - ID3_TEXT_ENCODING_UTF_16BE -> "UTF-16BE" - ID3_TEXT_ENCODING_UTF_8 -> "UTF-8" - ID3_TEXT_ENCODING_ISO_8859_1 -> "ISO-8859-1" - else -> "ISO-8859-1" + val descriptionEndIndex = indexOfEos(rest, 0, encoding) + val textStartIndex = descriptionEndIndex + delimiterLength(encoding) + val textEndIndex = indexOfEos(rest, textStartIndex, encoding) + return decodeStringIfValid(rest, textStartIndex, textEndIndex, charset) } - return Charset.forName(name) - } - private fun indexOfEos(data: ByteArray, fromIndex: Int, encoding: Int): Int { - var terminationPos = indexOfZeroByte(data, fromIndex) - - // For single byte encoding charsets, we're done. - if (encoding == ID3_TEXT_ENCODING_ISO_8859_1 || encoding == ID3_TEXT_ENCODING_UTF_8) { - return terminationPos + private fun getCharsetName(encodingByte: Int): Charset { + val name = when (encodingByte) { + ID3_TEXT_ENCODING_UTF_16 -> "UTF-16" + ID3_TEXT_ENCODING_UTF_16BE -> "UTF-16BE" + ID3_TEXT_ENCODING_UTF_8 -> "UTF-8" + ID3_TEXT_ENCODING_ISO_8859_1 -> "ISO-8859-1" + else -> "ISO-8859-1" + } + return Charset.forName(name) } - // Otherwise ensure an even index and look for a second zero byte. - while (terminationPos < data.size - 1) { - if (terminationPos % 2 == 0 && data[terminationPos + 1] == 0.toByte()) { + private fun indexOfEos(data: ByteArray, fromIndex: Int, encoding: Int): Int { + var terminationPos = indexOfZeroByte(data, fromIndex) + + // For single byte encoding charsets, we're done. + if (encoding == ID3_TEXT_ENCODING_ISO_8859_1 || encoding == ID3_TEXT_ENCODING_UTF_8) { return terminationPos } - terminationPos = indexOfZeroByte(data, terminationPos + 1) - } - return data.size - } + // Otherwise ensure an even index and look for a second zero byte. + while (terminationPos < data.size - 1) { + if (terminationPos % 2 == 0 && data[terminationPos + 1] == 0.toByte()) { + return terminationPos + } + terminationPos = indexOfZeroByte(data, terminationPos + 1) + } - private fun indexOfZeroByte(data: ByteArray, fromIndex: Int): Int { - for (i in fromIndex until data.size) { - if (data[i] == 0.toByte()) { - return i + return data.size + } + + private fun indexOfZeroByte(data: ByteArray, fromIndex: Int): Int { + for (i in fromIndex until data.size) { + if (data[i] == 0.toByte()) { + return i + } } + return data.size } - return data.size - } - private fun delimiterLength(encodingByte: Int): Int { - return if (encodingByte == ID3_TEXT_ENCODING_ISO_8859_1 || encodingByte == ID3_TEXT_ENCODING_UTF_8) - 1 - else - 2 - } + private fun delimiterLength(encodingByte: Int): Int { + return if (encodingByte == ID3_TEXT_ENCODING_ISO_8859_1 || encodingByte == ID3_TEXT_ENCODING_UTF_8) + 1 + else + 2 + } - private fun decodeStringIfValid(data: ByteArray, from: Int, to: Int, charset: Charset): String { - return if (to <= from || to > data.size) { - "" - } else String(data, from, to - from, charset) + private fun decodeStringIfValid(data: ByteArray, from: Int, to: Int, charset: Charset): String { + return if (to <= from || to > data.size) { + "" + } else String(data, from, to - from, charset) + } } - } -} \ No newline at end of file + } \ No newline at end of file