Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor lyrics code #219

Merged
merged 1 commit into from
Jul 12, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
268 changes: 138 additions & 130 deletions app/src/main/kotlin/org/akanework/gramophone/logic/utils/LrcUtils.kt
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@ object LrcUtils {
private const val TAG = "LrcUtils"

@OptIn(UnstableApi::class)
fun extractAndParseLyrics(metadata: Metadata, trim: Boolean, multilineEnable: Boolean): MutableList<MediaStoreUtils.Lyric>? {
fun extractAndParseLyrics(
metadata: Metadata,
trim: Boolean,
multilineEnable: Boolean
): MutableList<MediaStoreUtils.Lyric>? {
for (i in 0..<metadata.length()) {
val meta = metadata.get(i)
val data =
Expand All @@ -43,15 +47,20 @@ object LrcUtils {
}

@OptIn(UnstableApi::class)
fun loadAndParseLyricsFile(musicFile: File?, trim: Boolean, multilineEnable: Boolean): MutableList<MediaStoreUtils.Lyric>? {
fun loadAndParseLyricsFile(
musicFile: File?,
trim: Boolean,
multilineEnable: Boolean
): MutableList<MediaStoreUtils.Lyric>? {
val lrcFile = musicFile?.let { File(it.parentFile, it.nameWithoutExtension + ".lrc") }
return loadLrcFile(lrcFile)?.let {
try {
parseLrcString(it, trim, multilineEnable)
} catch (e: Exception) {
Log.e(TAG, Log.getStackTraceString(e))
null
} }
}
}
}

private fun loadLrcFile(lrcFile: File?): String? {
Expand Down Expand Up @@ -88,75 +97,77 @@ object LrcUtils {
* We completely ignore all ID3 tags from the header as MediaStore is our source of truth.
*/
@VisibleForTesting
fun parseLrcString(lrcContent: String, trim: Boolean, multilineEnable: Boolean): MutableList<MediaStoreUtils.Lyric>? {
fun parseLrcString(
lrcContent: String,
trim: Boolean,
multilineEnable: Boolean
): MutableList<MediaStoreUtils.Lyric>? {
if (lrcContent.isBlank()) return null

// Regex to match time tags in various formats
val timeMarksRegex = "\\[(\\d{2}:\\d{2})([.:]\\d+)?]".toRegex()
val list = mutableListOf<MediaStoreUtils.Lyric>()
val lyricsList = mutableListOf<MediaStoreUtils.Lyric>()
var foundNonNull = false
var lyricsText: StringBuilder? = StringBuilder()
//val measureTime = measureTimeMillis {
// Add all lines found on LRC (probably will be unordered because of "compression" or translation type)

lrcContent.lines().forEach { line ->
timeMarksRegex.findAll(line).let { sequence ->
if (sequence.count() == 0) {
return@let
}
var lyricLine : String
sequence.forEach { match ->
val firstSync = match.groupValues.subList(1, match.groupValues.size)
.joinToString("")

val ts = parseTime(firstSync)
if (!foundNonNull && ts > 0) {
foundNonNull = true
lyricsText = null
}
val matches = timeMarksRegex.findAll(line).toList()
if (matches.isEmpty()) return@forEach

if (multilineEnable) {
val startIndex = lrcContent.indexOf(line) + firstSync.length+1
var endIndex = lrcContent.length // default to end
var nextSync = ""

// track next sync point if found
if (timeMarksRegex.find(lrcContent, startIndex)?.value != null) {
nextSync = timeMarksRegex.find(lrcContent, startIndex)?.value!!
endIndex = lrcContent.indexOf(nextSync) - 1 // delete \n at end
}

// read as single line *IF* this is a single line lyric
lyricLine = if (nextSync == "[$firstSync]") {
line.substring(sequence.last().range.last + 1)
.let { if (trim) it.trim() else it }
} else {
lrcContent.substring(startIndex + 1, endIndex)
.let { if (trim) it.trim() else it }
}
}
else {
lyricLine = line.substring(sequence.last().range.last + 1)
.let { if (trim) it.trim() else it }
}
matches.forEach { match ->
val timeString = match.groupValues[1] + (match.groupValues[2] ?: "")
val timestamp = parseTime(timeString)

lyricsText?.append(lyricLine + "\n")
list.add(MediaStoreUtils.Lyric(ts, lyricLine))
if (!foundNonNull && timestamp > 0) {
foundNonNull = true
lyricsText = null
}

// Extract the lyric line based on whether multiline is enabled
val lyricLine = if (multilineEnable) {
val startIndex = lrcContent.indexOf(line) + match.value.length
val endIndex = findEndIndex(lrcContent, startIndex, timeMarksRegex)
lrcContent.substring(startIndex, endIndex).let { if (trim) it.trim() else it }
} else {
line.substring(match.range.last + 1).let { if (trim) it.trim() else it }
}

// Append the lyric line to lyricsText if necessary and add to list
lyricsText?.append("$lyricLine\n")
lyricsList.add(MediaStoreUtils.Lyric(timestamp, lyricLine))
}
}
// Sort and mark as translations all found duplicated timestamps (usually one)
list.sortBy { it.timeStamp }
var previousTs = -1L
list.forEach {
it.isTranslation = (it.timeStamp!! == previousTs)
previousTs = it.timeStamp
}
//}
if (list.isEmpty() && lrcContent.isNotEmpty()) {
list.add(MediaStoreUtils.Lyric(null, lrcContent, false))

// Mark translations and sort by timestamp
markTranslations(lyricsList)

// Handle cases where no valid timestamps were found
if (lyricsList.isEmpty() && lrcContent.isNotEmpty()) {
lyricsList.add(MediaStoreUtils.Lyric(null, lrcContent, false))
} else if (!foundNonNull) {
list.clear()
list.add(MediaStoreUtils.Lyric(null, lyricsText!!.toString(), false))
lyricsList.clear()
lyricsList.add(MediaStoreUtils.Lyric(null, lyricsText.toString(), false))
}

return lyricsList
}

/**
 * Finds where the lyric text that begins at [startIndex] ends inside [lrcContent].
 *
 * The text runs until the next time tag matched by [timeMarksRegex] at or after
 * [startIndex]; if there is no further tag it runs to the end of [lrcContent].
 * A single line terminator (`\n`, `\r\n` or `\r`) directly in front of the next tag
 * is excluded, so the returned text does not carry the line break that merely
 * separates it from the following tag.
 *
 * The result is never smaller than [startIndex], which makes it safe to use as the
 * exclusive end of `lrcContent.substring(startIndex, result)` even when the next tag
 * starts immediately at [startIndex] (e.g. several tags in a row on one line).
 *
 * @param lrcContent the complete LRC text being scanned
 * @param startIndex index at which the lyric text starts; must be within `0..lrcContent.length`
 * @param timeMarksRegex pattern that recognises a time tag
 * @return exclusive end index of the lyric text, in `startIndex..lrcContent.length`
 * @throws IndexOutOfBoundsException if [startIndex] lies outside [lrcContent]
 */
private fun findEndIndex(
    lrcContent: String,
    startIndex: Int,
    timeMarksRegex: Regex
): Int {
    val nextTagStart = timeMarksRegex.find(lrcContent, startIndex)?.range?.first
        ?: return lrcContent.length

    var endIndex = nextTagStart
    // Strip only a real line terminator in front of the next tag; blindly subtracting
    // one would eat a lyric character when two tags share a line, and would produce
    // an index below startIndex when the next tag starts right at startIndex.
    if (endIndex > startIndex && lrcContent[endIndex - 1] == '\n') endIndex--
    // Handles the '\r' of a CRLF pair as well as a lone '\r' terminator.
    if (endIndex > startIndex && lrcContent[endIndex - 1] == '\r') endIndex--
    return endIndex
}

private fun markTranslations(lyricsList: MutableList<MediaStoreUtils.Lyric>) {
lyricsList.sortBy { it.timeStamp }
var previousTimestamp: Long? = null
lyricsList.forEach {
it.isTranslation = (it.timeStamp == previousTimestamp)
previousTimestamp = it.timeStamp
}
return list
}

private fun parseTime(timeString: String): Long {
Expand All @@ -166,96 +177,93 @@ object LrcUtils {
val minutes = matchResult?.groupValues?.get(1)?.toLongOrNull() ?: 0
val seconds = matchResult?.groupValues?.get(2)?.toLongOrNull() ?: 0
val millisecondsString = matchResult?.groupValues?.get(3)
// if one specifies micro/pico/nano/whatever seconds for some insane reason,
// scrap the extra information
val milliseconds = (millisecondsString?.substring(0,
millisecondsString.length.coerceAtMost(3)) ?.toLongOrNull() ?: 0) *
10f.pow(3 - (millisecondsString?.length ?: 0)).toLong()
val milliseconds = millisecondsString?.padEnd(3, '0')?.take(3)?.toLongOrNull() ?: 0

return minutes * 60000 + seconds * 1000 + milliseconds
}
}

// Class heavily based on MIT-licensed https://github.com/yoheimuta/ExoPlayerMusic/blob/77cfb989b59f6906b1170c9b2d565f9b8447db41/app/src/main/java/com/github/yoheimuta/amplayer/playback/UsltFrameDecoder.kt

// Class heavily based on MIT-licensed https://github.com/yoheimuta/ExoPlayerMusic/blob/77cfb989b59f6906b1170c9b2d565f9b8447db41/app/src/main/java/com/github/yoheimuta/amplayer/playback/UsltFrameDecoder.kt
// See http://id3.org/id3v2.4.0-frames
@OptIn(UnstableApi::class)
private class UsltFrameDecoder {
companion object {
private const val ID3_TEXT_ENCODING_ISO_8859_1 = 0
private const val ID3_TEXT_ENCODING_UTF_16 = 1
private const val ID3_TEXT_ENCODING_UTF_16BE = 2
private const val ID3_TEXT_ENCODING_UTF_8 = 3

fun decode(id3Data: ParsableByteArray): String? {
if (id3Data.limit() < 4) {
// Frame is malformed.
return null
}
@OptIn(UnstableApi::class)
private class UsltFrameDecoder {
companion object {
private const val ID3_TEXT_ENCODING_ISO_8859_1 = 0
private const val ID3_TEXT_ENCODING_UTF_16 = 1
private const val ID3_TEXT_ENCODING_UTF_16BE = 2
private const val ID3_TEXT_ENCODING_UTF_8 = 3

val encoding = id3Data.readUnsignedByte()
val charset = getCharsetName(encoding)
fun decode(id3Data: ParsableByteArray): String? {
if (id3Data.limit() < 4) {
// Frame is malformed.
return null
}

val lang = ByteArray(3)
id3Data.readBytes(lang, 0, 3) // language
val rest = ByteArray(id3Data.limit() - 4)
id3Data.readBytes(rest, 0, id3Data.limit() - 4)
val encoding = id3Data.readUnsignedByte()
val charset = getCharsetName(encoding)

val descriptionEndIndex = indexOfEos(rest, 0, encoding)
val textStartIndex = descriptionEndIndex + delimiterLength(encoding)
val textEndIndex = indexOfEos(rest, textStartIndex, encoding)
return decodeStringIfValid(rest, textStartIndex, textEndIndex, charset)
}
val lang = ByteArray(3)
id3Data.readBytes(lang, 0, 3) // language
val rest = ByteArray(id3Data.limit() - 4)
id3Data.readBytes(rest, 0, id3Data.limit() - 4)

private fun getCharsetName(encodingByte: Int): Charset {
val name = when (encodingByte) {
ID3_TEXT_ENCODING_UTF_16 -> "UTF-16"
ID3_TEXT_ENCODING_UTF_16BE -> "UTF-16BE"
ID3_TEXT_ENCODING_UTF_8 -> "UTF-8"
ID3_TEXT_ENCODING_ISO_8859_1 -> "ISO-8859-1"
else -> "ISO-8859-1"
val descriptionEndIndex = indexOfEos(rest, 0, encoding)
val textStartIndex = descriptionEndIndex + delimiterLength(encoding)
val textEndIndex = indexOfEos(rest, textStartIndex, encoding)
return decodeStringIfValid(rest, textStartIndex, textEndIndex, charset)
}
return Charset.forName(name)
}

private fun indexOfEos(data: ByteArray, fromIndex: Int, encoding: Int): Int {
var terminationPos = indexOfZeroByte(data, fromIndex)

// For single byte encoding charsets, we're done.
if (encoding == ID3_TEXT_ENCODING_ISO_8859_1 || encoding == ID3_TEXT_ENCODING_UTF_8) {
return terminationPos
private fun getCharsetName(encodingByte: Int): Charset {
val name = when (encodingByte) {
ID3_TEXT_ENCODING_UTF_16 -> "UTF-16"
ID3_TEXT_ENCODING_UTF_16BE -> "UTF-16BE"
ID3_TEXT_ENCODING_UTF_8 -> "UTF-8"
ID3_TEXT_ENCODING_ISO_8859_1 -> "ISO-8859-1"
else -> "ISO-8859-1"
}
return Charset.forName(name)
}

// Otherwise ensure an even index and look for a second zero byte.
while (terminationPos < data.size - 1) {
if (terminationPos % 2 == 0 && data[terminationPos + 1] == 0.toByte()) {
private fun indexOfEos(data: ByteArray, fromIndex: Int, encoding: Int): Int {
var terminationPos = indexOfZeroByte(data, fromIndex)

// For single byte encoding charsets, we're done.
if (encoding == ID3_TEXT_ENCODING_ISO_8859_1 || encoding == ID3_TEXT_ENCODING_UTF_8) {
return terminationPos
}
terminationPos = indexOfZeroByte(data, terminationPos + 1)
}

return data.size
}
// Otherwise ensure an even index and look for a second zero byte.
while (terminationPos < data.size - 1) {
if (terminationPos % 2 == 0 && data[terminationPos + 1] == 0.toByte()) {
return terminationPos
}
terminationPos = indexOfZeroByte(data, terminationPos + 1)
}

private fun indexOfZeroByte(data: ByteArray, fromIndex: Int): Int {
for (i in fromIndex until data.size) {
if (data[i] == 0.toByte()) {
return i
return data.size
}

private fun indexOfZeroByte(data: ByteArray, fromIndex: Int): Int {
for (i in fromIndex until data.size) {
if (data[i] == 0.toByte()) {
return i
}
}
return data.size
}
return data.size
}

private fun delimiterLength(encodingByte: Int): Int {
return if (encodingByte == ID3_TEXT_ENCODING_ISO_8859_1 || encodingByte == ID3_TEXT_ENCODING_UTF_8)
1
else
2
}
private fun delimiterLength(encodingByte: Int): Int {
return if (encodingByte == ID3_TEXT_ENCODING_ISO_8859_1 || encodingByte == ID3_TEXT_ENCODING_UTF_8)
1
else
2
}

private fun decodeStringIfValid(data: ByteArray, from: Int, to: Int, charset: Charset): String {
return if (to <= from || to > data.size) {
""
} else String(data, from, to - from, charset)
private fun decodeStringIfValid(data: ByteArray, from: Int, to: Int, charset: Charset): String {
return if (to <= from || to > data.size) {
""
} else String(data, from, to - from, charset)
}
}
}
}
}
Loading