Skip to content

Commit

Permalink
Merge pull request #219 from nicholasmd3/beta
Browse files Browse the repository at this point in the history
Refactor lyrics code
  • Loading branch information
nift4 authored Jul 12, 2024
2 parents f49d56a + f2d0b77 commit 2e9cea2
Showing 1 changed file with 138 additions and 130 deletions.
268 changes: 138 additions & 130 deletions app/src/main/kotlin/org/akanework/gramophone/logic/utils/LrcUtils.kt
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@ object LrcUtils {
private const val TAG = "LrcUtils"

@OptIn(UnstableApi::class)
fun extractAndParseLyrics(metadata: Metadata, trim: Boolean, multilineEnable: Boolean): MutableList<MediaStoreUtils.Lyric>? {
fun extractAndParseLyrics(
metadata: Metadata,
trim: Boolean,
multilineEnable: Boolean
): MutableList<MediaStoreUtils.Lyric>? {
for (i in 0..<metadata.length()) {
val meta = metadata.get(i)
val data =
Expand All @@ -43,15 +47,20 @@ object LrcUtils {
}

/**
 * Looks for an `.lrc` file next to [musicFile] (same directory, same base name) and parses it.
 *
 * @param musicFile the audio file whose sidecar lyrics should be loaded; may be null.
 * @param trim forwarded to [parseLrcString].
 * @param multilineEnable forwarded to [parseLrcString].
 * @return the parsed lyrics, or null when [loadLrcFile] yields no content or parsing throws.
 */
@OptIn(UnstableApi::class)
fun loadAndParseLyricsFile(
    musicFile: File?,
    trim: Boolean,
    multilineEnable: Boolean
): MutableList<MediaStoreUtils.Lyric>? {
    // "dir/song.mp3" -> "dir/song.lrc"; stays null when there is no music file.
    val lrcFile = musicFile?.let { File(it.parentFile, it.nameWithoutExtension + ".lrc") }
    val lrcContent = loadLrcFile(lrcFile) ?: return null
    return try {
        parseLrcString(lrcContent, trim, multilineEnable)
    } catch (e: Exception) {
        // Best effort: broken lyrics are logged and reported as "no lyrics" instead of propagating.
        Log.e(TAG, Log.getStackTraceString(e))
        null
    }
}

private fun loadLrcFile(lrcFile: File?): String? {
Expand Down Expand Up @@ -88,75 +97,77 @@ object LrcUtils {
* We completely ignore all ID3 tags from the header as MediaStore is our source of truth.
*/
@VisibleForTesting
fun parseLrcString(
    lrcContent: String,
    trim: Boolean,
    multilineEnable: Boolean
): MutableList<MediaStoreUtils.Lyric>? {
    // Summary of the behaviour implemented below:
    //  - blank input                      -> null
    //  - no time tag anywhere             -> one entry with a null timestamp holding the raw content
    //  - every time tag parses to 0       -> one entry with a null timestamp holding the joined lyric text
    //  - otherwise                        -> one entry per time tag, sorted, duplicates flagged as translations
    // `trim` strips surrounding whitespace from each lyric text; `multilineEnable` lets a lyric run on
    // until the next time tag in the file instead of stopping at the end of its own line.
    if (lrcContent.isBlank()) return null

    // Matches time tags such as [mm:ss], [mm:ss.xx] or [mm:ss:xx].
    val timeMarksRegex = "\\[(\\d{2}:\\d{2})([.:]\\d+)?]".toRegex()
    val lyricsList = mutableListOf<MediaStoreUtils.Lyric>()
    // Becomes true as soon as one tag carries a timestamp greater than zero.
    var foundNonNull = false
    // Lyric text in file order, collected only while no non-zero timestamp has been seen.
    val plainText = StringBuilder()
    // Offset in lrcContent from which the current line is searched; advancing it keeps repeated
    // lines (e.g. a chorus) from resolving to their first occurrence.
    var searchFrom = 0

    for (line in lrcContent.lines()) {
        val lineStart = lrcContent.indexOf(line, searchFrom)
        searchFrom = lineStart + line.length

        val matches = timeMarksRegex.findAll(line).toList()
        if (matches.isEmpty()) continue

        // All tags on one line share the text that follows the last tag ("[t1][t2]text").
        val textOffset = matches.last().range.last + 1
        val rawText = if (multilineEnable) {
            val startIndex = lineStart + textOffset
            val endIndex = findEndIndex(lrcContent, startIndex, timeMarksRegex)
            lrcContent.substring(startIndex, endIndex)
        } else {
            line.substring(textOffset)
        }
        val lyricLine = if (trim) rawText.trim() else rawText

        for (match in matches) {
            // Group 2 (the fraction) is an empty string when absent, so plain concatenation is safe.
            val timestamp = parseTime(match.groupValues[1] + match.groupValues[2])
            if (timestamp > 0) foundNonNull = true
            if (!foundNonNull) plainText.append(lyricLine).append('\n')
            lyricsList.add(MediaStoreUtils.Lyric(timestamp, lyricLine))
        }
    }

    // Sort by timestamp and flag entries that repeat the previous timestamp as translations.
    markTranslations(lyricsList)

    if (lyricsList.isEmpty()) {
        // No time tags at all: hand back the raw content as a single unsynced entry.
        lyricsList.add(MediaStoreUtils.Lyric(null, lrcContent, false))
    } else if (!foundNonNull) {
        // Every tag was zero, so the timing is meaningless: collapse to one unsynced entry.
        lyricsList.clear()
        lyricsList.add(MediaStoreUtils.Lyric(null, plainText.toString(), false))
    }

    return lyricsList
}

/**
 * Finds where a multi-line lyric that starts at [startIndex] ends.
 *
 * @param lrcContent the complete LRC text.
 * @param startIndex offset of the first character of the lyric text.
 * @param timeMarksRegex pattern that recognises a time tag.
 * @return the offset one before the next time tag found at or after [startIndex] (dropping the
 * character that precedes the tag), never less than [startIndex]; or the length of [lrcContent]
 * when no further tag exists.
 */
private fun findEndIndex(
    lrcContent: String,
    startIndex: Int,
    timeMarksRegex: Regex
): Int {
    val nextTagStart = timeMarksRegex.find(lrcContent, startIndex)?.range?.first
        ?: return lrcContent.length
    // A tag sitting exactly at startIndex would otherwise give an end before the start and make
    // the caller's substring() throw.
    return (nextTagStart - 1).coerceAtLeast(startIndex)
}

/**
 * Sorts [lyricsList] by timestamp in place, then sets `isTranslation` on every entry: true when
 * its timestamp equals that of the entry directly before it, false otherwise. The first entry is
 * never a translation.
 */
private fun markTranslations(lyricsList: MutableList<MediaStoreUtils.Lyric>) {
    lyricsList.sortBy { it.timeStamp }
    for (index in lyricsList.indices) {
        val current = lyricsList[index]
        // index > 0 keeps a leading entry with a null timestamp from being flagged.
        current.isTranslation =
            index > 0 && current.timeStamp == lyricsList[index - 1].timeStamp
    }
}

private fun parseTime(timeString: String): Long {
Expand All @@ -166,96 +177,93 @@ object LrcUtils {
val minutes = matchResult?.groupValues?.get(1)?.toLongOrNull() ?: 0
val seconds = matchResult?.groupValues?.get(2)?.toLongOrNull() ?: 0
val millisecondsString = matchResult?.groupValues?.get(3)
// if one specifies micro/pico/nano/whatever seconds for some insane reason,
// scrap the extra information
val milliseconds = (millisecondsString?.substring(0,
millisecondsString.length.coerceAtMost(3)) ?.toLongOrNull() ?: 0) *
10f.pow(3 - (millisecondsString?.length ?: 0)).toLong()
val milliseconds = millisecondsString?.padEnd(3, '0')?.take(3)?.toLongOrNull() ?: 0

return minutes * 60000 + seconds * 1000 + milliseconds
}
}

// Class heavily based on MIT-licensed https://github.com/yoheimuta/ExoPlayerMusic/blob/77cfb989b59f6906b1170c9b2d565f9b8447db41/app/src/main/java/com/github/yoheimuta/amplayer/playback/UsltFrameDecoder.kt
// See http://id3.org/id3v2.4.0-frames
/**
 * Extracts the text part of a lyrics frame payload laid out as: one encoding byte, three
 * language bytes, a terminated description, then the terminated text.
 */
@OptIn(UnstableApi::class)
private class UsltFrameDecoder {
    companion object {
        private const val ID3_TEXT_ENCODING_ISO_8859_1 = 0
        private const val ID3_TEXT_ENCODING_UTF_16 = 1
        private const val ID3_TEXT_ENCODING_UTF_16BE = 2
        private const val ID3_TEXT_ENCODING_UTF_8 = 3

        /**
         * Decodes the text that follows the description in [id3Data].
         *
         * @return null when the payload is shorter than the 4-byte header, an empty string when
         * the text range is empty or out of bounds, otherwise the decoded text.
         */
        fun decode(id3Data: ParsableByteArray): String? {
            val frameSize = id3Data.limit()
            if (frameSize < 4) {
                // Frame is malformed.
                return null
            }

            val encoding = id3Data.readUnsignedByte()
            val charset = getCharsetName(encoding)

            // The language code is read only to move past it.
            val lang = ByteArray(3)
            id3Data.readBytes(lang, 0, 3)
            val rest = ByteArray(frameSize - 4)
            id3Data.readBytes(rest, 0, rest.size)

            val descriptionEndIndex = indexOfEos(rest, 0, encoding)
            val textStartIndex = descriptionEndIndex + delimiterLength(encoding)
            val textEndIndex = indexOfEos(rest, textStartIndex, encoding)
            return decodeStringIfValid(rest, textStartIndex, textEndIndex, charset)
        }

        /** Maps the encoding byte to a charset; unknown values fall back to ISO-8859-1. */
        private fun getCharsetName(encodingByte: Int): Charset = when (encodingByte) {
            ID3_TEXT_ENCODING_UTF_16 -> Charsets.UTF_16
            ID3_TEXT_ENCODING_UTF_16BE -> Charsets.UTF_16BE
            ID3_TEXT_ENCODING_UTF_8 -> Charsets.UTF_8
            else -> Charsets.ISO_8859_1
        }

        /**
         * Returns the index of the string terminator at or after [fromIndex], or the size of
         * [data] when there is none.
         */
        private fun indexOfEos(data: ByteArray, fromIndex: Int, encoding: Int): Int {
            var terminationPos = indexOfZeroByte(data, fromIndex)

            // For single byte encoding charsets, we're done.
            if (encoding == ID3_TEXT_ENCODING_ISO_8859_1 || encoding == ID3_TEXT_ENCODING_UTF_8) {
                return terminationPos
            }

            // Two-byte encodings end with a zero pair that starts on a code-unit boundary.
            // The boundary is measured from fromIndex, where the string begins, not from the
            // start of the array.
            while (terminationPos < data.size - 1) {
                if ((terminationPos - fromIndex) % 2 == 0 && data[terminationPos + 1] == 0.toByte()) {
                    return terminationPos
                }
                terminationPos = indexOfZeroByte(data, terminationPos + 1)
            }

            return data.size
        }

        /** Returns the index of the first zero byte at or after [fromIndex], or the size of [data]. */
        private fun indexOfZeroByte(data: ByteArray, fromIndex: Int): Int {
            for (i in fromIndex until data.size) {
                if (data[i] == 0.toByte()) {
                    return i
                }
            }
            return data.size
        }

        /** Terminator width in bytes: 1 for ISO-8859-1 and UTF-8, 2 for every other encoding byte. */
        private fun delimiterLength(encodingByte: Int): Int =
            if (encodingByte == ID3_TEXT_ENCODING_ISO_8859_1 || encodingByte == ID3_TEXT_ENCODING_UTF_8) {
                1
            } else {
                2
            }

        /** Decodes the bytes in `[from, to)` with [charset]; an empty or out-of-range span gives "". */
        private fun decodeStringIfValid(data: ByteArray, from: Int, to: Int, charset: Charset): String =
            if (to <= from || to > data.size) {
                ""
            } else {
                String(data, from, to - from, charset)
            }
    }
}
}

0 comments on commit 2e9cea2

Please sign in to comment.