Skip to content

Commit

Permalink
use S2 id if paper has no DOI
Browse files Browse the repository at this point in the history
  • Loading branch information
rwst committed Mar 10, 2024
1 parent db80ae9 commit bd726b0
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 40 deletions.
7 changes: 3 additions & 4 deletions src/main/kotlin/org/reactome/lit_ball/common/LitBallQuery.kt
Original file line number Diff line number Diff line change
Expand Up @@ -249,8 +249,7 @@ data class LitBallQuery(
val file = File("${queryDir.absolutePath}/${FileType.FILTERED1.fileName}")
file.writeText(
json.encodeToString(
paperDetailsList.mapIndexed { idx, pd -> Paper(idx, pd, doi = pd.externalIds?.get("DOI")
?.uppercase()) })
paperDetailsList.mapIndexed { idx, pd -> Paper(idx, pd).uppercaseDoi().setPaperIdFromDetails() })
)
mergeIntoArchive(paperDetailsList)
} catch (e: Exception) {
Expand Down Expand Up @@ -294,7 +293,7 @@ data class LitBallQuery(
details.addAll(list)
file.writeText(
json.encodeToString(
details.mapIndexed { idx, pd -> Paper(idx, pd, doi = pd.externalIds?.get("DOI")?.uppercase()) })
details.mapIndexed { idx, pd -> Paper(idx, pd).uppercaseDoi().setPaperIdFromDetails() })
)
}
}
Expand All @@ -311,7 +310,7 @@ data class LitBallQuery(
&& dateMatcher.matches(it.publicationDate)
&& !matcher.parser2.match(it.title?: ""))
paperDetailsList.add(it)
}
}
// Bail out on Cancel
if (!result) return
Logger.i(tag, "Retained ${paperDetailsList.size} records")
Expand Down
23 changes: 22 additions & 1 deletion src/main/kotlin/org/reactome/lit_ball/common/Paper.kt
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,31 @@ class Paper(
val details: S2Service.PaperDetails = S2Service.PaperDetails(),
var tag: Tag = Tag.Rejected,
var flags: MutableSet<String> = mutableSetOf(),
var doi: String? = null,
var paperId: String? = null,
) {

/**
 * Returns a debug representation of this paper listing its details, tag and flags.
 */
override fun toString(): String =
    "Paper(details=$details, tag=$tag, flags=$flags)"

/**
 * Derives [paperId] from [details] and returns this paper so calls can be chained.
 *
 * The upper-cased `"DOI"` entry of `details.externalIds` is preferred. When no DOI is
 * present (or it is blank), the id falls back to `details.paperId` prefixed with `S2:`.
 * When that identifier is missing as well, [paperId] is set to `null` rather than
 * to a meaningless `"S2:null"` / `"S2:"` string.
 *
 * @return this [Paper], with [paperId] updated in place.
 */
fun setPaperIdFromDetails(): Paper {
    val doi = details.externalIds?.get("DOI")?.uppercase()
    paperId = when {
        !doi.isNullOrBlank() -> doi
        // Only build the fallback id when there is something to put after the prefix;
        // string templates would otherwise render a null value as the text "null".
        !details.paperId.isNullOrBlank() -> "S2:${details.paperId}"
        else -> null
    }
    return this
}

/**
 * Normalizes the `"DOI"` entry of `details.externalIds` to upper case, in place.
 *
 * Nothing is modified when there are no external ids, when there is no DOI entry,
 * or when the DOI is already upper case. Otherwise `details.externalIds` is replaced
 * by a copy of the map carrying the upper-cased DOI.
 *
 * @return this [Paper], so calls can be chained.
 */
fun uppercaseDoi(): Paper {
    val currentIds = details.externalIds ?: return this
    val currentDoi = currentIds["DOI"] ?: return this
    val upperDoi = currentDoi.uppercase()
    if (upperDoi != currentDoi) {
        // Work on a copy so the map instance previously held by details is left untouched.
        val updatedIds = currentIds.toMutableMap()
        updatedIds["DOI"] = upperDoi
        details.externalIds = updatedIds
    }
    return this
}
}
38 changes: 16 additions & 22 deletions src/main/kotlin/org/reactome/lit_ball/common/PaperList.kt
Original file line number Diff line number Diff line change
Expand Up @@ -78,24 +78,18 @@ object PaperList {
} else {
mutableListOf()
}
papers.forEach { it.doi = it.details.externalIds?.get("DOI")?.uppercase() }
papers.forEach { it.setPaperIdFromDetails() }
accepted?.let {
papers = papers.filter { it.doi in accepted }.toMutableList()
papers = papers.filter { it.paperId in accepted }.toMutableList()
var maxId = if (papers.isNotEmpty()) papers.maxOf { it.id } else 0
val acceptedWithDetails = papers.map { it.doi ?: "" }.toSet()
val acceptedWithDetails = papers.map { it.paperId ?: "" }.toSet()
val acceptedWithoutDetails = accepted.minus(acceptedWithDetails).toList()
S2Client.getPaperDetails(acceptedWithoutDetails) {
val extIds = it.externalIds?.toMutableMap()
if (extIds != null) {
val oldDoi = extIds["DOI"]
val doi = oldDoi?.uppercase()
if (doi != null && doi != oldDoi) {
extIds["DOI"] = doi
it.externalIds = extIds
}
papers.add(Paper(id = maxId, details = it, doi = doi))
maxId += 1
}
val newPaper = Paper(id = maxId, details = it)
newPaper.uppercaseDoi()
newPaper.setPaperIdFromDetails()
papers.add(newPaper)
maxId += 1
}
}
listHandle.setFullList(papers)
Expand Down Expand Up @@ -131,7 +125,7 @@ object PaperList {
val pathPrefix = path?.substringBeforeLast("/")
val pathStr = "$pathPrefix/${fileType.fileName}"
val thisList = listHandle.getFullList().filter { it.tag == tag }
.mapNotNull { item -> item.doi }
.mapNotNull { item -> item.paperId }
theSet += thisList
File(pathStr).writeText(theSet.joinToString(separator = "\n", postfix = "\n"))
}
Expand Down Expand Up @@ -171,8 +165,8 @@ object PaperList {

fun delete(id: Int) {
val p = listHandle.getDisplayedPaper(id) ?: return
query.acceptedSet.removeIf { acc -> p.doi?.let { it == acc }?: false }
listHandle.delete(p.doi)
query.acceptedSet.removeIf { acc -> p.paperId?.let { it == acc }?: false }
listHandle.delete(p.paperId)
try {
writeToPath(Tag.Accepted, FileType.ACCEPTED, query.acceptedSet)
} catch (e: Exception) {
Expand All @@ -185,7 +179,7 @@ object PaperList {
val fList = listHandle.getFilteredList()
fList?.let {
listHandle.deleteAllFiltered()
val dois = it.map { p -> p.doi }.toSet()
val dois = it.map { p -> p.paperId }.toSet()
query.acceptedSet.removeIf { acc -> dois.contains(acc) }
try {
writeToPath(Tag.Accepted, FileType.ACCEPTED, query.acceptedSet)
Expand All @@ -211,7 +205,7 @@ object PaperList {
val revFile = File(exportedCatPath.replace("$", "Reviews"))
revFile.writeText(CSV_HEADER)
listHandle.getFullList().forEach {
val doi = it.doi
val doi = it.paperId
val date = it.details.publicationDate ?: ""
val pmid = it.details.externalIds?.get("PubMed")
val pmc = it.details.externalIds?.get("PubMedCentral")
Expand Down Expand Up @@ -245,7 +239,7 @@ object PaperList {
File(exportedPath).writeText("")
val json = ConfiguredUglyJson.get()
listHandle.getFullList().forEach { thePaper ->
val doi = thePaper.doi
val doi = thePaper.paperId
doi?.let { theDoi ->
val meta = mapOf("DOI" to JsonPrimitive(theDoi))
val outMap = emptyMap<String, JsonElement>().toMutableMap()
Expand Down Expand Up @@ -281,7 +275,7 @@ object PaperList {
DATE: ${p.details.publicationDate} $textPMID
A: ${p.details.abstract}
TLDR: ${p.details.tldr?.get("text")}
DOI: ${p.doi} TYPES: ${p.details.publicationTypes?.joinToString(" ")}
DOI: ${p.paperId} TYPES: ${p.details.publicationTypes?.joinToString(" ")}
""".trimIndent()
}

Expand Down Expand Up @@ -336,7 +330,7 @@ object PaperList {
listHandle.getFullList().forEach {
val text = (it.details.title ?: "") + " " + (it.details.tldr?.get("text") ?: "")
stringBuilder.append("\"" + NLPService.preprocess(text) + "\",")
stringBuilder.append("\"${it.doi ?: ""}\"\n")
stringBuilder.append("\"${it.paperId ?: ""}\"\n")
}
File(path).writeText(stringBuilder.toString())
}
Expand Down
14 changes: 7 additions & 7 deletions src/main/kotlin/org/reactome/lit_ball/common/PaperListHandle.kt
Original file line number Diff line number Diff line change
Expand Up @@ -106,21 +106,21 @@ class PaperListHandle {
fun delete(doi: String?) {
if (doi.isNullOrEmpty()) return
val tmp1 = fullList.toMutableList()
tmp1.removeIf { p -> p.doi?.let { it == doi }?: false }
tmp1.removeIf { p -> p.paperId?.let { it == doi }?: false }
fullList = tmp1.toList()
filteredList?.let { list ->
val tmp2 = list.toMutableList()
tmp2.removeIf { p -> p.doi?.let { it == doi } ?: false }
tmp2.removeIf { p -> p.paperId?.let { it == doi } ?: false }
filteredList = tmp2.toList()
}
updateShadowMap()
}

fun deleteAllFiltered() {
filteredList?.let { list ->
val dois = list.map { it.doi }.toSet()
val dois = list.map { it.paperId }.toSet()
val fList = fullList.toMutableList()
fList.removeIf { dois.contains(it.doi) }
fList.removeIf { dois.contains(it.paperId) }
fullList = fList.toList()
filteredList = null
updateShadowMap()
Expand Down Expand Up @@ -151,17 +151,17 @@ class PaperListHandle {
}

fun setFullTagsFromFiltered() {
val tagMap: Map<String, Tag> = filteredList?.associate { Pair(it.doi?: "",it.tag) } ?: emptyMap()
val tagMap: Map<String, Tag> = filteredList?.associate { Pair(it.paperId?: "",it.tag) } ?: emptyMap()
setFullTagsFromDoiMap(tagMap)
}

fun setFullTagsFromDoiMap(tagMap: Map<String, Tag>) {
val list = fullList.map {
val newTag = tagMap[it.doi]?: Tag.Accepted
val newTag = tagMap[it.paperId]?: Tag.Accepted
if (it.tag == newTag)
it
else
Paper(it.id, it.details, newTag, it.flags, it.details.externalIds?.get("DOI")?.uppercase())
Paper(it.id, it.details, newTag, it.flags).uppercaseDoi().setPaperIdFromDetails()
}
fullList = list
}
Expand Down
14 changes: 8 additions & 6 deletions src/main/kotlin/org/reactome/lit_ball/service/ScholarClient.kt
Original file line number Diff line number Diff line change
Expand Up @@ -129,17 +129,18 @@ object S2Client : ScholarClient {
val size = doiSet.size
val indicatorTitle = "Downloading missing titles, TLDRs,\nand abstracts"
var index = 0
doiSet.chunked(DETAILS_CHUNK_SIZE).forEach {
doiSet.chunked(DETAILS_CHUNK_SIZE).forEach { ids ->
val paperIds = ids.map { if ( it.startsWith("S2:")) it.substring(3) else it }
val pair = getDataOrHandleExceptions(index, size, indicatorTitle) {
S2Service.getBulkPaperDetails(
it,
paperIds,
"paperId,externalIds,title,abstract,publicationTypes,tldr,publicationDate"
)
}
if (!pair.second) return false
delay(strategy.delay(true))
pair.first?.filterNotNull()?.forEach(action) // DO NOT remove filterNotNull()
index += it.size
index += paperIds.size
if (!RootStore.setProgressIndication(indicatorTitle, (1f * index) / size, "$index/$size"))
return false
}
Expand All @@ -158,7 +159,7 @@ object S2Client : ScholarClient {
doiSet.forEachIndexed { index, it ->
val pair = getDataOrHandleExceptions(index, size, indicatorTitle) {
S2Service.getSinglePaperDetails(
it,
if ( it.startsWith("S2:")) it.substring(3) else it,
"paperId,externalIds,title,abstract,publicationTypes,tldr,publicationDate"
)
}
Expand Down Expand Up @@ -193,7 +194,7 @@ object S2Client : ScholarClient {
doiSet.forEachIndexed { index, doi ->
val pair = getDataOrHandleExceptions(index, size, indicatorTitle) {
S2Service.getPaperRefs(
doi,
if ( doi.startsWith("S2:")) doi.substring(3) else doi,
"paperId,citations,citations.externalIds,references,references.externalIds"
)
}
Expand All @@ -217,9 +218,10 @@ object S2Client : ScholarClient {
"citations for all accepted papers"
var index = 0
doiSet.chunked(DETAILS_CHUNK_SIZE).forEach { dois ->
val paperIds = dois.map { if ( it.startsWith("S2:")) it.substring(3) else it }
val pair = getDataOrHandleExceptions(index, size, indicatorTitle) {
S2Service.getBulkPaperRefs(
dois,
paperIds,
"paperId,citations,citations.externalIds,references,references.externalIds"
)
}
Expand Down

0 comments on commit bd726b0

Please sign in to comment.