Skip to content

Commit

Permalink
Merge pull request #538 from wcmc-its/development
Browse files Browse the repository at this point in the history
Merging the changes from development to master.
  • Loading branch information
paulalbert1 authored Jan 9, 2025
2 parents 1a5e885 + f69f05d commit 9faf5b7
Show file tree
Hide file tree
Showing 25 changed files with 63,936 additions and 59,578 deletions.
2 changes: 0 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,13 @@ RUN apt-get update && apt-get install -y \
&& add-apt-repository ppa:deadsnakes/ppa \
&& apt-get update && apt-get install -y \
python3.12 \
python3.12-distutils \
wget \
unzip \
libatlas-base-dev \
libhdf5-dev \
libhdf5-serial-dev \
libjpeg-dev \
zlib1g-dev \

&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

Expand Down
16 changes: 8 additions & 8 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@
</licenses>

<developers>
<developer>
<name>Mahender Reddy Jangari</name>
<email>[email protected]</email>
<organization>Weill Cornell Medicine ITS</organization>
<organizationUrl>https://github.com/wcmc-its</organizationUrl>
<timezone>GMT-5.5</timezone>
</developer>
<developer>
<name>Sarbajit Dutta</name>
<email>[email protected]</email>
Expand All @@ -36,13 +43,6 @@
<organizationUrl>https://github.com/wcmc-its</organizationUrl>
<timezone>GMT-5</timezone>
</developer>
<developer>
<name>Mahender Reddy Jangari</name>
<email>[email protected]</email>
<organization>Weill Cornell Medicine ITS</organization>
<organizationUrl>https://github.com/wcmc-its</organizationUrl>
<timezone>GMT-5.5</timezone>
</developer>
</developers>

<scm>
Expand Down Expand Up @@ -315,7 +315,7 @@
<dependency>
<groupId>edu.cornell.weill.reciter</groupId>
<artifactId>reciter-article-model</artifactId>
<version>2.0.26</version>
<version>2.0.30</version>
</dependency>
<dependency>
<groupId>edu.cornell.weill.reciter</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
import reciter.algorithm.article.score.predictor.NeuralNetworkModelArticlesScorer;
import reciter.algorithm.evidence.StrategyContext;
import reciter.algorithm.evidence.article.RemoveReCiterArticleStrategyContext;
import reciter.algorithm.evidence.author.authorcount.AuthorCountStrategyContext;
import reciter.algorithm.evidence.author.authorcount.strategy.AuthorCountStrategy;
import reciter.algorithm.evidence.targetauthor.TargetAuthorStrategyContext;
import reciter.algorithm.evidence.targetauthor.affiliation.AffiliationStrategyContext;
import reciter.algorithm.evidence.targetauthor.affiliation.strategy.CommonAffiliationStrategy;
Expand Down Expand Up @@ -63,6 +65,7 @@
import reciter.engine.StrategyParameters;
import reciter.engine.analysis.evidence.AffiliationEvidence;
import reciter.engine.analysis.evidence.ArticleCountEvidence;
import reciter.engine.analysis.evidence.AuthorCountEvidence;
import reciter.engine.analysis.evidence.AuthorNameEvidence;
import reciter.engine.analysis.evidence.EducationYearEvidence;
import reciter.engine.analysis.evidence.EmailEvidence;
Expand All @@ -71,10 +74,12 @@
import reciter.engine.analysis.evidence.NonTargetAuthorScopusAffiliation;
import reciter.engine.analysis.evidence.RelationshipEvidence;
import reciter.engine.analysis.evidence.RelationshipNegativeMatch;
import reciter.engine.analysis.evidence.RelationshipPostiveMatch;
import reciter.engine.analysis.evidence.TargetAuthorPubmedAffiliation;
import reciter.engine.analysis.evidence.TargetAuthorScopusAffiliation;
import reciter.model.article.ReCiterArticle;
import reciter.model.article.ReCiterArticleFeedbackIdentityScore;
import reciter.model.article.ReCiterAuthor;
import reciter.model.identity.Identity;

/**
Expand Down Expand Up @@ -182,7 +187,6 @@ public class ReCiterArticleScorer extends AbstractArticleScorer {
*/
private GenderStrategyContext genderStrategyContext;


// private StrategyContext boardCertificationStrategyContext;
//
// private StrategyContext degreeStrategyContext;
Expand All @@ -193,6 +197,8 @@ public class ReCiterArticleScorer extends AbstractArticleScorer {

private StrategyContext coCitationStrategyContext;

private StrategyContext authorCountStrategyContext;

private List<StrategyContext> strategyContexts;

public static StrategyParameters strategyParameters;
Expand Down Expand Up @@ -225,6 +231,8 @@ public ReCiterArticleScorer(List<ReCiterArticle> reCiterArticles, Identity ident

this.bachelorsYearDiscrepancyStrategyContext = new DegreeStrategyContext(new YearDiscrepancyStrategy(DegreeType.BACHELORS));
this.doctoralYearDiscrepancyStrategyContext = new DegreeStrategyContext(new YearDiscrepancyStrategy(DegreeType.DOCTORAL));

this.authorCountStrategyContext = new AuthorCountStrategyContext(new AuthorCountStrategy(ReCiterArticleScorer.strategyParameters));

this.strategyContexts = new ArrayList<StrategyContext>();

Expand All @@ -250,7 +258,7 @@ public ReCiterArticleScorer(List<ReCiterArticle> reCiterArticles, Identity ident

// Re-run these evidence types (could have been removed or not processed in sequence).
this.strategyContexts.add(this.emailStrategyContext);

this.strategyContexts.add(this.authorCountStrategyContext);
}


Expand Down Expand Up @@ -312,7 +320,7 @@ public void runArticleScorer(List<ReCiterArticle> reCiterArticles, Identity iden
//((TargetAuthorStrategyContext) personTypeStrategyContext).executeStrategy(reCiterArticles, identity);
futures.add(submitAndLogTime("personType Category", executorService, personTypeStrategyContext, reCiterArticles, identity));
}

futures.add(submitAndLogTime("authorCount Category", executorService, authorCountStrategyContext, reCiterArticles, identity));


if(strategyParameters.isGender()) {
Expand All @@ -332,7 +340,6 @@ public void runArticleScorer(List<ReCiterArticle> reCiterArticles, Identity iden
boolean allTasksCompleted = true;
// Print execution times from futures


for (Future<?> future : futures) {
try {
future.get(); // Ensure all tasks are completed
Expand All @@ -342,7 +349,7 @@ public void runArticleScorer(List<ReCiterArticle> reCiterArticles, Identity iden
}
}
if (allTasksCompleted) {
slf4jLogger.error("All Idnetity score strategy contexts have been completed successfully.");
slf4jLogger.info("All Idnetity score strategy contexts have been completed successfully.");
} else {
slf4jLogger.error("One or more tasks failed; report generation may be incomplete.");
}
Expand Down Expand Up @@ -372,7 +379,7 @@ public List<ReCiterArticle> executePythonScriptForArticleIdentityTotalScore(List
// Format the current date and time to a safe string for file names
String timestamp = now.format(formatter);

String fileName = StringUtils.join(timestamp, "-" , identity.getUid(), "-identityOnlyScoringInput.json");
String fileName = StringUtils.join(identity.getUid(), "-identityOnlyScoringInput.json");
//PropertiesLoader("application.properties");// loading application.properties before retrieving specific property;
boolean isS3UploadRequired = isS3UploadRequired();
String identityS3BucketName = getProperty("aws.s3.feedback.score.bucketName");
Expand Down Expand Up @@ -400,7 +407,8 @@ public List<ReCiterArticle> executePythonScriptForArticleIdentityTotalScore(List
}
String isS3UploadRequiredString = Boolean.toString(isS3UploadRequired);
JSONArray articlesIdentityScoreTotal = nnmodel.executeArticleScorePredictor("Identity Score", "identityOnlyScoreArticles.py",fileName,identityS3BucketName,isS3UploadRequiredString);
return mapAuthorshipLikelihoodScore(reCiterArticles, articlesIdentityScoreTotal);
if(articlesIdentityScoreTotal!=null && articlesIdentityScoreTotal.length() > 0 )
return mapAuthorshipLikelihoodScore(reCiterArticles, articlesIdentityScoreTotal);



Expand All @@ -414,9 +422,11 @@ private static ReCiterArticleFeedbackIdentityScore mapToIdentityScore(ReCiterArt

try {


return new ReCiterArticleFeedbackIdentityScore(
article.getArticleId(),
getArticleCountScore(article.getArticleCountEvidence()),
getAuthorsCountScore(article.getAuthorCountEvidence()),
getEducationYearScore(article.getEducationYearEvidence()),
getEmailMatchScore(article.getEmailEvidence()),
getGenderScore(article.getGenderEvidence()),
Expand All @@ -427,8 +437,9 @@ private static ReCiterArticleFeedbackIdentityScore mapToIdentityScore(ReCiterArt
getNameMatchScore(article.getAuthorNameEvidence(), AuthorNameEvidence::getNameMatchMiddleScore),
getNameMatchScore(article.getAuthorNameEvidence(), AuthorNameEvidence::getNameMatchModifierScore),
getFeedbackScore(article.getOrganizationalEvidencesTotalScore()),
getRelationshipEvidenceTotalScore(article.getRelationshipEvidence()),
getNegativeMatchScore(article.getRelationshipEvidence()),
getRelationshipPositiveMatchScore(article.getRelationshipEvidence().getRelationshipPositiveMatch()),
getRelationshipNegativeMatchScore(article.getRelationshipEvidence().getRelationshipNegativeMatch()),
article.getRelationshipEvidence().getRelationshipIdentityCount(),
getNonTargetAuthorInstitutionalAffiliationScore(article.getAffiliationEvidence()),
getTargetAuthorAffiliationScore(article.getAffiliationEvidence()),
getPubmedTargetAuthorAffiliationScore(article.getAffiliationEvidence()),
Expand All @@ -451,7 +462,33 @@ private static double getArticleCountScore(ArticleCountEvidence evidence) {
.map(ArticleCountEvidence::getArticleCountScore)
.orElse(0.0);
}

/**
 * Reads the author-count score out of the supplied evidence.
 *
 * @param evidence author-count evidence of an article; may be {@code null}
 * @return the score reported by the evidence, or {@code 0.0} when the evidence
 *         is {@code null} or reports no score
 */
private static double getAuthorsCountScore(AuthorCountEvidence evidence) {
    if (evidence == null) {
        return 0.0;
    }
    // ofNullable keeps the 0.0 fallback in case the getter hands back null.
    return Optional.ofNullable(evidence.getAuthorCountScore()).orElse(0.0);
}

/**
 * Maps an author count to a likelihood adjustment.
 *
 * <p>The likelihood is modelled as {@code inCoefficient * ln(count) + constantCoefficient},
 * with every coefficient read from {@code strategyParameters} at call time. The result is
 * {@code gamma * (likelihood(authorCount) - likelihood(authorCountThreshold))}. For an author
 * count that is not greater than zero the baseline likelihood is used on both sides, so the
 * difference being scaled is zero.
 *
 * <p>The argument is unboxed for the comparison, so a {@code null} author count raises a
 * {@link NullPointerException}.
 */
private static Function<Double, Double> calculateLikelihoodAdjustment = authorCount -> {
    // Likelihood at the configured author-count threshold; this is the reference point.
    final double baselineLikelihood = strategyParameters.getInCoefficent()
            * Math.log(strategyParameters.getAuthorCountThreshold())
            + strategyParameters.getConstantCoefficeint();

    // Likelihood for the supplied author count; ln() is only evaluated for positive counts.
    final double likelihood;
    if (authorCount > 0) {
        likelihood = strategyParameters.getInCoefficent() * Math.log(authorCount)
                + strategyParameters.getConstantCoefficeint();
    } else {
        likelihood = baselineLikelihood;
    }

    // The distance from the baseline is scaled by gamma.
    return strategyParameters.getAuthorCountAdjustmentGamma() * (likelihood - baselineLikelihood);
};

/**
 * Produces the adjusted score for an author count by delegating, unchanged, to
 * {@code calculateLikelihoodAdjustment}. The delegate field is looked up on every call.
 */
private static Function<Double, Double> calculateAdjustedArticleCountScore =
        authorCount -> calculateLikelihoodAdjustment.apply(authorCount);

private static double getEducationYearScore(EducationYearEvidence evidence) {
return Optional.ofNullable(evidence)
.map(EducationYearEvidence::getDiscrepancyDegreeYearDoctoralScore)
Expand Down Expand Up @@ -482,19 +519,6 @@ private static double getNameMatchScore(AuthorNameEvidence evidence, Function<Au
.orElse(0.0);
}

/**
 * Reads the total relationship-evidence score.
 *
 * @param evidence relationship evidence of an article; may be {@code null}
 * @return the total score reported by the evidence, or {@code 0.0} when the
 *         evidence is {@code null} or reports no total
 */
private static double getRelationshipEvidenceTotalScore(RelationshipEvidence evidence) {
    return evidence == null
            ? 0.0
            : Optional.ofNullable(evidence.getRelationshipEvidenceTotalScore()).orElse(0.0);
}

/**
 * Reads the non-match score from the negative match nested inside the relationship evidence.
 *
 * @param evidence relationship evidence of an article; may be {@code null}
 * @return the non-match score, or {@code 0.0} when the evidence, its negative
 *         match, or the score itself is missing
 */
private static double getNegativeMatchScore(RelationshipEvidence evidence) {
    RelationshipNegativeMatch negativeMatch = null;
    if (evidence != null) {
        negativeMatch = evidence.getRelationshipNegativeMatch();
    }
    if (negativeMatch == null) {
        return 0.0;
    }
    return Optional.ofNullable(negativeMatch.getRelationshipNonMatchScore()).orElse(0.0);
}

private static double getNonTargetAuthorInstitutionalAffiliationScore(AffiliationEvidence evidence) {
return Optional.ofNullable(evidence)
.map(AffiliationEvidence::getScopusNonTargetAuthorAffiliation)
Expand All @@ -517,31 +541,51 @@ private static double getPubmedTargetAuthorAffiliationScore(AffiliationEvidence
.map(TargetAuthorPubmedAffiliation::getTargetAuthorInstitutionalAffiliationMatchTypeScore)
.orElse(0.0);
}

/**
 * Reads the matching score of the first positive relationship match.
 *
 * <p>Only the element at index 0 is consulted; any further matches are ignored.
 *
 * @param evidences positive relationship matches; may be {@code null} or empty
 * @return the first match's matching score, or {@code 0.0} when the list is
 *         {@code null} or empty, its first element is {@code null}, or that
 *         element reports no score
 */
private static double getRelationshipPositiveMatchScore(List<RelationshipPostiveMatch> evidences) {
    if (evidences == null || evidences.isEmpty()) {
        return 0.0;
    }
    RelationshipPostiveMatch firstMatch = evidences.get(0);
    if (firstMatch == null) {
        return 0.0;
    }
    return Optional.ofNullable(firstMatch.getRelationshipMatchingScore()).orElse(0.0);
}

/**
 * Reads the non-match score of a negative relationship match.
 *
 * @param negativeEvidence negative relationship match; may be {@code null}
 * @return the non-match score, or {@code 0.0} when the match is {@code null}
 *         or reports no score
 */
private static double getRelationshipNegativeMatchScore(RelationshipNegativeMatch negativeEvidence) {
    if (negativeEvidence == null) {
        return 0.0;
    }
    return Optional.ofNullable(negativeEvidence.getRelationshipNonMatchScore()).orElse(0.0);
}
/**
 * Applies the scores found in {@code authorshipLikelihoodScoreArray} to the given articles.
 *
 * <p>Each non-null article is passed to {@code findJSONObjectById}, which sets the article's
 * authorship likelihood score as a side effect. The articles themselves are mutated; the
 * returned list holds the same article instances.
 *
 * NOTE(review): the body contains two consecutive {@code return} pipelines, so the second one
 * is unreachable as written. This looks like the old and new side of a diff shown together;
 * confirm which pipeline is meant to remain.
 *
 * @param reCiterArticles articles to score; null elements are filtered out
 * @param authorshipLikelihoodScoreArray JSON array searched by {@code findJSONObjectById}
 * @return the non-null articles after their scores have been set
 */
private static List<ReCiterArticle> mapAuthorshipLikelihoodScore(List<ReCiterArticle> reCiterArticles, JSONArray authorshipLikelihoodScoreArray)
{
// First pipeline: score every non-null article and drop any null result of the lookup.
return reCiterArticles.stream()
.filter(Objects::nonNull)
.map(article -> findJSONObjectById(authorshipLikelihoodScoreArray, article))
.filter(Objects::nonNull) // Drop null results of findJSONObjectById
.collect(Collectors.toList()); // Gather the scored articles into a list

// Second pipeline (unreachable while the return above is present): keeps every non-null article.
return reCiterArticles.stream()
.filter(Objects::nonNull) // Skip null articles
.map(article -> {
// findJSONObjectById sets the article's score as a side effect of the lookup
ReCiterArticle reCiterArticle = findJSONObjectById(authorshipLikelihoodScoreArray, article);
// If the lookup reports no result, default the score to 0.0
if (reCiterArticle == null) {
article.setAuthorshipLikelihoodScore(0.0);
}
return article; // Always hand the original article on, scored or defaulted
})
.collect(Collectors.toList()); // Gather the articles into a list

}


/**
 * Finds the entry of {@code jsonArray} whose {@code "id"} equals the article's id and stores
 * that entry's {@code "scoreTotal"} multiplied by 100 as the article's authorship likelihood
 * score. When no entry matches and the article is non-null, the score is set to 0.0.
 *
 * NOTE(review): {@code article} is dereferenced inside the loop, so the null check after the
 * loop only protects the case where the loop body never runs (empty array) — confirm whether
 * a null article is expected here.
 *
 * @param jsonArray array of JSON objects; the code reads an {@code "id"} and a {@code "scoreTotal"} from each
 * @param article article to score; it is mutated in place
 * @return the same {@code article} reference that was passed in, whether or not a match was found
 */
private static ReCiterArticle findJSONObjectById(JSONArray jsonArray, ReCiterArticle article) {
for (int i = 0; i < jsonArray.length(); i++) {
JSONObject jsonObject = jsonArray.getJSONObject(i);
if (jsonObject.getLong("id") == article.getArticleId()) {
/*article.setAuthorshipLikelihoodScore(BigDecimal.valueOf(jsonObject.getDouble("scoreTotal")*100)
.setScale(3, RoundingMode.DOWN)
.doubleValue());*/
// NOTE(review): the next two calls set the same field twice and look like the old and new
// side of a diff shown together. If JSONObject is org.json's, getDouble throws when
// "scoreTotal" is missing, which would defeat the 0.0 default of optDouble — confirm which
// line should remain.
article.setAuthorshipLikelihoodScore(jsonObject.getDouble("scoreTotal")*100);
article.setAuthorshipLikelihoodScore(jsonObject.optDouble("scoreTotal",0.0)*100);
return article; // Return the modified article
}

}
// No entry matched the article id: default the score to 0.0.
if(article!=null)
article.setAuthorshipLikelihoodScore(0.0);
return article; // Not found: the article itself is still returned (never a fresh null)
}

private boolean isS3UploadRequired()
{

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*******************************************************************************
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*******************************************************************************/
package reciter.algorithm.evidence.author.authorcount;

import reciter.algorithm.evidence.targetauthor.AbstractTargetAuthorStrategyContext;
import reciter.algorithm.evidence.targetauthor.TargetAuthorStrategy;

/**
 * Strategy context used for the author-count strategy.
 *
 * <p>Adds no behavior of its own: the supplied strategy is handed straight to
 * {@link AbstractTargetAuthorStrategyContext}.
 */
public class AuthorCountStrategyContext extends AbstractTargetAuthorStrategyContext {

    /**
     * Creates a context around the given strategy.
     *
     * @param strategy the strategy passed on to the superclass constructor
     */
    public AuthorCountStrategyContext(final TargetAuthorStrategy strategy) {
        super(strategy);
    }
}
Loading

0 comments on commit 9faf5b7

Please sign in to comment.