Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/openaire/iis
Browse files Browse the repository at this point in the history
  • Loading branch information
LSmyrnaios committed Sep 15, 2020
2 parents 360bd67 + 53d6b98 commit c542a45
Show file tree
Hide file tree
Showing 34 changed files with 816 additions and 292 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import org.apache.maven.plugin.AbstractMojo;
import org.apache.maven.plugin.MojoExecutionException;
import org.apache.maven.plugin.MojoFailureException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Generates oozie properties which were not provided from commandline.
Expand All @@ -18,6 +20,8 @@
*/
public class GenerateOoziePropertiesMojo extends AbstractMojo {

private static final Logger logger = LoggerFactory.getLogger(GenerateOoziePropertiesMojo.class);

public static final String PROPERTY_NAME_WF_SOURCE_DIR = "workflow.source.dir";
public static final String PROPERTY_NAME_SANDBOX_NAME = "sandboxName";

Expand All @@ -33,8 +37,8 @@ public void execute() throws MojoExecutionException, MojoFailureException {
System.getProperties().setProperty(PROPERTY_NAME_SANDBOX_NAME,
generatedSandboxName);
} else {
System.out.println("unable to generate sandbox name from path: " +
System.getProperties().getProperty(PROPERTY_NAME_WF_SOURCE_DIR));
logger.warn("unable to generate sandbox name from path: {}",
System.getProperties().getProperty(PROPERTY_NAME_WF_SOURCE_DIR));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import org.apache.spark.api.java.JavaSparkContext;
import org.junit.*;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;

import java.io.File;
Expand Down Expand Up @@ -57,6 +59,8 @@
@Category(IntegrationTest.class)
public class AffMatchingAffOrgQualityTest {

private static final Logger logger = LoggerFactory.getLogger(AffMatchingAffOrgQualityTest.class);

private final static boolean PRINT_NOT_MATCHED = true;
private final static boolean PRINT_FALSE_POSITIVE_MATCHES = true;

Expand Down Expand Up @@ -197,7 +201,7 @@ private void printQualityFactor(String factorName, int goodCount, int totalCount
double factorPercentage = ((double) goodCount / totalCount) * 100;

String text = String.format("%-30s %5.2f%% (%d/%d)", factorName + ":", factorPercentage, goodCount, totalCount);
System.out.println(text);
logger.trace(text);
}

private AffMatchingService createAffMatchingService() throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
import eu.dnetlib.iis.referenceextraction.project.schemas.DocumentToProject;
import eu.dnetlib.iis.wf.affmatching.model.MatchedOrganization;
import eu.dnetlib.iis.wf.affmatching.model.SimpleAffMatchResult;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.sparkutils.test.SparkJob;
import pl.edu.icm.sparkutils.test.SparkJobBuilder;
import pl.edu.icm.sparkutils.test.SparkJobExecutor;
Expand All @@ -46,7 +48,9 @@
*/
@Category(IntegrationTest.class)
public class AffMatchingDocOrgQualityTest {


private static final Logger logger = LoggerFactory.getLogger(AffMatchingDocOrgQualityTest.class);

private final static String INPUT_DATA_DIR_PATH = "src/test/resources/experimentalData/input";

private SparkJobExecutor executor = new SparkJobExecutor();
Expand Down Expand Up @@ -116,8 +120,8 @@ public void affiliationMatchingJob_combined_data() throws IOException {


// log
System.out.println("\nALL TEST DATA");

logger.trace("ALL TEST DATA");

readResultsAndPrintQualityRate(of(
"src/test/resources/experimentalData/expectedOutput/matched_aff.json"));
Expand Down Expand Up @@ -208,7 +212,7 @@ private void printQualityFactor(String factorName, int goodCount, int totalCount
double factorPercentage = ((double)goodCount/totalCount)*100;

String text = String.format("%-20s %5.2f%% (%d/%d)", factorName + ":", factorPercentage, goodCount, totalCount);
System.out.println(text);
logger.trace(text);


}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
import eu.dnetlib.iis.metadataextraction.schemas.Affiliation;
import eu.dnetlib.iis.metadataextraction.schemas.ExtractedDocumentMetadata;
import eu.dnetlib.iis.wf.affmatching.model.SimpleAffMatchResult;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* A helper that prints results of affiliation matching (actual matched affiliations in relation to
Expand All @@ -23,7 +25,9 @@
*/

public class AffMatchingResultPrinter {


private static final Logger logger = LoggerFactory.getLogger(AffMatchingResultPrinter.class);

private static final Comparator<SimpleAffMatchResult> RESULT_COMPARATOR = Comparator
.comparing(SimpleAffMatchResult::getDocumentId)
.thenComparingInt(SimpleAffMatchResult::getAffiliationPosition);
Expand All @@ -49,8 +53,8 @@ public static void printFalsePositives(String inputAffDirPath, String inputOrgDi
.filter(x -> !expectedMatches.contains(x))
.sorted(RESULT_COMPARATOR)
.collect(toList());
System.out.println("\n\t-------------------- false positives ---------------------");

logger.trace("-------------------- false positives ---------------------");

for (SimpleAffMatchResult falsePositive : falsePositives) {

Expand All @@ -63,14 +67,14 @@ public static void printFalsePositives(String inputAffDirPath, String inputOrgDi
List<Organization> expectedOrgs = expectedOrgIds.stream().map(x -> fetchOrganization(organizations, x)).collect(toList());

Organization actualOrg = fetchOrganization(organizations, falsePositive.getOrganizationId());
System.out.println("Document id: " + documentId + " \tPosition: " + affiliationPosition);
System.out.println("Affiliation: " + affiliation);
System.out.println("Was matched to: " + actualOrg);

logger.trace("Document id: " + documentId + " \tPosition: " + affiliationPosition);
logger.trace("Affiliation: " + affiliation);
logger.trace("Was matched to: " + actualOrg);


if (expectedOrgs.isEmpty()) {
System.out.println("Should match to: null");
logger.trace("Should match to: null");
}
for (int i=0; i<expectedOrgs.size(); ++i) {

Expand All @@ -80,12 +84,11 @@ public static void printFalsePositives(String inputAffDirPath, String inputOrgDi

String shouldMatchPrefix = (i == 0) ? "Should match to: " : "and: ";
String alreadyMatchedString = alreadyMatched ? "(already matched) " : "";
System.out.println(shouldMatchPrefix + alreadyMatchedString + expectedOrgs.get(i));

logger.trace(shouldMatchPrefix + alreadyMatchedString + expectedOrgs.get(i));

}
System.out.println();


}

}
Expand All @@ -104,21 +107,20 @@ public static void printNotMatched(String inputAffDirPath, String inputOrgDirPat
.filter(x -> !actualMatches.contains(x))
.sorted(RESULT_COMPARATOR)
.collect(toList());
System.out.println("\n\t--------------------- not matched --------------------");


logger.trace("--------------------- not matched --------------------");

for (SimpleAffMatchResult match : notMatched) {

Affiliation affiliation = fetchAffiliation(docsAffiliations, match.getDocumentId(), match.getAffiliationPosition());

Organization expectedOrg = fetchOrganization(organizations, match.getOrganizationId());


System.out.println("Document id: " + match.getDocumentId() + " \tPosition: " + match.getAffiliationPosition());
System.out.println("Affiliation: " + affiliation);
System.out.println("Should match to: " + expectedOrg);
System.out.println();


logger.trace("Document id: " + match.getDocumentId() + " \tPosition: " + match.getAffiliationPosition());
logger.trace("Affiliation: " + affiliation);
logger.trace("Should match to: " + expectedOrg);
}

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
Expand All @@ -48,7 +50,6 @@
import static eu.dnetlib.iis.wf.affmatching.match.FirstWordsHashBucketMatcherFactory.createNameFirstWordsHashBucketMatcher;
import static eu.dnetlib.iis.wf.affmatching.match.FirstWordsHashBucketMatcherFactory.createNameFirstWordsHashBucketMatcherVoters;
import static eu.dnetlib.iis.wf.affmatching.match.MainSectionHashBucketMatcherFactory.*;
import static java.lang.System.out;
import static java.util.stream.Collectors.toList;
import static org.hamcrest.MatcherAssert.assertThat;

Expand All @@ -63,6 +64,8 @@
@Category(IntegrationTest.class)
public class AffOrgMatchVoterStrengthEstimatorAndTest {

private static final Logger logger = LoggerFactory.getLogger(AffOrgMatchVoterStrengthEstimatorAndTest.class);

private final static boolean PRINT_NOT_MATCHED = false;

private final static boolean PRINT_FALSE_POSITIVES = true;
Expand Down Expand Up @@ -141,8 +144,8 @@ public void estimateAndCheckVoterStrengths() throws IOException {

// assert
if (CollectionUtils.isNotEmpty(invalidVoterStrengths)) {
System.out.println("Invalid Voter Strengths. Change them manually to the calculated values (in the code):\n");
invalidVoterStrengths.forEach(System.out::println);
logger.trace("Invalid Voter Strengths. Change them manually to the calculated values (in the code):");
invalidVoterStrengths.stream().map(InvalidVoterStrength::toString).forEach(logger::debug);
}

assertThat(invalidVoterStrengths, Matchers.emptyIterable());
Expand Down Expand Up @@ -221,8 +224,6 @@ private void estimateVoterMatchStrengths(AffOrgMatcher affOrgMatcher, String aff
FileUtils.deleteDirectory(new File(outputDirPath));
}

out.println("\n\n");

FileUtils.deleteDirectory(workingDir);
}

Expand All @@ -238,15 +239,14 @@ private void checkIfVoterStrengthSetCorrectly(String affOrgMatcherName, AffOrgMa
}

private void printVoterHeader(AffOrgMatchVoter voter) {
out.println("\n\n");
out.println("---------------------------------- VOTER ----------------------------------------");
out.println(voter.toString() + "\n");
logger.trace("---------------------------------- VOTER ----------------------------------------");
logger.trace(voter.toString());
}

private void printMatcherHeader(String affOrgMatcherName) {
out.println("\n\n==================================================================================");
out.println("========================= " + affOrgMatcherName + " ===========================");
out.println("==================================================================================");
logger.trace("==================================================================================");
logger.trace("========================= " + affOrgMatcherName + " ===========================");
logger.trace("==================================================================================");
}

private void createInputData() throws IOException {
Expand Down Expand Up @@ -281,8 +281,6 @@ private float calcAndPrintResult(List<String> expectedResultsJsonPaths) throws I
printNumberDetails(expectedMatches.size(), actualMatches.size(), correctMatches.size(), falsePositives.size());
}

out.println();

if (PRINT_FALSE_POSITIVES) {
printFalsePositives(inputAffDirPath, inputOrgDirPath, expectedMatches, actualMatches);
}
Expand All @@ -299,25 +297,20 @@ private float calcMatchStrength(int numberOfActualMatches, int numberOfCorrectMa
}

private void printMatchStrength(float matchStrength) {
out.printf("%s %1." + VOTER_MATCH_STRENGTH_SCALE + "f", "MATCH STRENGTH: ", matchStrength);
logger.trace(String.format("%s %1." + VOTER_MATCH_STRENGTH_SCALE + "f", "MATCH STRENGTH: ", matchStrength));
}

private void printNumberDetails(int numberOfExpectedMatches, int numberOfActualMatches, int numberOfCorrectMatches, int numberOfFalsePositives) {
out.print(" [");
printQualityFactor("All matches", numberOfActualMatches, numberOfExpectedMatches);
out.print(", ");
printQualityFactor("Correct matches", numberOfCorrectMatches, numberOfActualMatches);
out.print(", ");
printQualityFactor("False positives", numberOfFalsePositives, numberOfActualMatches);
out.print("]");
logger.trace("[{}, {}, {}]",
qualityFactor("All matches", numberOfActualMatches, numberOfExpectedMatches),
qualityFactor("Correct matches", numberOfCorrectMatches, numberOfActualMatches),
qualityFactor("False positives", numberOfFalsePositives, numberOfActualMatches));
}

private void printQualityFactor(String factorName, int goodCount, int totalCount) {
private String qualityFactor(String factorName, int goodCount, int totalCount) {
double factorPercentage = ((double)goodCount/totalCount)*100;

String text = String.format("%s %3.2f%% (%d/%d)", factorName + ":", factorPercentage, goodCount, totalCount);

System.out.print(text);
return String.format("%s %3.2f%% (%d/%d)", factorName + ":", factorPercentage, goodCount, totalCount);
}

private AffMatchingService createAffMatchingService() throws IOException {
Expand Down
9 changes: 9 additions & 0 deletions iis-wf/iis-wf-affmatching/src/test/resources/log4j.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Log to the console; the root category below limits output to WARN and above by default
log4j.rootCategory=WARN, console

log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.out
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n

log4j.logger.eu.dnetlib.iis=DEBUG
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package eu.dnetlib.iis.wf.importer;

import java.io.Closeable;

import org.apache.http.client.config.RequestConfig;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;

/**
 * HTTP client utility class.
 * <p>
 * Exposes static factory methods only and is not meant to be instantiated or extended.
 *
 * @author mhorst
 *
 */
public final class HttpClientUtils {

    /**
     * Prevents instantiation: this class only groups static helpers.
     */
    private HttpClientUtils() {
        // intentionally empty
    }

    /**
     * Builds {@link Closeable} HTTP client issuing requests to remote endpoint.
     * <p>
     * The client is not retained by this class, so closing it is up to the caller.
     *
     * @param connectionTimeout value used both as the connect timeout and as the
     *        connection request timeout of the default request configuration
     *        (HttpClient's {@code RequestConfig} documents these values in milliseconds)
     * @param readTimeout value used as the socket timeout of the default request
     *        configuration (same unit as {@code connectionTimeout})
     * @return newly built client having the above timeouts set as its default request configuration
     */
    public static CloseableHttpClient buildHttpClient(int connectionTimeout, int readTimeout) {
        // The same value is deliberately applied to both connection-related timeouts.
        RequestConfig defaultRequestConfig = RequestConfig.custom()
                .setConnectTimeout(connectionTimeout)
                .setConnectionRequestTimeout(connectionTimeout)
                .setSocketTimeout(readTimeout)
                .build();

        return HttpClientBuilder.create()
                .setDefaultRequestConfig(defaultRequestConfig)
                .build();
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,9 @@
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpRequest;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.apache.log4j.Logger;

Expand All @@ -52,6 +50,7 @@
import eu.dnetlib.iis.common.java.porttype.AvroPortType;
import eu.dnetlib.iis.common.java.porttype.PortType;
import eu.dnetlib.iis.referenceextraction.softwareurl.schemas.SoftwareHeritageOrigin;
import eu.dnetlib.iis.wf.importer.HttpClientUtils;

/**
* Importer module retrieving (incrementally) origins from Software Heritage RESTful endpoint.
Expand Down Expand Up @@ -206,10 +205,7 @@ protected DataFileWriter<SoftwareHeritageOrigin> getWriter(FileSystem fs, PortBi
* Builds HTTP client issuing requests to SH endpoint.
*/
protected CloseableHttpClient buildHttpClient(int connectionTimeout, int readTimeout) {
HttpClientBuilder httpClientBuilder = HttpClientBuilder.create();
httpClientBuilder.setDefaultRequestConfig(RequestConfig.custom().setConnectTimeout(connectionTimeout)
.setConnectionRequestTimeout(connectionTimeout).setSocketTimeout(readTimeout).build());
return httpClientBuilder.build();
return HttpClientUtils.buildHttpClient(connectionTimeout, readTimeout);
}

protected static void storeNextElementIndex(int nextElementIndex) throws IOException {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package eu.dnetlib.iis.wf.importer;

import static org.junit.Assert.assertNotNull;

import org.apache.http.impl.client.CloseableHttpClient;
import org.junit.Test;

/**
 * Unit test of {@link HttpClientUtils}.
 */
public class HttpClientUtilsTest {

    /**
     * Checks that a client is returned for the given timeouts.
     * <p>
     * The client is created in a try-with-resources statement so it is closed
     * once the assertion has run instead of being left open by the test.
     */
    @Test
    public void testBuildHttpClient() throws Exception {
        // given
        int connectionTimeout = 1;
        int readTimeout = 2;

        // execute
        try (CloseableHttpClient client = HttpClientUtils.buildHttpClient(connectionTimeout, readTimeout)) {

            // assert
            assertNotNull(client);
        }
    }
}
Loading

0 comments on commit c542a45

Please sign in to comment.