Skip to content

Commit

Permalink
Add more urls in tests.
Browse files (browse the repository at this point in the history)
  • Loading branch information
LSmyrnaios committed Mar 7, 2019
1 parent 71f2ed5 commit 99ff77b
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 8 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -281,7 +281,7 @@ public static HashSet<String> extractInternalLinksFromHtml(String pageHtml) thro
&& !internalLink.startsWith("#")
&& !internalLink.startsWith("mailto:") && !internalLink.startsWith("tel:") && !internalLink.startsWith("{openurl}") )
{
//logger.debug("InternalLink: " + internalLink);
//logger.debug("Filtered InternalLink: " + internalLink);
String lowerCaseLink = internalLink.toLowerCase();
if ( lowerCaseLink.startsWith("javascript:") ) {
String pdfLink = null;
Expand All @@ -290,7 +290,7 @@ public static HashSet<String> extractInternalLinksFromHtml(String pageHtml) thro
pdfLink = pdfLinkMatcher.group(1);
throw new JavaScriptDocLinkFoundException(pdfLink); // If it's 'null', we treat it when handling this exception.
}
else // It's a javaScriptLink which we don't treat.
else // It's a javaScript link or element which we don't treat.
continue;
}
urls.add(internalLink);
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -38,9 +38,10 @@ static void setExampleHtml() {


@BeforeAll
static void setExampleUrl() {
exampleUrl = "http://epress.lib.uts.edu.au/journals/index.php/mcs/article/view/5655";
}
//static void setExampleUrl() {exampleUrl = "http://epress.lib.uts.edu.au/journals/index.php/mcs/article/view/5655";}
//static void setExampleUrl() {exampleUrl = "https://halshs.archives-ouvertes.fr/halshs-01698574";}
//static void setExampleUrl() {exampleUrl = "https://doors.doshisha.ac.jp/duar/repository/ir/127/?lang=0";}
static void setExampleUrl() {exampleUrl = "https://www.sciencedirect.com/science/article/pii/S0042682297988747?via%3Dihub";}


//@Disabled
Expand Down Expand Up @@ -92,7 +93,7 @@ public void testExtractAllLinksFromHtml()
try {
ArrayList<String> links = new ArrayList<>(PageCrawler.extractInternalLinksFromHtml(exampleHtml));

logger.info("The list of all the internalLinks is:");
logger.info("The list of all the internalLinks of \"" + exampleUrl + "\" is:");
for ( String link: links )
logger.info(link);

Expand All @@ -111,7 +112,7 @@ public void testExtractAllLinksFromUrl()

ArrayList<String> links = new ArrayList<>(PageCrawler.extractInternalLinksFromHtml(getHtmlString(conn)));

logger.info("The list of all the internalLinks is:");
logger.info("The list of all the internalLinks of \"" + exampleUrl + "\" is:");
for ( String link: links )
logger.info(link);

Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -65,7 +65,8 @@ public void runIndividualTests()
//urlList.add("http://www.esocialsciences.org/Download/repecDownload.aspx?fname=Document18112005270.6813013.doc&fcategory=Articles&AId=236&fref=repec");
//urlList.add("https://wwwfr.uni.lu/content/download/35522/427398/file/2011-05%20-%20Demographic%20trends%20and%20international%20capital%20flows%20in%20an%20integrated%20world.pdf");
//urlList.add("http://www.grid.uns.ac.rs/jged/download.php?fid=108");
urlList.add("https://wwwfr.uni.lu/content/download/35522/427398/file/2011-05%20-%20Demographic%20trends%20and%20international%20capital%20flows%20in%20an%20integrated%20world.pdf");
//urlList.add("https://wwwfr.uni.lu/content/download/35522/427398/file/2011-05%20-%20Demographic%20trends%20and%20international%20capital%20flows%20in%20an%20integrated%20world.pdf");
urlList.add("https://www.scribd.com/document/397997565/Document-2-Kdashnk");

logger.info("Urls to check:");
for ( String url: urlList )
Expand Down

0 comments on commit 99ff77b

Please sign in to comment.