Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates Fetch Online Data Example for Readability & Adds JavaDoc Comments #6

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
186 changes: 138 additions & 48 deletions src/examples/FetchOnlineDataExample.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
*/

import java.io.IOException;
import java.io.PrintStream;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

Expand All @@ -33,89 +35,177 @@
import org.wikidata.wdtk.wikibaseapi.WikibaseDataFetcher;
import org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException;

/**
* This example demonstrates fetching data from the Wikidata.org API by their QID, title, or query and applying various filters to reduce the volume of data returned.
*
* This example does not download any dump files.
*<ul>
* <li> Fetches data for a single entity (& multiple entities) using a QID (unique Wikimedia identifier) and prints its title.</li>
* <li> Fetches data for multiple entities using site-key ("enwiki") and multiple titles and prints their QID's </li>
* <li> Searches for entities using a query (e.g. "Douglas Adams") and which language Wiki (or "IRI") to search, then prints the QID's and labels of the results. </li>
* <li> Fetches data for a single entity using its QID, but limits the volume of data returned by first applying site-link, language, and property filters</li>
*</ul>
*
*
* @throws MediaWikiApiErrorException
* @throws IOException

* @author Markus Kroetzsch
*/

public class FetchOnlineDataExample {
private static boolean printDataToResultsDirectory = false; // Set to true to write the full data output to the 'results' directory.

public static void main(String[] args) throws MediaWikiApiErrorException, IOException {
ExampleHelpers.configureLogging();
printDocumentation();
FetchOnlineDataExample.printDocumentation();

WikibaseDataFetcher wbdf = new WikibaseDataFetcher(
BasicApiConnection.getWikidataApiConnection(),
Datamodel.SITE_WIKIDATA);

FetchOnlineDataExample.fetchSingleEntityByQID(wbdf);
FetchOnlineDataExample.fetchEntitiesByTitles(wbdf);
FetchOnlineDataExample.fetchEntityResultsBySearchTerm(wbdf);
FetchOnlineDataExample.fetchEntityWithAppliedFilters(wbdf);
}

/**
* Fetches data for a single entity using its QID (unique identifier).
* The fetched data is then written to selected ouputs and prints the 'en' text title to the console.
*/
public static void fetchSingleEntityByQID(WikibaseDataFetcher wbdf) throws MediaWikiApiErrorException, IOException {
System.out.println("*** Fetching data for one entity:");
EntityDocument q42 = wbdf.getEntityDocument("Q42");
System.out.println(q42);

if (q42 instanceof ItemDocument) {
System.out.println("The English name for entity Q42 is "
+ ((ItemDocument) q42).getLabels().get("en").getText());
// Multiple entities can be fetched in one go by providing a list of QID's & using the getEntityDocuments() method.
EntityDocument entityDocument = wbdf.getEntityDocument("Q42");

if (entityDocument instanceof ItemDocument) {
System.out.println("The English name for entity Q42 is: "
+ ((ItemDocument) entityDocument).getLabels().get("en").getText());
writeEntityDataToFile(entityDocument, "single-entity.txt");
}

System.out.println("*** Fetching data for several entities:");
Map<String, EntityDocument> results = wbdf.getEntityDocuments("Q80",
"P31");
// Keys of this map are Qids, but we only use the values here:
for (EntityDocument ed : results.values()) {
System.out.println("Successfully retrieved data for "
+ ed.getEntityId().getId());
}

/**
* Fetches data for multiple enetities using their titles.
* The fetched data is then written to selected ouputs and prints the unique QID to the console.
*/
public static void fetchEntitiesByTitles(WikibaseDataFetcher wbdf) throws MediaWikiApiErrorException, IOException {
System.out.println("*** Fetching data for entities by page titles:");

// Similar to above, single entities can be fetched by providing a single title & using the getEntityDocumentsByTitle() method.
Map<String, EntityDocument> entityDocuments = wbdf.getEntityDocumentsByTitle("enwiki", "Terry Pratchett",
"Neil Gaiman");

for (Entry<String, EntityDocument> entry : entityDocuments.entrySet()) {
System.out.println("The QID for the entity with page title \""
+ entry.getKey() + "\" is: " + entry.getValue().getEntityId().getId());
writeEntityDataToFile(entry.getValue(), "multiple-entities-" + entry.getKey() + ".txt");
}
}

System.out
.println("*** Fetching data using filters to reduce data volume:");
// Only site links from English Wikipedia:
wbdf.getFilter().setSiteLinkFilter(Collections.singleton("enwiki"));
// Only labels in French:
wbdf.getFilter().setLanguageFilter(Collections.singleton("fr"));
// No statements at all:
wbdf.getFilter().setPropertyFilter(Collections.emptySet());
/**
* Fetches results data for whichever entities match the search term for language code provided.
* The resultant data is then written to selected ouputs and prints the QID and label of the search results to the console.
*/
public static void fetchEntityResultsBySearchTerm(WikibaseDataFetcher wbdf)
throws MediaWikiApiErrorException, IOException {
System.out.println("*** Doing search on Wikidata for: Douglas Adams");

// Search for entities using a search term and language code.
List<WbSearchEntitiesResult> searchResults = wbdf.searchEntities("Douglas Adams", "fr");
if (printDataToResultsDirectory) {
try (PrintStream out = new PrintStream(ExampleHelpers.openExampleFileOuputStream("search-results.txt"))) {
for (WbSearchEntitiesResult result : searchResults) {
writeSearchResutlsToFile(result, out);
System.out.println("Found entity with QID " + result.getEntityId() + " and label \""
+ result.getLabel() + "\".");
}
}
}
}

/**
* Fetches data for a single entity using its QID and applies filters to reduce the volume of data returned.
* The fetched data is then written to selected ouputs and prints the French label and English Wikipedia page title to the console.
*/
public static void fetchEntityWithAppliedFilters(WikibaseDataFetcher wbdf)
throws MediaWikiApiErrorException, IOException {
System.out.println("*** Fetching data using filters to reduce data volume:");

// apply filters to data to get only what we want...
wbdf.getFilter().setSiteLinkFilter(Collections.singleton("enwiki")); // Only site links from English Wikipedia
wbdf.getFilter().setLanguageFilter(Collections.singleton("fr")); // Only labels in French
wbdf.getFilter().setPropertyFilter(Collections.emptySet()); // No statements at all
// Fetch the entity data
EntityDocument q8 = wbdf.getEntityDocument("Q8");

if (q8 instanceof ItemDocument) {
System.out.println("The French label for entity Q8 is "
+ ((ItemDocument) q8).getLabels().get("fr").getText()
+ "\nand its English Wikipedia page has the title "
+ ((ItemDocument) q8).getSiteLinks().get("enwiki")
.getPageTitle() + ".");
+ ((ItemDocument) q8).getSiteLinks().get("enwiki").getPageTitle() + ".");
writeEntityDataToFile(q8, "filtered-entity.txt");
}
}

System.out.println("*** Fetching data based on page title:");
EntityDocument edPratchett = wbdf.getEntityDocumentByTitle("enwiki",
"Terry Pratchett");
System.out.println("The Qid of Terry Pratchett is "
+ edPratchett.getEntityId().getId());

System.out.println("*** Fetching data based on several page titles:");
results = wbdf.getEntityDocumentsByTitle("enwiki", "Wikidata",
"Wikipedia");
// In this case, keys are titles rather than Qids
for (Entry<String, EntityDocument> entry : results.entrySet()) {
System.out
.println("Successfully retrieved data for page entitled \""
+ entry.getKey() + "\": "
+ entry.getValue().getEntityId().getId());
/**
* Uses a PrintStream and the included ExampleHelpers class to write the entity data to the included file name.
*
* @param entityDocument The entity document to write to the file.
* @param fileName The name of the file to write the entity data to.
*/
private static void writeEntityDataToFile(EntityDocument entityDocument, String fileName) {
if (!printDataToResultsDirectory) // skip writing to file if not enabled
return;

try (PrintStream out = new PrintStream(ExampleHelpers.openExampleFileOuputStream(fileName))) {
out.println(entityDocument);
} catch (IOException e) {
e.printStackTrace();
}
}

System.out.println("** Doing search on Wikidata:");
for(WbSearchEntitiesResult result : wbdf.searchEntities("Douglas Adams", "fr")) {
System.out.println("Found " + result.getEntityId() + " with label " + result.getLabel());
/**
* Writes the search results to the specified PrintStream.
*
* @param result The WbSearchEntitiesResult object containing the search results.
* @param out The PrintStream to write the results to.
*/
private static void writeSearchResutlsToFile(WbSearchEntitiesResult result, PrintStream out) {
if (!printDataToResultsDirectory) // skip writing to file if not enabled
return;

String output = "RESULT " + result.getTitle() + " DETAILS:" +
"\nconcept_uri:" + result.getConceptUri() +
"\ndescription:" + result.getDescription() +
"\nentity_ID:" + result.getEntityId() +
"\nlabel:" + result.getLabel() +
"\npage_ID:" + result.getPageId() +
"\nQID:" + result.getTitle() +
"\nURL:" + result.getUrl() +
"\n";

if (printDataToResultsDirectory) {
out.println(output);
}

System.out.println("*** Done.");
}

/**
* Prints some basic documentation about this program.
*/
* Prints some basic documentation about this program.
*/
public static void printDocumentation() {
System.out
.println("********************************************************************");
System.out.println("*** Wikidata Toolkit: FetchOnlineDataExample");
System.out.println("*** ");
System.out
.println("*** This program fetches individual data using the wikidata.org API.");
.println(
"*** This example demonstrates fetching data from the Wikidata.org API by their QID, title, or query and applying various filters to reduce the volume of data returned.");
System.out.println("*** It does not download any dump files.");
System.out
.println("********************************************************************");
}

}
}