Wikidata · horaciovelvetine · Apr 25, 2024 · Apr 25, 2024 · Apr 29, 2024 · Apr 29, 2024
diff --git a/src/examples/FetchOnlineDataExample.java b/src/examples/FetchOnlineDataExample.java
@@ -21,7 +21,9 @@
  */
 
 import java.io.IOException;
+import java.io.PrintStream;
 import java.util.Collections;
+import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 
@@ -33,89 +35,177 @@
 import org.wikidata.wdtk.wikibaseapi.WikibaseDataFetcher;
 import org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException;
 
+/**
+  * This example demonstrates fetching data from the Wikidata.org API by their QID, title, or query and applying various filters to reduce the volume of data returned. 
+  * 
+  * This example does not download any dump files.
+ *<ul>
+  * <li> Fetches data for a single entity (& multiple entities) using a QID (unique Wikimedia identifier) and prints its title.</li>
+  * <li> Fetches data for multiple entities using site-key ("enwiki") and multiple titles and prints their QID's </li>
+  * <li> Searches for entities using a query (e.g. "Douglas Adams") and which language Wiki (or "IRI") to search, then prints the QID's and labels of the results. </li>
+  * <li> Fetches data for a single entity using its QID, but limits the volume of data returned by first applying site-link, language, and property filters</li>
+  *</ul>
+  * 
+  *
+  * @throws MediaWikiApiErrorException
+  * @throws IOException 
+
+  * @author Markus Kroetzsch
+ */
+
 public class FetchOnlineDataExample {
+	private static boolean printDataToResultsDirectory = false; // Set to true to write the full data output to the 'results' directory.
 
 	public static void main(String[] args) throws MediaWikiApiErrorException, IOException {
 		ExampleHelpers.configureLogging();
-		printDocumentation();
+		FetchOnlineDataExample.printDocumentation();
 
 		WikibaseDataFetcher wbdf = new WikibaseDataFetcher(
 				BasicApiConnection.getWikidataApiConnection(),
 				Datamodel.SITE_WIKIDATA);
 
+		FetchOnlineDataExample.fetchSingleEntityByQID(wbdf);
+		FetchOnlineDataExample.fetchEntitiesByTitles(wbdf);
+		FetchOnlineDataExample.fetchEntityResultsBySearchTerm(wbdf);
+		FetchOnlineDataExample.fetchEntityWithAppliedFilters(wbdf);
+	}
+
+	/**
+	 * Fetches data for a single entity using its QID (unique identifier).
+	 * The fetched data is then written to selected ouputs and prints the 'en' text title to the console.	
+	 */
+	public static void fetchSingleEntityByQID(WikibaseDataFetcher wbdf) throws MediaWikiApiErrorException, IOException {
 		System.out.println("*** Fetching data for one entity:");
-		EntityDocument q42 = wbdf.getEntityDocument("Q42");
-		System.out.println(q42);
 
-		if (q42 instanceof ItemDocument) {
-			System.out.println("The English name for entity Q42 is "
-					+ ((ItemDocument) q42).getLabels().get("en").getText());
+		// Multiple entities can be fetched in one go by providing a list of QID's & using the getEntityDocuments() method.
+		EntityDocument entityDocument = wbdf.getEntityDocument("Q42");
+
+		if (entityDocument instanceof ItemDocument) {
+			System.out.println("The English name for entity Q42 is: "
+					+ ((ItemDocument) entityDocument).getLabels().get("en").getText());
+			writeEntityDataToFile(entityDocument, "single-entity.txt");
 		}
 
-		System.out.println("*** Fetching data for several entities:");
-		Map<String, EntityDocument> results = wbdf.getEntityDocuments("Q80",
-				"P31");
-		// Keys of this map are Qids, but we only use the values here:
-		for (EntityDocument ed : results.values()) {
-			System.out.println("Successfully retrieved data for "
-					+ ed.getEntityId().getId());
+	}
+
+	/**
+	 * Fetches data for multiple enetities using their titles.
+	 * The fetched data is then written to selected ouputs and prints the unique QID to the console.
+	 */
+	public static void fetchEntitiesByTitles(WikibaseDataFetcher wbdf) throws MediaWikiApiErrorException, IOException {
+		System.out.println("*** Fetching data for entities by page titles:");
+
+		// Similar to above, single entities can be fetched by providing a single title & using the getEntityDocumentsByTitle() method.
+		Map<String, EntityDocument> entityDocuments = wbdf.getEntityDocumentsByTitle("enwiki", "Terry Pratchett",
+				"Neil Gaiman");
+
+		for (Entry<String, EntityDocument> entry : entityDocuments.entrySet()) {
+			System.out.println("The QID for the entity with page title \""
+					+ entry.getKey() + "\" is: " + entry.getValue().getEntityId().getId());
+			writeEntityDataToFile(entry.getValue(), "multiple-entities-" + entry.getKey() + ".txt");
 		}
+	}
 
-		System.out
-				.println("*** Fetching data using filters to reduce data volume:");
-		// Only site links from English Wikipedia:
-		wbdf.getFilter().setSiteLinkFilter(Collections.singleton("enwiki"));
-		// Only labels in French:
-		wbdf.getFilter().setLanguageFilter(Collections.singleton("fr"));
-		// No statements at all:
-		wbdf.getFilter().setPropertyFilter(Collections.emptySet());
+	/**
+	 * Fetches results data for whichever entities match the search term for language code provided.
+	 * The resultant data is then written to selected ouputs and prints the QID and label of the search results to the console.
+	 */
+	public static void fetchEntityResultsBySearchTerm(WikibaseDataFetcher wbdf)
+			throws MediaWikiApiErrorException, IOException {
+		System.out.println("*** Doing search on Wikidata for: Douglas Adams");
+
+		// Search for entities using a search term and language code.
+		List<WbSearchEntitiesResult> searchResults = wbdf.searchEntities("Douglas Adams", "fr");
+		if (printDataToResultsDirectory) {
+			try (PrintStream out = new PrintStream(ExampleHelpers.openExampleFileOuputStream("search-results.txt"))) {
+				for (WbSearchEntitiesResult result : searchResults) {
+					writeSearchResutlsToFile(result, out);
+					System.out.println("Found entity with QID " + result.getEntityId() + " and label \""
+							+ result.getLabel() + "\".");
+				}
+			}
+		}
+	}
+
+	/**
+	 * Fetches data for a single entity using its QID and applies filters to reduce the volume of data returned.
+	 * The fetched data is then written to selected ouputs and prints the French label and English Wikipedia page title to the console.
+	 */
+	public static void fetchEntityWithAppliedFilters(WikibaseDataFetcher wbdf)
+			throws MediaWikiApiErrorException, IOException {
+		System.out.println("*** Fetching data using filters to reduce data volume:");
+
+		// apply filters to data to get only what we want...
+		wbdf.getFilter().setSiteLinkFilter(Collections.singleton("enwiki")); // Only site links from English Wikipedia
+		wbdf.getFilter().setLanguageFilter(Collections.singleton("fr")); // Only labels in French
+		wbdf.getFilter().setPropertyFilter(Collections.emptySet()); // No statements at all
+		// Fetch the entity data
 		EntityDocument q8 = wbdf.getEntityDocument("Q8");
+
 		if (q8 instanceof ItemDocument) {
 			System.out.println("The French label for entity Q8 is "
 					+ ((ItemDocument) q8).getLabels().get("fr").getText()
 					+ "\nand its English Wikipedia page has the title "
-					+ ((ItemDocument) q8).getSiteLinks().get("enwiki")
-					.getPageTitle() + ".");
+					+ ((ItemDocument) q8).getSiteLinks().get("enwiki").getPageTitle() + ".");
+			writeEntityDataToFile(q8, "filtered-entity.txt");
 		}
+	}
 
-		System.out.println("*** Fetching data based on page title:");
-		EntityDocument edPratchett = wbdf.getEntityDocumentByTitle("enwiki",
-				"Terry Pratchett");
-		System.out.println("The Qid of Terry Pratchett is "
-				+ edPratchett.getEntityId().getId());
-
-		System.out.println("*** Fetching data based on several page titles:");
-		results = wbdf.getEntityDocumentsByTitle("enwiki", "Wikidata",
-				"Wikipedia");
-		// In this case, keys are titles rather than Qids
-		for (Entry<String, EntityDocument> entry : results.entrySet()) {
-			System.out
-					.println("Successfully retrieved data for page entitled \""
-							+ entry.getKey() + "\": "
-							+ entry.getValue().getEntityId().getId());
+	/**
+	 * Uses a PrintStream and the included ExampleHelpers class to write the entity data to the included file name.
+	 * 
+	 * @param entityDocument The entity document to write to the file.
+	 * @param fileName The name of the file to write the entity data to.
+	 */
+	private static void writeEntityDataToFile(EntityDocument entityDocument, String fileName) {
+		if (!printDataToResultsDirectory) // skip writing to file if not enabled
+			return;
+
+		try (PrintStream out = new PrintStream(ExampleHelpers.openExampleFileOuputStream(fileName))) {
+			out.println(entityDocument);
+		} catch (IOException e) {
+			e.printStackTrace();
 		}
+	}
 
-		System.out.println("** Doing search on Wikidata:");
-		for(WbSearchEntitiesResult result : wbdf.searchEntities("Douglas Adams", "fr")) {
-			System.out.println("Found " + result.getEntityId() + " with label " + result.getLabel());
+	/**
+	* Writes the search results to the specified PrintStream.
+	*
+	* @param result The WbSearchEntitiesResult object containing the search results.
+	* @param out The PrintStream to write the results to.
+	*/
+	private static void writeSearchResutlsToFile(WbSearchEntitiesResult result, PrintStream out) {
+		if (!printDataToResultsDirectory) // skip writing to file if not enabled
+			return;
+
+		String output = "RESULT " + result.getTitle() + " DETAILS:" +
+				"\nconcept_uri:" + result.getConceptUri() +
+				"\ndescription:" + result.getDescription() +
+				"\nentity_ID:" + result.getEntityId() +
+				"\nlabel:" + result.getLabel() +
+				"\npage_ID:" + result.getPageId() +
+				"\nQID:" + result.getTitle() +
+				"\nURL:" + result.getUrl() +
+				"\n";
+
+		if (printDataToResultsDirectory) {
+			out.println(output);
 		}
-
-		System.out.println("*** Done.");
 	}
 
 	/**
-	 * Prints some basic documentation about this program.
-	 */
+	* Prints some basic documentation about this program.
+	*/
 	public static void printDocumentation() {
 		System.out
 				.println("********************************************************************");
 		System.out.println("*** Wikidata Toolkit: FetchOnlineDataExample");
 		System.out.println("*** ");
 		System.out
-				.println("*** This program fetches individual data using the wikidata.org API.");
+				.println(
+						"*** This example demonstrates fetching data from the Wikidata.org API by their QID, title, or query and applying various filters to reduce the volume of data returned.");
 		System.out.println("*** It does not download any dump files.");
 		System.out
 				.println("********************************************************************");
 	}
-
-}
+}