[ issue #47 ] First tranche of Integration tests for Facet Object Queries
Andrea Gazzarini committed Apr 15, 2015
1 parent 08d08a5 commit 2c33fa3
Showing 11 changed files with 682 additions and 214 deletions.
12 changes: 11 additions & 1 deletion solrdf/src/main/java/org/gazzax/labs/solrdf/Strings.java
@@ -29,6 +29,16 @@ public static boolean isNullOrEmpty(final String value) {

public static String round(final String numericStringValue) {
final int indexOfDot = numericStringValue.indexOf(".");
- return indexOfDot != -1 ? numericStringValue.substring(0, indexOfDot) : numericStringValue;
+ if (indexOfDot == -1) {
+     return numericStringValue;
+ }
+
+ final String d = numericStringValue.substring(indexOfDot + 1);
+ for (int index = 0; index < d.length(); index++) {
+     if (d.charAt(index) != '0') {
+         return numericStringValue;
+     }
+ }
+ return numericStringValue.substring(0, indexOfDot);
}
}
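The rewritten round(...) changes behaviour: the fractional part is now dropped only when it consists entirely of zeros, whereas the previous one-liner truncated at the first dot unconditionally. A minimal sketch of how that contract could be asserted, assuming plain JUnit 4; the test class and its cases are illustrative, not part of the commit:

import static org.gazzax.labs.solrdf.Strings.round;
import static org.junit.Assert.assertEquals;

import org.junit.Test;

public class RoundSketchTest {
    @Test
    public void allZeroFractionIsDropped() {
        // every fractional digit is '0', so the value collapses to its integer part
        assertEquals("3", round("3.000"));
    }

    @Test
    public void significantFractionIsPreserved() {
        // one non-zero fractional digit is enough to leave the value untouched
        assertEquals("3.14", round("3.14"));
        assertEquals("3.140", round("3.140"));
    }

    @Test
    public void plainIntegersPassThrough() {
        // no decimal point at all: the input is returned as-is
        assertEquals("42", round("42"));
    }
}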
FacetQuery.java
@@ -18,6 +18,10 @@
* @see https://cwiki.apache.org/confluence/display/solr/Faceting#Faceting-RangeFaceting
*/
public abstract class FacetQuery {
+ public final static String STRING_HINT = "str";
+ public final static String BOOLEAN_HINT = "bool";
+ public final static String NUMERIC_HINT = "num";
+ public final static String DATE_HINT = "date";

protected final int index;

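The four hints give callers a compact way to declare which datatype a facet object query targets. A hypothetical dispatch, only to illustrate the intent; the class, the method and the returned field names are invented for this sketch and are not part of the commit:

final class HintDispatchSketch {
    // Maps a datatype hint to the (assumed) index field holding those objects.
    static String fieldFor(final String hint) {
        switch (hint) {
        case FacetQuery.NUMERIC_HINT:
            return "numeric_object_field"; // assumed field name
        case FacetQuery.DATE_HINT:
            return "date_object_field"; // assumed field name
        case FacetQuery.BOOLEAN_HINT:
            return "boolean_object_field"; // assumed field name
        case FacetQuery.STRING_HINT:
        default:
            return "string_object_field"; // assumed field name
        }
    }
}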
NumericFacets.java
@@ -1,16 +1,14 @@
package org.gazzax.labs.solrdf.handler.search.faceting;

+ import static org.gazzax.labs.solrdf.Strings.round;

import java.io.IOException;
import java.util.ArrayDeque;
import java.util.Collections;
import java.util.Deque;
import java.util.HashMap;
- import java.util.HashSet;
- import java.util.Iterator;
import java.util.List;
import java.util.Map;
- import java.util.Set;
- import static org.gazzax.labs.solrdf.Strings.*;

import org.apache.lucene.document.FieldType.NumericType;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.ReaderUtil;
@@ -45,9 +43,6 @@
*/
final class NumericFacets {

- NumericFacets() {
- }

static class HashTable {

static final float LOAD_FACTOR = 0.7f;
@@ -132,11 +127,8 @@ public static NamedList<Integer> getCounts(
final int mincount,
final boolean missing,
final String sort) throws IOException {
- final boolean zeros = mincount <= 0;
- // mincount = Math.max(mincount, 1);
-
- final SchemaField sf = searcher.getSchema().getField(fieldName);
- final FieldType ft = sf.getType();
+ final SchemaField schemaField = searcher.getSchema().getField(fieldName);
+ final FieldType ft = schemaField.getType();
final NumericType numericType = ft.getNumericType();
if (numericType == null) {
throw new IllegalStateException();
@@ -162,24 +154,6 @@ public static NamedList<Integer> getCounts(
case LONG:
longs = FieldCache.DEFAULT.getLongs(ctx.reader(), fieldName, true);
break;
- case INT:
-     final FieldCache.Ints ints = FieldCache.DEFAULT.getInts(ctx.reader(), fieldName, true);
-     longs = new FieldCache.Longs() {
-         @Override
-         public long get(int docID) {
-             return ints.get(docID);
-         }
-     };
-     break;
- case FLOAT:
-     final FieldCache.Floats floats = FieldCache.DEFAULT.getFloats(ctx.reader(), fieldName, true);
-     longs = new FieldCache.Longs() {
-         @Override
-         public long get(int docID) {
-             return NumericUtils.floatToSortableInt(floats.get(docID));
-         }
-     };
-     break;
case DOUBLE:
final FieldCache.Doubles doubles = FieldCache.DEFAULT.getDoubles(ctx.reader(), fieldName, true);
longs = new FieldCache.Longs() {
@@ -238,136 +212,48 @@ protected boolean lessThan(Entry a, Entry b) {
}

// 4. build the NamedList
- final ValueSource vs = ft.getValueSource(sf, null);
+ final ValueSource vs = ft.getValueSource(schemaField, null);
final NamedList<Integer> result = new NamedList<>();

- // This stuff is complicated because if facet.mincount=0, the counts needs
- // to be merged with terms from the terms dict
- if (!zeros || FacetParams.FACET_SORT_COUNT.equals(sort) || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
-     // Only keep items we're interested in
-     final Deque<Entry> counts = new ArrayDeque<>();
-     while (pq.size() > offset) {
-         counts.addFirst(pq.pop());
-     }
-
-     // Entries from the PQ first, then using the terms dictionary
-     for (Entry entry : counts) {
-         final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
-         final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
-         result.add(round(values.strVal(entry.docID - leaves.get(readerIdx).docBase)), entry.count);
-     }
-
-     if (zeros && (limit < 0 || result.size() < limit)) { // need to merge with the term dict
-         if (!sf.indexed()) {
-             throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on field "
-                     + sf.getName() + " which is not indexed");
-         }
-         // Add zeros until there are limit results
-         final Set<String> alreadySeen = new HashSet<>();
-         while (pq.size() > 0) {
-             Entry entry = pq.pop();
-             final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
-             final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
-             alreadySeen.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase));
-         }
-
-         for (int i = 0; i < result.size(); ++i) {
-             alreadySeen.add(result.getName(i));
-         }
-
-         final Terms terms = searcher.getAtomicReader().terms(fieldName);
-         if (terms != null) {
-             final String prefixStr = TrieField.getMainValuePrefix(ft);
-             final BytesRef prefix;
-             if (prefixStr != null) {
-                 prefix = new BytesRef(prefixStr);
-             } else {
-                 prefix = new BytesRef();
-             }
-             final TermsEnum termsEnum = terms.iterator(null);
-             BytesRef term;
-             switch (termsEnum.seekCeil(prefix)) {
-             case FOUND:
-             case NOT_FOUND:
-                 term = termsEnum.term();
-                 break;
-             case END:
-                 term = null;
-                 break;
-             default:
-                 throw new AssertionError();
-             }
-
-             final CharsRef spare = new CharsRef();
-             for (int skipped = hashTable.size; skipped < offset && term != null
-                     && StringHelper.startsWith(term, prefix);) {
-                 ft.indexedToReadable(term, spare);
-                 final String termStr = spare.toString();
-                 if (!alreadySeen.contains(termStr)) {
-                     ++skipped;
-                 }
-                 term = termsEnum.next();
-             }
-             for (; term != null && StringHelper.startsWith(term, prefix)
-                     && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
-                 ft.indexedToReadable(term, spare);
-                 final String termStr = round(spare.toString());
-                 if (!alreadySeen.contains(termStr)) {
-                     result.add(termStr, 0);
-                 }
-             }
-         }
+ final Map<String, Integer> counts = new HashMap<>();
+
+ while (pq.size() > 0) {
+     final Entry entry = pq.pop();
+     final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
+     final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
+     counts.put(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
+ }
+
+ final Terms terms = searcher.getAtomicReader().terms(fieldName);
+ if (terms != null) {
+     final String prefixStr = TrieField.getMainValuePrefix(ft);
+     final BytesRef prefix;
+     if (prefixStr != null) {
+         prefix = new BytesRef(prefixStr);
+     } else {
+         prefix = new BytesRef();
+     }
- } else {
-     // sort=index, mincount=0 and we have less than limit items
-     // => Merge the PQ and the terms dictionary on the fly
-     if (!sf.indexed()) {
-         throw new IllegalStateException("Cannot use " + FacetParams.FACET_SORT + "="
-                 + FacetParams.FACET_SORT_INDEX + " on a field which is not indexed");
+     final TermsEnum termsEnum = terms.iterator(null);
+     BytesRef term;
+     switch (termsEnum.seekCeil(prefix)) {
+     case FOUND:
+     case NOT_FOUND:
+         term = termsEnum.term();
+         break;
+     case END:
+         term = null;
+         break;
+     default:
+         throw new AssertionError();
+     }
-     final Map<String, Integer> counts = new HashMap<>();
-     while (pq.size() > 0) {
-         final Entry entry = pq.pop();
-         final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
-         final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
-         counts.put(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
+     final CharsRef spare = new CharsRef();
+     for (int i = 0; i < offset && term != null && StringHelper.startsWith(term, prefix); ++i) {
+         term = termsEnum.next();
+     }
-     final Terms terms = searcher.getAtomicReader().terms(fieldName);
-     if (terms != null) {
-         final String prefixStr = TrieField.getMainValuePrefix(ft);
-         final BytesRef prefix;
-         if (prefixStr != null) {
-             prefix = new BytesRef(prefixStr);
-         } else {
-             prefix = new BytesRef();
-         }
-         final TermsEnum termsEnum = terms.iterator(null);
-         BytesRef term;
-         switch (termsEnum.seekCeil(prefix)) {
-         case FOUND:
-         case NOT_FOUND:
-             term = termsEnum.term();
-             break;
-         case END:
-             term = null;
-             break;
-         default:
-             throw new AssertionError();
-         }
-         final CharsRef spare = new CharsRef();
-         for (int i = 0; i < offset && term != null && StringHelper.startsWith(term, prefix); ++i) {
-             term = termsEnum.next();
-         }
-         for (; term != null && StringHelper.startsWith(term, prefix) && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
-             ft.indexedToReadable(term, spare);
-             final String termStr = spare.toString();
-             Integer count = counts.get(termStr);
-             if (count == null) {
-                 count = 0;
-             }
+     for (; term != null && StringHelper.startsWith(term, prefix) && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
+         ft.indexedToReadable(term, spare);
+         final String termStr = spare.toString();
+         final Integer count = counts.get(termStr);
+         if (count != null && count > 0) {
result.add(round(termStr), count);
}
}
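The rewrite leaves NumericFacets with a single, simpler path: the INT and FLOAT cases are gone (LONG and DOUBLE remain), and so is the whole facet.mincount=0 machinery that merged zero counts in from the terms dictionary. What survives is one pass that harvests per-value counts from the priority queue and then walks the terms dictionary in index order, emitting only positive counts through round(...). A simplified sketch of that merge step, with plain Java collections standing in for the Lucene machinery (Terms, TermsEnum, FunctionValues):

import java.util.LinkedHashMap;
import java.util.Map;

final class MergeSketch {
    // counts: per-value totals harvested from the priority queue.
    // termsInIndexOrder: the terms dictionary, already in index order.
    static Map<String, Integer> merge(final Map<String, Integer> counts,
            final Iterable<String> termsInIndexOrder, final int limit) {
        final Map<String, Integer> result = new LinkedHashMap<>();
        for (final String term : termsInIndexOrder) {
            if (limit >= 0 && result.size() >= limit) {
                break;
            }
            final Integer count = counts.get(term);
            // zero and missing counts are skipped: mincount is now always >= 1
            if (count != null && count > 0) {
                result.put(term, count);
            }
        }
        return result;
    }
}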
PerSegmentSingleValuedFaceting.java
@@ -1,7 +1,13 @@
package org.gazzax.labs.solrdf.handler.search.faceting;
import java.io.IOException;
- import java.util.*;
- import java.util.concurrent.*;
+ import java.util.LinkedList;
+ import java.util.List;
+ import java.util.concurrent.Callable;
+ import java.util.concurrent.CompletionService;
+ import java.util.concurrent.ExecutionException;
+ import java.util.concurrent.Executor;
+ import java.util.concurrent.ExecutorCompletionService;
+ import java.util.concurrent.Future;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.SortedDocValues;
@@ -12,11 +18,9 @@
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
- import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.UnicodeUtil;
- import org.apache.lucene.util.packed.PackedInts;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.util.NamedList;
@@ -26,6 +30,14 @@
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.BoundedTreeSet;

+ /**
+  * A class that generates facet information for a given request. Note that it
+  * extends the already existing {@link SimpleFacets} in order to reuse that
+  * logic as much as possible.
+  *
+  * @author Andrea Gazzarini
+  * @since 1.0
+  */
// FIXME this depends on SimpleFacets (several static method calls)
public class PerSegmentSingleValuedFaceting {

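The new imports (Callable, CompletionService, ExecutorCompletionService, Future) spell out the execution model: one task per index segment, with per-segment results merged as they complete. A minimal, self-contained sketch of that pattern, where a plain Map<String, Integer> stands in for the real per-segment facet accumulator; the types and method names below are illustrative, not the class's actual API:

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
import java.util.concurrent.Executor;
import java.util.concurrent.ExecutorCompletionService;

final class PerSegmentSketch {
    // Counts the values of a single "segment" (here just a list of strings).
    static Map<String, Integer> countsFor(final List<String> segment) {
        final Map<String, Integer> counts = new HashMap<>();
        for (final String value : segment) {
            final Integer count = counts.get(value);
            counts.put(value, count == null ? 1 : count + 1);
        }
        return counts;
    }

    // Submits one task per segment, then merges results in completion order.
    static Map<String, Integer> facetAllSegments(final Executor executor,
            final List<List<String>> segments) throws Exception {
        final CompletionService<Map<String, Integer>> service = new ExecutorCompletionService<>(executor);
        for (final List<String> segment : segments) {
            service.submit(new Callable<Map<String, Integer>>() {
                @Override
                public Map<String, Integer> call() {
                    return countsFor(segment);
                }
            });
        }
        final Map<String, Integer> merged = new HashMap<>();
        for (int i = 0; i < segments.size(); i++) {
            for (final Map.Entry<String, Integer> entry : service.take().get().entrySet()) {
                final Integer count = merged.get(entry.getKey());
                merged.put(entry.getKey(), count == null ? entry.getValue() : count + entry.getValue());
            }
        }
        return merged;
    }
}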
@@ -175,14 +175,14 @@ public NamedList<Object> call() throws Exception {
final NamedList<Object> result = new SimpleOrderedMap<>();
if (termList != null) {
result.add(
- foq.alias(),
+ foq.key(),
getListedTermCounts(
workerFacetValue,
collector.getDocSet(),
StrUtils.splitSmart(termList, ",", true)));
} else {
result.add(
- foq.alias(),
+ foq.key(),
getTermCounts(foq, collector.getDocSet()));
}
return result;
@@ -446,7 +446,7 @@ public NamedList<Object> getFacetDateCounts() {
* @param base the values constraint for this specific count computation.
*/
public NamedList<Integer> getTermCounts(final FacetObjectQuery query, final DocSet base) throws IOException {
- final int mincount = query.optionalInt(FacetParams.FACET_MINCOUNT, 0);
+ final int mincount = Math.max(query.optionalInt(FacetParams.FACET_MINCOUNT, 1), 1);
return getTermCounts(query, mincount, base);
}

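This one-line change is what makes the NumericFacets simplification above safe: whatever the request asks for, the effective mincount is at least 1, so zero-count entries can never be requested for a facet object query. With illustrative values:

// A client sending facet.mincount=0 is clamped back to 1.
final int requested = 0;                      // e.g. facet.mincount=0 in the request
final int mincount = Math.max(requested, 1);  // -> 1: zero-count entries are never returned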