[ issue #47 ] First tranche of Integration tests for Facet Object Queries
Andrea Gazzarini committed Apr 15, 2015
1 parent 08d08a5 commit 2c33fa3
Showing 11 changed files with 682 additions and 214 deletions.
12 changes: 11 additions & 1 deletion solrdf/src/main/java/org/gazzax/labs/solrdf/Strings.java
@@ -29,6 +29,16 @@ public static boolean isNullOrEmpty(final String value) {

public static String round(final String numericStringValue) {
final int indexOfDot = numericStringValue.indexOf(".");
- return indexOfDot != -1 ? numericStringValue.substring(0, indexOfDot) : numericStringValue;
+ if (indexOfDot == -1) {
+     return numericStringValue;
+ }
+
+ final String d = numericStringValue.substring(indexOfDot + 1);
+ for (int index = 0; index < d.length(); index++) {
+     if (d.charAt(index) != '0') {
+         return numericStringValue;
+     }
+ }
+ return numericStringValue.substring(0, indexOfDot);
}
}
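The rewritten round(...) changes behaviour: the fractional part is now dropped only when it consists entirely of zeros, whereas the previous one-liner truncated at the first dot unconditionally. A minimal sketch of how that contract could be asserted, assuming plain JUnit 4; the test class and its cases are illustrative, not part of the commit:

import static org.gazzax.labs.solrdf.Strings.round;
import static org.junit.Assert.assertEquals;

import org.junit.Test;

public class RoundSketchTest {
    @Test
    public void allZeroFractionIsDropped() {
        // every fractional digit is '0', so the value collapses to its integer part
        assertEquals("3", round("3.000"));
    }

    @Test
    public void significantFractionIsPreserved() {
        // one non-zero fractional digit is enough to leave the value untouched
        assertEquals("3.14", round("3.14"));
        assertEquals("3.140", round("3.140"));
    }

    @Test
    public void plainIntegersPassThrough() {
        // no decimal point at all: the input is returned as-is
        assertEquals("42", round("42"));
    }
}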
FacetQuery.java
@@ -18,6 +18,10 @@
* @see https://cwiki.apache.org/confluence/display/solr/Faceting#Faceting-RangeFaceting
*/
public abstract class FacetQuery {
+ public final static String STRING_HINT = "str";
+ public final static String BOOLEAN_HINT = "bool";
+ public final static String NUMERIC_HINT = "num";
+ public final static String DATE_HINT = "date";

protected final int index;

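The four hints give callers a compact way to declare which datatype a facet object query targets. A hypothetical dispatch, only to illustrate the intent; the class, the method and the returned field names are invented for this sketch and are not part of the commit:

final class HintDispatchSketch {
    // Maps a datatype hint to the (assumed) index field holding those objects.
    static String fieldFor(final String hint) {
        switch (hint) {
        case FacetQuery.NUMERIC_HINT:
            return "numeric_object_field"; // assumed field name
        case FacetQuery.DATE_HINT:
            return "date_object_field"; // assumed field name
        case FacetQuery.BOOLEAN_HINT:
            return "boolean_object_field"; // assumed field name
        case FacetQuery.STRING_HINT:
        default:
            return "string_object_field"; // assumed field name
        }
    }
}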
NumericFacets.java
@@ -1,16 +1,14 @@
package org.gazzax.labs.solrdf.handler.search.faceting;

+ import static org.gazzax.labs.solrdf.Strings.round;

import java.io.IOException;
import java.util.ArrayDeque;
import java.util.Collections;
import java.util.Deque;
import java.util.HashMap;
- import java.util.HashSet;
- import java.util.Iterator;
import java.util.List;
import java.util.Map;
- import java.util.Set;
- import static org.gazzax.labs.solrdf.Strings.*;

import org.apache.lucene.document.FieldType.NumericType;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.ReaderUtil;
@@ -45,9 +43,6 @@
*/
final class NumericFacets {

- NumericFacets() {
- }

static class HashTable {

static final float LOAD_FACTOR = 0.7f;
@@ -132,11 +127,8 @@ public static NamedList<Integer> getCounts(
final int mincount,
final boolean missing,
final String sort) throws IOException {
- final boolean zeros = mincount <= 0;
- // mincount = Math.max(mincount, 1);
-
- final SchemaField sf = searcher.getSchema().getField(fieldName);
- final FieldType ft = sf.getType();
+ final SchemaField schemaField = searcher.getSchema().getField(fieldName);
+ final FieldType ft = schemaField.getType();
final NumericType numericType = ft.getNumericType();
if (numericType == null) {
throw new IllegalStateException();
@@ -162,24 +154,6 @@ public static NamedList<Integer> getCounts(
case LONG:
longs = FieldCache.DEFAULT.getLongs(ctx.reader(), fieldName, true);
break;
- case INT:
-     final FieldCache.Ints ints = FieldCache.DEFAULT.getInts(ctx.reader(), fieldName, true);
-     longs = new FieldCache.Longs() {
-         @Override
-         public long get(int docID) {
-             return ints.get(docID);
-         }
-     };
-     break;
- case FLOAT:
-     final FieldCache.Floats floats = FieldCache.DEFAULT.getFloats(ctx.reader(), fieldName, true);
-     longs = new FieldCache.Longs() {
-         @Override
-         public long get(int docID) {
-             return NumericUtils.floatToSortableInt(floats.get(docID));
-         }
-     };
-     break;
case DOUBLE:
final FieldCache.Doubles doubles = FieldCache.DEFAULT.getDoubles(ctx.reader(), fieldName, true);
longs = new FieldCache.Longs() {
@@ -238,136 +212,48 @@ protected boolean lessThan(Entry a, Entry b) {
}

// 4. build the NamedList
- final ValueSource vs = ft.getValueSource(sf, null);
+ final ValueSource vs = ft.getValueSource(schemaField, null);
final NamedList<Integer> result = new NamedList<>();

- // This stuff is complicated because if facet.mincount=0, the counts needs
- // to be merged with terms from the terms dict
- if (!zeros || FacetParams.FACET_SORT_COUNT.equals(sort) || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
-     // Only keep items we're interested in
-     final Deque<Entry> counts = new ArrayDeque<>();
-     while (pq.size() > offset) {
-         counts.addFirst(pq.pop());
-     }
-
-     // Entries from the PQ first, then using the terms dictionary
-     for (Entry entry : counts) {
-         final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
-         final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
-         result.add(round(values.strVal(entry.docID - leaves.get(readerIdx).docBase)), entry.count);
-     }
-
-     if (zeros && (limit < 0 || result.size() < limit)) { // need to merge with the term dict
-         if (!sf.indexed()) {
-             throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on field "
-                     + sf.getName() + " which is not indexed");
-         }
-         // Add zeros until there are limit results
-         final Set<String> alreadySeen = new HashSet<>();
-         while (pq.size() > 0) {
-             Entry entry = pq.pop();
-             final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
-             final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
-             alreadySeen.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase));
-         }
-
-         for (int i = 0; i < result.size(); ++i) {
-             alreadySeen.add(result.getName(i));
-         }
-
-         final Terms terms = searcher.getAtomicReader().terms(fieldName);
-         if (terms != null) {
-             final String prefixStr = TrieField.getMainValuePrefix(ft);
-             final BytesRef prefix;
-             if (prefixStr != null) {
-                 prefix = new BytesRef(prefixStr);
-             } else {
-                 prefix = new BytesRef();
-             }
-             final TermsEnum termsEnum = terms.iterator(null);
-             BytesRef term;
-             switch (termsEnum.seekCeil(prefix)) {
-             case FOUND:
-             case NOT_FOUND:
-                 term = termsEnum.term();
-                 break;
-             case END:
-                 term = null;
-                 break;
-             default:
-                 throw new AssertionError();
-             }
-
-             final CharsRef spare = new CharsRef();
-             for (int skipped = hashTable.size; skipped < offset && term != null
-                     && StringHelper.startsWith(term, prefix);) {
-                 ft.indexedToReadable(term, spare);
-                 final String termStr = spare.toString();
-                 if (!alreadySeen.contains(termStr)) {
-                     ++skipped;
-                 }
-                 term = termsEnum.next();
-             }
-             for (; term != null && StringHelper.startsWith(term, prefix)
-                     && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
-                 ft.indexedToReadable(term, spare);
-                 final String termStr = round(spare.toString());
-                 if (!alreadySeen.contains(termStr)) {
-                     result.add(termStr, 0);
-                 }
-             }
-         }
+ final Map<String, Integer> counts = new HashMap<>();
+
+ while (pq.size() > 0) {
+     final Entry entry = pq.pop();
+     final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
+     final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
+     counts.put(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
+ }
+
+ final Terms terms = searcher.getAtomicReader().terms(fieldName);
+ if (terms != null) {
+     final String prefixStr = TrieField.getMainValuePrefix(ft);
+     final BytesRef prefix;
+     if (prefixStr != null) {
+         prefix = new BytesRef(prefixStr);
+     } else {
+         prefix = new BytesRef();
+     }
- } else {
-     // sort=index, mincount=0 and we have less than limit items
-     // => Merge the PQ and the terms dictionary on the fly
-     if (!sf.indexed()) {
-         throw new IllegalStateException("Cannot use " + FacetParams.FACET_SORT + "="
-                 + FacetParams.FACET_SORT_INDEX + " on a field which is not indexed");
+     final TermsEnum termsEnum = terms.iterator(null);
+     BytesRef term;
+     switch (termsEnum.seekCeil(prefix)) {
+     case FOUND:
+     case NOT_FOUND:
+         term = termsEnum.term();
+         break;
+     case END:
+         term = null;
+         break;
+     default:
+         throw new AssertionError();
+     }
-     final Map<String, Integer> counts = new HashMap<>();
-     while (pq.size() > 0) {
-         final Entry entry = pq.pop();
-         final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
-         final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
-         counts.put(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
+     final CharsRef spare = new CharsRef();
+     for (int i = 0; i < offset && term != null && StringHelper.startsWith(term, prefix); ++i) {
+         term = termsEnum.next();
+     }
-     final Terms terms = searcher.getAtomicReader().terms(fieldName);
-     if (terms != null) {
-         final String prefixStr = TrieField.getMainValuePrefix(ft);
-         final BytesRef prefix;
-         if (prefixStr != null) {
-             prefix = new BytesRef(prefixStr);
-         } else {
-             prefix = new BytesRef();
-         }
-         final TermsEnum termsEnum = terms.iterator(null);
-         BytesRef term;
-         switch (termsEnum.seekCeil(prefix)) {
-         case FOUND:
-         case NOT_FOUND:
-             term = termsEnum.term();
-             break;
-         case END:
-             term = null;
-             break;
-         default:
-             throw new AssertionError();
-         }
-         final CharsRef spare = new CharsRef();
-         for (int i = 0; i < offset && term != null && StringHelper.startsWith(term, prefix); ++i) {
-             term = termsEnum.next();
-         }
-         for (; term != null && StringHelper.startsWith(term, prefix) && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
-             ft.indexedToReadable(term, spare);
-             final String termStr = spare.toString();
-             Integer count = counts.get(termStr);
-             if (count == null) {
-                 count = 0;
-             }
+     for (; term != null && StringHelper.startsWith(term, prefix) && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
+         ft.indexedToReadable(term, spare);
+         final String termStr = spare.toString();
+         final Integer count = counts.get(termStr);
+         if (count != null && count > 0) {
result.add(round(termStr), count);
}
}
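The rewrite leaves NumericFacets with a single, simpler path: the INT and FLOAT cases are gone (LONG and DOUBLE remain), and so is the whole facet.mincount=0 machinery that merged zero counts in from the terms dictionary. What survives is one pass that harvests per-value counts from the priority queue and then walks the terms dictionary in index order, emitting only positive counts through round(...). A simplified sketch of that merge step, with plain Java collections standing in for the Lucene machinery (Terms, TermsEnum, FunctionValues):

import java.util.LinkedHashMap;
import java.util.Map;

final class MergeSketch {
    // counts: per-value totals harvested from the priority queue.
    // termsInIndexOrder: the terms dictionary, already in index order.
    static Map<String, Integer> merge(final Map<String, Integer> counts,
            final Iterable<String> termsInIndexOrder, final int limit) {
        final Map<String, Integer> result = new LinkedHashMap<>();
        for (final String term : termsInIndexOrder) {
            if (limit >= 0 && result.size() >= limit) {
                break;
            }
            final Integer count = counts.get(term);
            // zero and missing counts are skipped: mincount is now always >= 1
            if (count != null && count > 0) {
                result.put(term, count);
            }
        }
        return result;
    }
}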
PerSegmentSingleValuedFaceting.java
@@ -1,7 +1,13 @@
package org.gazzax.labs.solrdf.handler.search.faceting;
import java.io.IOException;
- import java.util.*;
- import java.util.concurrent.*;
+ import java.util.LinkedList;
+ import java.util.List;
+ import java.util.concurrent.Callable;
+ import java.util.concurrent.CompletionService;
+ import java.util.concurrent.ExecutionException;
+ import java.util.concurrent.Executor;
+ import java.util.concurrent.ExecutorCompletionService;
+ import java.util.concurrent.Future;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.SortedDocValues;
@@ -12,11 +18,9 @@
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
- import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.UnicodeUtil;
- import org.apache.lucene.util.packed.PackedInts;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.util.NamedList;
@@ -26,6 +30,14 @@
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.BoundedTreeSet;

+ /**
+  * A class that generates facet information for a given request. Note that it
+  * extends the already existing {@link SimpleFacets} in order to reuse that
+  * logic as much as possible.
+  *
+  * @author Andrea Gazzarini
+  * @since 1.0
+  */
// FIXME this depends on SimpleFacets (several static method calls)
public class PerSegmentSingleValuedFaceting {

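The new imports (Callable, CompletionService, ExecutorCompletionService, Future) spell out the execution model: one task per index segment, with per-segment results merged as they complete. A minimal, self-contained sketch of that pattern, where a plain Map<String, Integer> stands in for the real per-segment facet accumulator; the types and method names below are illustrative, not the class's actual API:

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
import java.util.concurrent.Executor;
import java.util.concurrent.ExecutorCompletionService;

final class PerSegmentSketch {
    // Counts the values of a single "segment" (here just a list of strings).
    static Map<String, Integer> countsFor(final List<String> segment) {
        final Map<String, Integer> counts = new HashMap<>();
        for (final String value : segment) {
            final Integer count = counts.get(value);
            counts.put(value, count == null ? 1 : count + 1);
        }
        return counts;
    }

    // Submits one task per segment, then merges results in completion order.
    static Map<String, Integer> facetAllSegments(final Executor executor,
            final List<List<String>> segments) throws Exception {
        final CompletionService<Map<String, Integer>> service = new ExecutorCompletionService<>(executor);
        for (final List<String> segment : segments) {
            service.submit(new Callable<Map<String, Integer>>() {
                @Override
                public Map<String, Integer> call() {
                    return countsFor(segment);
                }
            });
        }
        final Map<String, Integer> merged = new HashMap<>();
        for (int i = 0; i < segments.size(); i++) {
            for (final Map.Entry<String, Integer> entry : service.take().get().entrySet()) {
                final Integer count = merged.get(entry.getKey());
                merged.put(entry.getKey(), count == null ? entry.getValue() : count + entry.getValue());
            }
        }
        return merged;
    }
}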
@@ -175,14 +175,14 @@ public NamedList<Object> call() throws Exception {
final NamedList<Object> result = new SimpleOrderedMap<>();
if (termList != null) {
result.add(
- foq.alias(),
+ foq.key(),
getListedTermCounts(
workerFacetValue,
collector.getDocSet(),
StrUtils.splitSmart(termList, ",", true)));
} else {
result.add(
- foq.alias(),
+ foq.key(),
getTermCounts(foq, collector.getDocSet()));
}
return result;
@@ -446,7 +446,7 @@ public NamedList<Object> getFacetDateCounts() {
* @param base the values constraint for this specific count computation.
*/
public NamedList<Integer> getTermCounts(final FacetObjectQuery query, final DocSet base) throws IOException {
- final int mincount = query.optionalInt(FacetParams.FACET_MINCOUNT, 0);
+ final int mincount = Math.max(query.optionalInt(FacetParams.FACET_MINCOUNT, 1), 1);
return getTermCounts(query, mincount, base);
}

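This one-line change is what makes the NumericFacets simplification above safe: whatever the request asks for, the effective mincount is at least 1, so zero-count entries can never be requested for a facet object query. With illustrative values:

// A client sending facet.mincount=0 is clamped back to 1.
final int requested = 0;                      // e.g. facet.mincount=0 in the request
final int mincount = Math.max(requested, 1);  // -> 1: zero-count entries are never returned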