Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

wire format.tensors="hex" into /document/v1 API #33250

Merged
merged 1 commit into from
Feb 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,15 @@ public Inspector inspect() {

/**
 * Returns the result of {@link #toJson(JsonFormat.EncodeOptions)} called with
 * default-constructed encode options.
 *
 * @return the JSON string produced by the options-based overload
 */
@Override
public String toJson() {
    // The no-arg EncodeOptions constructor leaves every flag at false.
    return toJson(new JsonFormat.EncodeOptions());
}

/**
 * Convenience overload of {@link #toJson(JsonFormat.EncodeOptions)}.
 *
 * @param tensorShortForm passed on as the short-form flag; the remaining options keep their defaults
 * @return the JSON string produced by the options-based overload
 */
public String toJson(boolean tensorShortForm) {
    return toJson(new JsonFormat.EncodeOptions(tensorShortForm));
}

/**
 * Convenience overload of {@link #toJson(JsonFormat.EncodeOptions)}.
 *
 * @param tensorShortForm    passed on as the short-form flag
 * @param tensorDirectValues passed on as the direct-values flag
 * @return the JSON string produced by the options-based overload
 */
public String toJson(boolean tensorShortForm, boolean tensorDirectValues) {
    return toJson(new JsonFormat.EncodeOptions(tensorShortForm, tensorDirectValues));
}

public String toJson(JsonFormat.EncodeOptions tensorOptions) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,20 @@ static void wrapIOException(SubroutineThrowingIOException lambda) {
}
}

/**
 * Legacy overload taking the tensor formatting flags as two booleans.
 * Packs them into {@link JsonFormat.EncodeOptions} and forwards to the options-based overload.
 * TODO: remove
 */
public static void serializeTensorField(JsonGenerator generator, FieldBase field, TensorFieldValue value,
                                        boolean shortForm, boolean directValues) {
    JsonFormat.EncodeOptions legacyOptions = new JsonFormat.EncodeOptions(shortForm, directValues);
    serializeTensorField(generator, field, value, legacyOptions);
}

public static void serializeTensorField(JsonGenerator generator, FieldBase field, TensorFieldValue value,
JsonFormat.EncodeOptions tensorOptions) {
wrapIOException(() -> {
fieldNameIfNotNull(generator, field);
if (value.getTensor().isPresent()) {
Tensor tensor = value.getTensor().get();
byte[] encoded = JsonFormat.encode(tensor, shortForm, directValues);
byte[] encoded = JsonFormat.encode(tensor, tensorOptions);
generator.writeRawValue(new String(encoded, StandardCharsets.UTF_8));
}
else {
Expand Down
48 changes: 39 additions & 9 deletions document/src/main/java/com/yahoo/document/json/JsonWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import com.yahoo.document.datatypes.TensorFieldValue;
import com.yahoo.document.datatypes.WeightedSet;
import com.yahoo.document.serialization.DocumentWriter;
import com.yahoo.tensor.serialization.JsonFormat;
import com.yahoo.vespa.objects.FieldBase;
import com.yahoo.vespa.objects.Serializer;

Expand Down Expand Up @@ -83,9 +84,7 @@ public class JsonWriter implements DocumentWriter {
.build();

// JSON generator this writer emits its output to.
private final JsonGenerator generator;

// NOTE(review): these two boolean fields look like the pre-change side of the diff;
// the options-based constructor only receives tensorOptions — confirm and drop them.
private final boolean tensorShortForm;
private final boolean tensorDirectValues;
// Tensor formatting options handed to serializeTensorField when a tensor field is written.
private final JsonFormat.EncodeOptions tensorOptions;

/**
* Creates a JsonWriter.
Expand All @@ -97,26 +96,42 @@ public JsonWriter(OutputStream out) {
this(createPrivateGenerator(out));
}

// do not use
// Legacy boolean-flag constructor: wraps the stream in a private generator and
// packs the two flags into EncodeOptions for the options-based constructor.
public JsonWriter(OutputStream out, boolean tensorShortForm, boolean tensorDirectValues) {
    this(createPrivateGenerator(out), new JsonFormat.EncodeOptions(tensorShortForm, tensorDirectValues));
}

/**
 * Creates a JsonWriter that writes to a generator created privately for the given stream.
 *
 * @param out the stream the private generator is created on
 * @param tensorOptions tensor formatting options (short/long, direct/wrapped, hexdump or not)
 */
public JsonWriter(OutputStream out, JsonFormat.EncodeOptions tensorOptions) {
this(createPrivateGenerator(out), tensorOptions);
}

/**
 * Legacy constructor taking the tensor formatting choices as two booleans.
 * The writer emits to the supplied generator but does <i>not</i> take ownership
 * of it: it never closes the generator and only flushes it explicitly after a
 * full Document instance has been written.
 * TODO: remove.
 *
 * @param generator the output JSON generator
 * @param tensorShortForm whether to use the short type-dependent form for tensor values
 * @param tensorDirectValues whether to output tensor values directly or wrapped in a map also containing the type
 */
public JsonWriter(JsonGenerator generator, boolean tensorShortForm, boolean tensorDirectValues) {
    // Hex encoding of the dense part cannot be requested through this legacy signature.
    this(generator, new JsonFormat.EncodeOptions(tensorShortForm, tensorDirectValues, false));
}

/**
 * Create a Document writer which will write to the input JSON generator.
 * JsonWriter will not close the generator and only flush it explicitly
 * after having written a full Document instance. In other words, JsonWriter
 * will <i>not</i> take ownership of the generator.
 *
 * @param generator the output JSON generator
 * @param tensorOptions tensor formatting options (short/long, direct/wrapped, hexdump or not)
 */
public JsonWriter(JsonGenerator generator, JsonFormat.EncodeOptions tensorOptions) {
    this.generator = generator;
    // The options object replaces the former pair of boolean flags.
    this.tensorOptions = tensorOptions;
}


private static JsonGenerator createPrivateGenerator(OutputStream out) {
try {
return jsonFactory.createGenerator(out);
Expand Down Expand Up @@ -221,7 +236,7 @@ public void write(FieldBase field, StringFieldValue value) {

/**
 * Writes a tensor field by delegating to serializeTensorField with this
 * writer's generator and tensor formatting options.
 *
 * @param field the field being written
 * @param value the tensor value of the field
 */
@Override
public void write(FieldBase field, TensorFieldValue value) {
    serializeTensorField(generator, field, value, tensorOptions);
}

@Override
Expand Down Expand Up @@ -287,6 +302,7 @@ public void write(DocumentUpdate documentUpdate) {

/**
* Utility method to easily serialize a single document.
* TODO: remove
*
* @param document the document to be serialized
* @param tensorShortForm whether tensors should be serialized in a type-dependent short form
Expand All @@ -301,6 +317,20 @@ public static byte[] toByteArray(Document document, boolean tensorShortForm, boo
return out.toByteArray();
}

/**
 * Serializes one document into an in-memory buffer and returns the bytes.
 *
 * @param document the document to be serialized
 * @param tensorOptions tensor formatting options (short/long, direct/wrapped, hexdump or not)
 * @return the input document serialised as UTF-8 encoded JSON
 */
public static byte[] toByteArray(Document document, JsonFormat.EncodeOptions tensorOptions) {
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    new JsonWriter(buffer, tensorOptions).write(document);
    return buffer.toByteArray();
}

/**
* Utility method to easily serialize a single document.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
import com.yahoo.metrics.simple.MetricReceiver;
import com.yahoo.restapi.Path;
import com.yahoo.search.query.ParameterParser;
import com.yahoo.tensor.serialization.JsonFormat;
import com.yahoo.text.Text;
import com.yahoo.vespa.config.content.AllClustersBucketSpacesConfig;
import com.yahoo.vespa.http.server.Headers;
Expand Down Expand Up @@ -771,22 +772,42 @@ private void writeTrace(TraceNode node) throws IOException {
}
}

/**
 * Resolves the tensor encode options from the request's "format.tensors" parameter.
 * The parameter is only used when exactly one value is supplied; with no request,
 * no parameter, several values, or an unrecognised value the "short" format applies.
 */
private JsonFormat.EncodeOptions tensorOptions() {
    // TODO: Flip default on Vespa 9 to "short-value"
    String requested = "short";
    boolean hasParameter = request != null && request.parameters().containsKey("format.tensors");
    if (hasParameter) {
        var values = request.parameters().get("format.tensors");
        if (values.size() == 1) {
            requested = values.get(0);
        }
    }
    // Constructor arguments are (shortForm, directValues, hexForDensePart).
    return switch (requested) {
        case "long" -> new JsonFormat.EncodeOptions(false, false, false);
        case "long-value" -> new JsonFormat.EncodeOptions(false, true, false);
        case "short-value" -> new JsonFormat.EncodeOptions(true, true, false);
        case "hex" -> new JsonFormat.EncodeOptions(true, false, true);
        case "hex-value" -> new JsonFormat.EncodeOptions(true, true, true);
        // aka "short"
        default -> new JsonFormat.EncodeOptions(true, false, false);
    };
}

/**
 * Returns the short-form flag of the options resolved by {@code tensorOptions()},
 * so this accessor and the writer share one interpretation of "format.tensors".
 */
private boolean tensorShortForm() {
    return tensorOptions().shortForm();
}

/**
 * Returns the direct-values flag of the options resolved by {@code tensorOptions()},
 * so this accessor and the writer share one interpretation of "format.tensors".
 */
private boolean tensorDirectValues() {
    return tensorOptions().directValues();
}

/**
 * Writes the fields of the given document to {@code json}, using a JsonWriter
 * configured with the options returned by {@code tensorOptions()}.
 *
 * @param document the document whose fields are written
 * @throws IOException declared by the signature; presumably raised by the underlying JSON output
 */
synchronized void writeSingleDocument(Document document) throws IOException {
    new JsonWriter(json, tensorOptions()).writeFields(document);
}

synchronized void writeDocumentsArrayStart() throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ public class JsonFormat {
/**
 * Options for encode.
 * The convenience constructors set every flag they do not take to {@code false}.
 */
public record EncodeOptions(boolean shortForm, boolean directValues, boolean hexForDensePart) {
    // TODO - consider "compact" flag

    /** All flags off. */
    public EncodeOptions() {
        this(false, false, false);
    }

    /** Only the short-form flag is chosen by the caller. */
    public EncodeOptions(boolean shortForm) {
        this(shortForm, false, false);
    }

    /** Short-form and direct-values are chosen by the caller; hex encoding stays off. */
    public EncodeOptions(boolean shortForm, boolean directValues) {
        this(shortForm, directValues, false);
    }
}
/**
* Serializes the given tensor value into JSON format.
Expand Down