Skip to content

Commit

Permalink
Merge pull request #270 from Netflix/no-sync-buffer-classes
Browse files Browse the repository at this point in the history
Do not use synchronous Buffered classes.
  • Loading branch information
wmiaw authored May 9, 2018
2 parents 0e190bc + f63fc6d commit 480e2d5
Show file tree
Hide file tree
Showing 8 changed files with 311 additions and 68 deletions.
44 changes: 24 additions & 20 deletions core/src/main/java/com/netflix/msl/io/JsonMslTokenizer.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/**
* Copyright (c) 2015-2017 Netflix, Inc. All rights reserved.
*
* Copyright (c) 2015-2018 Netflix, Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
Expand All @@ -15,7 +15,6 @@
*/
package com.netflix.msl.io;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
Expand All @@ -30,41 +29,46 @@
/**
* <p>Create a new {@link MslTokenizer} that parses JSON-encoded MSL
* messages.</p>
*
*
* <p>This implementation is backed by {@code org.json}.</p>
*
*
* @author Wesley Miaw <[email protected]>
*/
public class JsonMslTokenizer extends MslTokenizer {
/**
 * <p>Create a new JSON MSL tokenizer that will read data off the provided
 * input stream.</p>
 *
 * <p>Characters are decoded from {@code source} one at a time so that no
 * bytes beyond those needed are consumed, leaving the stream usable for
 * any MSL messages that follow.</p>
 *
 * @param encoder MSL encoder factory.
 * @param source JSON input stream.
 * @throws MslInternalException if the MSL default character set is not
 *         UTF-8.
 */
public JsonMslTokenizer(final MslEncoderFactory encoder, final InputStream source) {
    this.encoder = encoder;

    // We cannot use the standard {@code InputStreamReader} to support
    // UTF-8 decoding because it makes use of {@code StreamDecoder} which
    // will read {@code StreamDecoder.DEFAULT_BYTE_BUFFER_SIZE} bytes by
    // default, and enforces a minimum of
    // {@code StreamDecoder.MIN_BYTE_BUFFER_SIZE}. This will consume extra
    // bytes and prevent the input stream from being used for future MSL
    // messages.
    //
    // {@code JSONTokener} will consume one character at a time, but will
    // default to a {@code BufferedReader} with the default buffer size if
    // an {@code InputStream} or {@code Reader} is provided that does not
    // support mark, which will also consume extra bytes and prevent reuse
    // of the input stream.
    //
    // Ensure none of that occurs and that only the minimum number of bytes
    // are consumed by explicitly using the {@code ThriftyUtf8Reader} which
    // reads characters one at a time and also supports mark.
    //
    // Make sure we're trying to parse UTF-8 data. Compare by value rather
    // than by reference so an equivalent Charset instance is accepted.
    if (!StandardCharsets.UTF_8.equals(MslConstants.DEFAULT_CHARSET))
        throw new MslInternalException("Charset " + MslConstants.DEFAULT_CHARSET + " unsupported.");
    final Reader reader = new ThriftyUtf8Reader(source);
    this.tokenizer = new JSONTokener(reader);
}

/* (non-Javadoc)
* @see com.netflix.msl.io.MslTokenizer#next(int)
*/
Expand Down
61 changes: 30 additions & 31 deletions core/src/main/java/com/netflix/msl/io/MslEncoderFactory.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/**
* Copyright (c) 2015-2017 Netflix, Inc. All rights reserved.
*
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
Expand All @@ -15,7 +15,6 @@
*/
package com.netflix.msl.io;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collection;
Expand All @@ -28,25 +27,25 @@
* <p>An abstract factory class for producing {@link MslTokenizer},
* {@link MslObject}, and {@link MslArray} instances of various encoder
* formats.</p>
*
*
* <p>A concrete implementation must identify its supported and preferred
* encoder formats and provide implementations for encoding and decoding those
* formats.</p>
*
*
* @author Wesley Miaw <[email protected]>
*/
public abstract class MslEncoderFactory {
/**
* <p>Escape and quote a string for print purposes.</p>
*
*
* <p>This is based on the org.json {@code JSONObject.quote()} code.</p>
*
*
* @param string the string to quote. May be {@code null}.
* @return the quoted string.
*/
static String quote(final String string) {
final StringBuilder sb = new StringBuilder();

// Return "" for null or zero-length string.
if (string == null || string.length() == 0) {
sb.append("\"\"");
Expand Down Expand Up @@ -103,12 +102,12 @@ static String quote(final String string) {
sb.append('"');
return sb.toString();
}

/**
* <p>Convert a value to a string for print purposes.</p>
*
*
* <p>This is based on the org.json {@code JSONObject.writeValue()} code.</p>
*
*
* @param value the value to convert to a string. May be {@code null}.
* @return the string.
*/
Expand All @@ -132,23 +131,23 @@ static String stringify(final Object value) {
return quote(value.toString());
}
}

/**
* Returns the most preferred encoder format from the provided set of
* formats.
*
*
* @param formats the set of formats to choose from. May be {@code null} or
* empty.
* @return the preferred format from the provided set or the default format
* if format set is {@code null} or empty.
*/
public abstract MslEncoderFormat getPreferredFormat(final Set<MslEncoderFormat> formats);

/**
* Create a new {@link MslTokenizer}. The encoder format will be
* determined by inspecting the byte stream identifier located in the first
* byte.
*
*
* @param source the binary data to tokenize.
* @return the {@link MslTokenizer}.
* @throws IOException if there is a problem reading the byte stream
Expand All @@ -158,41 +157,41 @@ static String stringify(final Object value) {
*/
public MslTokenizer createTokenizer(final InputStream source) throws IOException, MslEncoderException {
    // Read the byte stream identifier (and only the identifier). If the
    // source cannot mark/reset on its own, wrap it in a stream that can.
    final InputStream bufferedSource = source.markSupported() ? source : new UnsynchronizedBufferedInputStream(source);
    bufferedSource.mark(1);

    // Test for end of stream on the int returned by read() before
    // narrowing it to a byte; otherwise a real 0xFF data byte would be
    // indistinguishable from the -1 end-of-stream marker.
    final int b = bufferedSource.read();
    if (b == -1)
        throw new MslEncoderException("End of stream reached when attempting to read the byte stream identifier.");
    final byte id = (byte)b;

    // Identify the encoder format.
    final MslEncoderFormat format = MslEncoderFormat.getFormat(id);
    if (format == null)
        throw new MslEncoderException("Unidentified encoder format ID: (byte)" + id + ".");

    // Reset the input stream so the identifier byte is still unread, and
    // return the tokenizer.
    bufferedSource.reset();
    return generateTokenizer(bufferedSource, format);
}

/**
 * Create a new {@link MslTokenizer} of the specified encoder format.
 *
 * <p>{@link #createTokenizer(InputStream)} calls this after it has
 * identified the format and reset the stream, so the byte stream
 * identifier is still available to be read from {@code source}.</p>
 *
 * @param source the binary data to tokenize.
 * @param format the encoder format.
 * @return the {@link MslTokenizer}.
 * @throws MslEncoderException if the encoder format is not supported.
 */
protected abstract MslTokenizer generateTokenizer(final InputStream source, final MslEncoderFormat format) throws MslEncoderException;

/**
 * Create a new {@link MslObject}.
 *
 * <p>Equivalent to calling {@link #createObject(Map)} with a {@code null}
 * map.</p>
 *
 * @return the {@link MslObject}.
 */
public MslObject createObject() {
    final Map<String,Object> noMap = null;
    return createObject(noMap);
}

/**
* Create a new {@link MslObject} populated with the provided map.
*
Expand All @@ -205,12 +204,12 @@ public MslObject createObject() {
public MslObject createObject(final Map<String,Object> map) {
    // The map is handed directly to the MslObject constructor.
    final MslObject object = new MslObject(map);
    return object;
}

/**
* Identify the encoder format of the {@link MslObject} of the encoded
* data. The format will be identified by inspecting the byte stream
* identifier located in the first byte.
*
*
* @param encoding the encoded data.
* @return the encoder format.
* @throws MslEncoderException if the encoder format cannot be identified
Expand All @@ -220,30 +219,30 @@ public MslEncoderFormat parseFormat(final byte[] encoding) throws MslEncoderExce
// Fail if the encoding is too short.
if (encoding.length < 1)
throw new MslEncoderException("No encoding identifier found.");

// Identify the encoder format.
final byte id = encoding[0];
final MslEncoderFormat format = MslEncoderFormat.getFormat(id);
if (format == null)
throw new MslEncoderException("Unidentified encoder format ID: (byte)" + id + ".");
return format;
}

/**
* Parse a {@link MslObject} from encoded data. The encoder format will be
* determined by inspecting the byte stream identifier located in the first
* byte.
*
*
* @param encoding the encoded data to parse.
* @return the {@link MslObject}.
* @throws MslEncoderException if the encoder format is not supported or
* there is an error parsing the encoded data.
*/
public abstract MslObject parseObject(final byte[] encoding) throws MslEncoderException;

/**
* Encode a {@link MslObject} into the specified encoder format.
*
*
* @param object the {@link MslObject} to encode.
* @param format the encoder format.
* @return the encoded data.
Expand All @@ -254,7 +253,7 @@ public MslEncoderFormat parseFormat(final byte[] encoding) throws MslEncoderExce

/**
* Create a new {@link MslArray}.
*
*
* @return the {@link MslArray}.
*/
public MslArray createArray() {
Expand All @@ -263,7 +262,7 @@ public MslArray createArray() {

/**
* Create a new {@link MslArray} populated with the provided values.
*
*
* @param collection the collection of values. May be {@code null}.
* @return the {@link MslArray}.
* @throws IllegalArgumentException if one of the values is of an
Expand Down
57 changes: 49 additions & 8 deletions core/src/main/java/com/netflix/msl/io/ThriftyUtf8Reader.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/**
* Copyright (c) 2017 Netflix, Inc. All rights reserved.
*
* Copyright (c) 2017-2018 Netflix, Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
Expand All @@ -24,16 +24,16 @@
* necessary to decode the character, and does not close the underlying input
* stream. This ensures any unneeded bytes remain on the input stream, which
* can then be reused.</p>
*
*
* <p>Based on Andy Clark's
* {@code com.sun.org.apache.xerces.internal.impl.io.UTF8Reader}.</p>
*
*
* @author Wesley Miaw <[email protected]>
*/
public class ThriftyUtf8Reader extends Reader {
/** Default byte buffer size (8192). */
public static final int DEFAULT_BUFFER_SIZE = 8192;

/** Input stream. */
private final InputStream fInputStream;
/** Byte buffer. */
Expand All @@ -42,11 +42,11 @@ public class ThriftyUtf8Reader extends Reader {
private int fOffset = 0;
/** Surrogate character. */
private int fSurrogate = -1;

/**
 * Create a new reader that decodes characters from the provided input
 * stream.
 *
 * @param inputStream the backing input stream to read bytes from.
 */
public ThriftyUtf8Reader(final InputStream inputStream) {
    this.fInputStream = inputStream;
}

@Override
public int read() throws IOException {
// decode character
Expand Down Expand Up @@ -459,13 +459,54 @@ public int read(final char ch[], final int offset, int length) throws IOExceptio
}

@Override
public void reset() {
public long skip(final long n) throws IOException {
    // Skips up to n characters and returns how many were actually skipped,
    // which is less than n if the end of the stream is reached first.
    //
    // Reject negative counts as required by the Reader.skip() contract
    // instead of passing a negative length down to read().
    if (n < 0)
        throw new IllegalArgumentException("skip value is negative");
    if (n == 0)
        return 0L;

    // Don't pass skip down to the backing input stream since we're being
    // asked to skip characters and not bytes. Decode into a scratch array
    // instead, sized no larger than what is actually being skipped.
    final char[] scratch = new char[(int)Math.min(n, fBuffer.length)];
    long remaining = n;
    while (remaining > 0) {
        final int length = (int)Math.min(scratch.length, remaining);
        final int count = read(scratch, 0, length);
        // Stop when read() yields no characters.
        if (count <= 0)
            break;
        remaining -= count;
    }

    return n - remaining;
}

/**
 * Tell whether this reader supports the {@code mark()} operation. The
 * answer is delegated to the backing input stream.
 *
 * @return {@code true} if the backing input stream supports mark.
 */
@Override
public boolean markSupported() {
    final boolean backingSupportsMark = fInputStream.markSupported();
    return backingSupportsMark;
}

@Override
public void mark(final int readLimit) throws IOException {
    // This is complicated because the read limit is in characters but the
    // backing input stream is in bytes. If we really want to be safe then
    // we need to multiply by 4 bytes.
    //
    // Do the multiplication in long arithmetic: a 32-bit product can wrap
    // around to zero or a small positive value (not only to a negative
    // one), which would silently shrink the mark limit. Anything outside
    // the int range is clamped to Integer.MAX_VALUE.
    final long byteLimit = 4L * readLimit;
    final int safeLimit = (byteLimit < 0 || byteLimit > Integer.MAX_VALUE)
        ? Integer.MAX_VALUE
        : (int)byteLimit;
    fInputStream.mark(safeLimit);
}

@Override
public void reset() throws IOException {
    // Put the surrogate and offset fields back to their initial values
    // before rewinding the backing input stream to its mark.
    fSurrogate = -1;
    fOffset = 0;
    fInputStream.reset();
}

@Override
public void close() {
    // Intentionally a no-op: explicitly do not close the backing input
    // stream for our use case. This is because we are using
    // ThriftyUtf8Reader inside a stream parser.
}

/** Throws an exception for expected byte. */
Expand Down
Loading

0 comments on commit 480e2d5

Please sign in to comment.