-
Notifications
You must be signed in to change notification settings - Fork 79
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #270 from Netflix/no-sync-buffer-classes
Do not use synchronous Buffered classes.
- Loading branch information
Showing
8 changed files
with
311 additions
and
68 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
/** | ||
* Copyright (c) 2015-2017 Netflix, Inc. All rights reserved. | ||
* | ||
* Copyright (c) 2015-2018 Netflix, Inc. All rights reserved. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
|
@@ -15,7 +15,6 @@ | |
*/ | ||
package com.netflix.msl.io; | ||
|
||
import java.io.BufferedReader; | ||
import java.io.InputStream; | ||
import java.io.Reader; | ||
import java.nio.charset.StandardCharsets; | ||
|
@@ -30,41 +29,46 @@ | |
/** | ||
* <p>Create a new {@link MslTokenizer} that parses JSON-encoded MSL | ||
* messages.</p> | ||
* | ||
* | ||
* <p>This implementation is backed by {@code org.json}.</p> | ||
* | ||
* | ||
* @author Wesley Miaw <[email protected]> | ||
*/ | ||
public class JsonMslTokenizer extends MslTokenizer { | ||
/**
 * <p>Create a new JSON MSL tokenizer that will read data off the provided
 * input stream.</p>
 *
 * <p>The stream is wrapped in a {@code ThriftyUtf8Reader} so that the
 * tokenizer does not read ahead of the current message.</p>
 *
 * @param encoder MSL encoder factory.
 * @param source JSON input stream.
 * @throws MslInternalException if {@code MslConstants.DEFAULT_CHARSET} is
 *         not UTF-8.
 */
public JsonMslTokenizer(final MslEncoderFactory encoder, final InputStream source) {
    this.encoder = encoder;

    // We cannot use the standard {@code InputStreamReader} to support
    // UTF-8 decoding because it makes use of {@code StreamDecoder} which
    // will read {@code StreamDecoder.DEFAULT_BYTE_BUFFER_SIZE} bytes by
    // default, and enforces a minimum of
    // {@code StreamDecoder.MIN_BYTE_BUFFER_SIZE}. This will consume extra
    // bytes and prevent the input stream from being used for future MSL
    // messages.
    //
    // {@code JSONTokener} will consume one character at a time, but will
    // default to a {@code BufferedReader} with the default buffer size if
    // an {@code InputStream} or {@code Reader} is provided that does not
    // support mark, which will also consume extra bytes and prevent reuse
    // of the input stream.
    //
    // Ensure none of that occurs and that only the minimum number of bytes
    // are consumed by explicitly using the {@code ThriftyUtf8Reader} which
    // reads characters one at a time and also supports mark.
    //
    // Make sure we're trying to parse UTF-8 data. Compare by value rather
    // than by reference so an equivalent Charset instance is accepted.
    if (!StandardCharsets.UTF_8.equals(MslConstants.DEFAULT_CHARSET))
        throw new MslInternalException("Charset " + MslConstants.DEFAULT_CHARSET + " unsupported.");
    final Reader reader = new ThriftyUtf8Reader(source);
    this.tokenizer = new JSONTokener(reader);
}
|
||
/* (non-Javadoc) | ||
* @see com.netflix.msl.io.MslTokenizer#next(int) | ||
*/ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
/** | ||
* Copyright (c) 2015-2017 Netflix, Inc. All rights reserved. | ||
* | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
|
@@ -15,7 +15,6 @@ | |
*/ | ||
package com.netflix.msl.io; | ||
|
||
import java.io.BufferedInputStream; | ||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.util.Collection; | ||
|
@@ -28,25 +27,25 @@ | |
* <p>An abstract factory class for producing {@link MslTokenizer}, | ||
* {@link MslObject}, and {@link MslArray} instances of various encoder | ||
* formats.</p> | ||
* | ||
* | ||
* <p>A concrete implementation must identify its supported and preferred | ||
* encoder formats and provide implementations for encoding and decoding those | ||
* formats.</p> | ||
* | ||
* | ||
* @author Wesley Miaw <[email protected]> | ||
*/ | ||
public abstract class MslEncoderFactory { | ||
/** | ||
* <p>Escape and quote a string for print purposes.</p> | ||
* | ||
* | ||
* <p>This is based on the org.json {@code JSONObject.quote()} code.</p> | ||
* | ||
* | ||
* @param string the string to quote. May be {@code null}. | ||
* @return the quoted string. | ||
*/ | ||
static String quote(final String string) { | ||
final StringBuilder sb = new StringBuilder(); | ||
|
||
// Return "" for null or zero-length string. | ||
if (string == null || string.length() == 0) { | ||
sb.append("\"\""); | ||
|
@@ -103,12 +102,12 @@ static String quote(final String string) { | |
sb.append('"'); | ||
return sb.toString(); | ||
} | ||
|
||
/** | ||
* <p>Convert a value to a string for print purposes.</p> | ||
* | ||
* | ||
* <p>This is based on the org.json {@code JSONObject.writeValue()} code.</p> | ||
* | ||
* | ||
* @param value the value to convert to a string. May be {@code null}. | ||
* @return the string. | ||
*/ | ||
|
@@ -132,23 +131,23 @@ static String stringify(final Object value) { | |
return quote(value.toString()); | ||
} | ||
} | ||
|
||
/** | ||
* Returns the most preferred encoder format from the provided set of | ||
* formats. | ||
* | ||
* | ||
* @param formats the set of formats to choose from. May be {@code null} or | ||
* empty. | ||
* @return the preferred format from the provided set or the default format | ||
* if format set is {@code null} or empty. | ||
*/ | ||
public abstract MslEncoderFormat getPreferredFormat(final Set<MslEncoderFormat> formats); | ||
|
||
/** | ||
* Create a new {@link MslTokenizer}. The encoder format will be | ||
* determined by inspecting the byte stream identifier located in the first | ||
* byte. | ||
* | ||
* | ||
* @param source the binary data to tokenize. | ||
* @return the {@link MslTokenizer}. | ||
* @throws IOException if there is a problem reading the byte stream | ||
|
@@ -158,41 +157,41 @@ static String stringify(final Object value) { | |
*/ | ||
public MslTokenizer createTokenizer(final InputStream source) throws IOException, MslEncoderException { | ||
// Read the byte stream identifier (and only the identifier). | ||
final InputStream bufferedSource = source.markSupported() ? source : new BufferedInputStream(source, 1); | ||
final InputStream bufferedSource = source.markSupported() ? source : new UnsynchronizedBufferedInputStream(source); | ||
bufferedSource.mark(1); | ||
final byte id = (byte)bufferedSource.read(); | ||
if (id == -1) | ||
throw new MslEncoderException("End of stream reached when attempting to read the byte stream identifier."); | ||
|
||
// Identify the encoder format. | ||
final MslEncoderFormat format = MslEncoderFormat.getFormat(id); | ||
if (format == null) | ||
throw new MslEncoderException("Unidentified encoder format ID: (byte)" + id + "."); | ||
|
||
// Reset the input stream and return the tokenizer. | ||
bufferedSource.reset(); | ||
return generateTokenizer(bufferedSource, format); | ||
} | ||
|
||
/** | ||
* Create a new {@link MslTokenizer} of the specified encoder format. | ||
* | ||
* | ||
* @param source the binary data to tokenize. | ||
* @param format the encoder format. | ||
* @return the {@link MslTokenizer}. | ||
* @throws MslEncoderException if the encoder format is not supported. | ||
*/ | ||
protected abstract MslTokenizer generateTokenizer(final InputStream source, final MslEncoderFormat format) throws MslEncoderException; | ||
|
||
/** | ||
* Create a new {@link MslObject}. | ||
* | ||
* | ||
* @return the {@link MslObject}. | ||
*/ | ||
public MslObject createObject() { | ||
return createObject(null); | ||
} | ||
|
||
/** | ||
* Create a new {@link MslObject} populated with the provided map. | ||
* | ||
|
@@ -205,12 +204,12 @@ public MslObject createObject() { | |
public MslObject createObject(final Map<String,Object> map) { | ||
return new MslObject(map); | ||
} | ||
|
||
/** | ||
* Identify the encoder format of the {@link MslObject} of the encoded | ||
* data. The format will be identified by inspecting the byte stream | ||
* identifier located in the first byte. | ||
* | ||
* | ||
* @param encoding the encoded data. | ||
* @return the encoder format. | ||
* @throws MslEncoderException if the encoder format cannot be identified | ||
|
@@ -220,30 +219,30 @@ public MslEncoderFormat parseFormat(final byte[] encoding) throws MslEncoderExce | |
// Fail if the encoding is too short. | ||
if (encoding.length < 1) | ||
throw new MslEncoderException("No encoding identifier found."); | ||
|
||
// Identify the encoder format. | ||
final byte id = encoding[0]; | ||
final MslEncoderFormat format = MslEncoderFormat.getFormat(id); | ||
if (format == null) | ||
throw new MslEncoderException("Unidentified encoder format ID: (byte)" + id + "."); | ||
return format; | ||
} | ||
|
||
/** | ||
* Parse a {@link MslObject} from encoded data. The encoder format will be | ||
* determined by inspecting the byte stream identifier located in the first | ||
* byte. | ||
* | ||
* | ||
* @param encoding the encoded data to parse. | ||
* @return the {@link MslObject}. | ||
* @throws MslEncoderException if the encoder format is not supported or | ||
* there is an error parsing the encoded data. | ||
*/ | ||
public abstract MslObject parseObject(final byte[] encoding) throws MslEncoderException; | ||
|
||
/** | ||
* Encode a {@link MslObject} into the specified encoder format. | ||
* | ||
* | ||
* @param object the {@link MslObject} to encode. | ||
* @param format the encoder format. | ||
* @return the encoded data. | ||
|
@@ -254,7 +253,7 @@ public MslEncoderFormat parseFormat(final byte[] encoding) throws MslEncoderExce | |
|
||
/** | ||
* Create a new {@link MslArray}. | ||
* | ||
* | ||
* @return the {@link MslArray}. | ||
*/ | ||
public MslArray createArray() { | ||
|
@@ -263,7 +262,7 @@ public MslArray createArray() { | |
|
||
/** | ||
* Create a new {@link MslArray} populated with the provided values. | ||
* | ||
* | ||
* @param collection the collection of values. May be {@code null}. | ||
* @return the {@link MslArray}. | ||
* @throws IllegalArgumentException if one of the values is of an | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
/** | ||
* Copyright (c) 2017 Netflix, Inc. All rights reserved. | ||
* | ||
* Copyright (c) 2017-2018 Netflix, Inc. All rights reserved. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
|
@@ -24,16 +24,16 @@ | |
* necessary to decode the character, and does not close the underlying input | ||
* stream. This ensures any unneeded bytes remain on the input stream, which | ||
* can then be reused.</p> | ||
* | ||
* | ||
* <p>Based on Andy Clark's | ||
* {@code com.sun.org.apache.xerces.internal.impl.io.UTF8Reader}.</p> | ||
* | ||
* | ||
* @author Wesley Miaw <[email protected]> | ||
*/ | ||
public class ThriftyUtf8Reader extends Reader { | ||
/** Default byte buffer size (8192). */ | ||
public static final int DEFAULT_BUFFER_SIZE = 8192; | ||
|
||
/** Input stream. */ | ||
private final InputStream fInputStream; | ||
/** Byte buffer. */ | ||
|
@@ -42,11 +42,11 @@ public class ThriftyUtf8Reader extends Reader { | |
private int fOffset = 0; | ||
/** Surrogate character. */ | ||
private int fSurrogate = -1; | ||
|
||
/**
 * Create a new reader that decodes characters from the given byte stream.
 *
 * @param inputStream the backing input stream.
 */
public ThriftyUtf8Reader(final InputStream inputStream) {
    this.fInputStream = inputStream;
}
|
||
@Override | ||
public int read() throws IOException { | ||
// decode character | ||
|
@@ -459,13 +459,54 @@ public int read(final char ch[], final int offset, int length) throws IOExceptio | |
} | ||
|
||
@Override | ||
public void reset() { | ||
public long skip(final long n) throws IOException { | ||
// Don't pass skip down to the backing input stream since we're being | ||
// asked to skip characters and not bytes. | ||
long remaining = n; | ||
final char[] ch = new char[fBuffer.length]; | ||
do { | ||
final int length = ch.length < remaining ? ch.length : (int)remaining; | ||
final int count = read(ch, 0, length); | ||
if (count > 0) | ||
remaining -= count; | ||
else | ||
break; | ||
} while (remaining > 0); | ||
|
||
final long skipped = n - remaining; | ||
return skipped; | ||
} | ||
|
||
/** | ||
* Tell whether this stream supports the mark() operation. | ||
*/ | ||
@Override | ||
public boolean markSupported() { | ||
return fInputStream.markSupported(); | ||
} | ||
|
||
@Override | ||
public void mark(final int readLimit) throws IOException { | ||
// This is complicated because the read limit is in characters but the | ||
// backing input stream is in bytes. If we really want to be safe then | ||
// we need to multiply by 4 bytes. Account for overflow. | ||
final int byteLimit = 4 * readLimit; | ||
final int safeLimit = (byteLimit < 0) ? Integer.MAX_VALUE : byteLimit; | ||
fInputStream.mark(safeLimit); | ||
} | ||
|
||
@Override | ||
public void reset() throws IOException { | ||
fOffset = 0; | ||
fSurrogate = -1; | ||
fInputStream.reset(); | ||
} | ||
|
||
@Override | ||
public void close() { | ||
// Explicitly do not close the backing input stream for our use case. | ||
// This is because we are using ThriftyUtf8Reader inside a stream | ||
// parser. | ||
} | ||
|
||
/** Throws an exception for expected byte. */ | ||
|
Oops, something went wrong.