Merge pull request #270 from Netflix/no-sync-buffer-classes

Do not use synchronous Buffered classes.
Netflix · May 9, 2018 · 480e2d5 · 480e2d5
2 parents 0e190bc + f63fc6d
commit 480e2d5
Show file tree

Hide file tree

Showing 8 changed files with 311 additions and 68 deletions.
diff --git a/core/src/main/java/com/netflix/msl/io/JsonMslTokenizer.java b/core/src/main/java/com/netflix/msl/io/JsonMslTokenizer.java
@@ -1,6 +1,6 @@
 /**
- * Copyright (c) 2015-2017 Netflix, Inc.  All rights reserved.
- * 
+ * Copyright (c) 2015-2018 Netflix, Inc.  All rights reserved.
+ *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
@@ -15,7 +15,6 @@
  */
 package com.netflix.msl.io;
 
-import java.io.BufferedReader;
 import java.io.InputStream;
 import java.io.Reader;
 import java.nio.charset.StandardCharsets;
@@ -30,41 +29,46 @@
 /**
  * <p>Create a new {@link MslTokenizer} that parses JSON-encoded MSL
  * messages.</p>
- * 
+ *
  * <p>This implementation is backed by {@code org.json}.</p>
- * 
+ *
  * @author Wesley Miaw <[email protected]>
  */
 public class JsonMslTokenizer extends MslTokenizer {
     /**
      * <p>Create a new JSON MSL tokenizer that will read data off the provided
      * input stream.</p>
-     * 
+     *
      * @param encoder MSL encoder factory.
      * @param source JSON input stream.
      */
     public JsonMslTokenizer(final MslEncoderFactory encoder, final InputStream source) {
         this.encoder = encoder;
-        // We cannot use the standard InputStreamReader to support UTF-8 
-        // decoding because it makes use of StreamDecoder which will read
-        // {@code StreamDecoder.DEFAULT_BYTE_BUFFER_SIZE} bytes by default, and
-        // enforces a minimum of {@code StreamDecoder.MIN_BYTE_BUFFER_SIZE}.
-        // This will consume extra bytes and prevent the input stream from
-        // being used for future MSL messages.
+        // We cannot use the standard {@code InputStreamReader} to support
+        // UTF-8 decoding because it makes use of {@code StreamDecoder} which
+        // will read {@code StreamDecoder.DEFAULT_BYTE_BUFFER_SIZE} bytes by
+        // default, and enforces a minimum of
+        // {@code StreamDecoder.MIN_BYTE_BUFFER_SIZE}. This will consume extra
+        // bytes and prevent the input stream from being used for future MSL
+        // messages.
         //
-        // JSONTokenizer will consume one character at a time, but will default
-        // to a {@code BufferedReader} with the default buffer size, which will
-        // also consume extra bytes and prevent reuse of the input stream.
+        // {@code JSONTokener} will consume one character at a time, but will
+        // default to a {@code BufferedReader} with the default buffer size if
+        // an {@code InputStream} or {@code Reader} is provided that does not
+        // support mark, which will also consume extra bytes and prevent reuse
+        // of the input stream.
         //
-        // Ensure only the minimum number of bytes are consumed by explicitly
-        // using the {@code ThriftyUtf8Reader} and a {@code BufferedReader}
-        // with a buffer size of 1.
+        // Ensure none of that occurs and that only the minimum number of bytes
+        // are consumed by explicitly using the {@code ThriftyUtf8Reader} which
+        // reads characters one at a time and also supports mark.
+        //
+        // Make sure we're trying to parse UTF-8 data.
         if (StandardCharsets.UTF_8 != MslConstants.DEFAULT_CHARSET)
             throw new MslInternalException("Charset " + MslConstants.DEFAULT_CHARSET + " unsupported.");
-        final Reader reader = new BufferedReader(new ThriftyUtf8Reader(source), 1);
+        final Reader reader = new ThriftyUtf8Reader(source);
         this.tokenizer = new JSONTokener(reader);
     }
-    
+
     /* (non-Javadoc)
      * @see com.netflix.msl.io.MslTokenizer#next(int)
      */

diff --git a/core/src/main/java/com/netflix/msl/io/MslEncoderFactory.java b/core/src/main/java/com/netflix/msl/io/MslEncoderFactory.java
@@ -1,6 +1,6 @@
 /**
  * Copyright (c) 2015-2017 Netflix, Inc.  All rights reserved.
- * 
+ *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
@@ -15,7 +15,6 @@
  */
 package com.netflix.msl.io;
 
-import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.Collection;
@@ -28,25 +27,25 @@
  * <p>An abstract factory class for producing {@link MslTokenizer},
  * {@link MslObject}, and {@link MslArray} instances of various encoder
  * formats.</p>
- * 
+ *
  * <p>A concrete implementation must identify its supported and preferred
  * encoder formats and provide implementations for encoding and decoding those
  * formats.</p>
- * 
+ *
  * @author Wesley Miaw <[email protected]>
  */
 public abstract class MslEncoderFactory {
     /**
      * <p>Escape and quote a string for print purposes.</p>
-     * 
+     *
      * <p>This is based on the org.json {@code MslObject.quote()} code.</p>
-     * 
+     *
      * @param string the string to quote. May be {@code null}.
      * @return the quoted string.
      */
     static String quote(final String string) {
         final StringBuilder sb = new StringBuilder();
-        
+
         // Return "" for null or zero-length string.
         if (string == null || string.length() == 0) {
             sb.append("\"\"");
@@ -103,12 +102,12 @@ static String quote(final String string) {
         sb.append('"');
         return sb.toString();
     }
-    
+
     /**
      * <p>Convert a value to a string for print purposes.</p>
-     * 
+     *
      * <p>This is based on the org.json {@code MslObject.writeValue()} code.</p>
-     * 
+     *
      * @param value the value to convert to a string. May be {@code null}.
      * @return the string.
      */
@@ -132,23 +131,23 @@ static String stringify(final Object value) {
             return quote(value.toString());
         }
     }
-    
+
     /**
      * Returns the most preferred encoder format from the provided set of
      * formats.
-     * 
+     *
      * @param formats the set of formats to choose from. May be {@code null} or
      *        empty.
      * @return the preferred format from the provided set or the default format
      *         if format set is {@code null} or empty.
      */
     public abstract MslEncoderFormat getPreferredFormat(final Set<MslEncoderFormat> formats);
-    
+
     /**
      * Create a new {@link MslTokenizer}. The encoder format will be
      * determined by inspecting the byte stream identifier located in the first
      * byte.
-     * 
+     *
      * @param source the binary data to tokenize.
      * @return the {@link MslTokenizer}.
      * @throws IOException if there is a problem reading the byte stream
@@ -158,41 +157,41 @@ static String stringify(final Object value) {
      */
     public MslTokenizer createTokenizer(final InputStream source) throws IOException, MslEncoderException {
         // Read the byte stream identifier (and only the identifier).
-        final InputStream bufferedSource = source.markSupported() ? source : new BufferedInputStream(source, 1);
+        final InputStream bufferedSource = source.markSupported() ? source : new UnsynchronizedBufferedInputStream(source);
         bufferedSource.mark(1);
-        final byte id = (byte)bufferedSource.read();
-        if (id == -1)
+        // Test for end of stream on the int returned by read() before
+        // narrowing it; once cast to a byte, a leading 0xFF is also -1 and
+        // would be misreported as end of stream.
+        final int value = bufferedSource.read();
+        if (value == -1)
             throw new MslEncoderException("End of stream reached when attempting to read the byte stream identifier.");
+        final byte id = (byte)value;
-        
+
         // Identify the encoder format.
         final MslEncoderFormat format = MslEncoderFormat.getFormat(id);
         if (format == null)
             throw new MslEncoderException("Unidentified encoder format ID: (byte)" + id + ".");
-        
+
         // Reset the input stream and return the tokenizer.
         bufferedSource.reset();
         return generateTokenizer(bufferedSource, format);
     }
-    
+
     /**
      * Create a new {@link MslTokenizer} of the specified encoder format.
-     * 
+     *
      * @param source the binary data to tokenize.
      * @param format the encoder format.
      * @return the {@link MslTokenizer}.
      * @throws MslEncoderException if the encoder format is not supported.
      */
     protected abstract MslTokenizer generateTokenizer(final InputStream source, final MslEncoderFormat format) throws MslEncoderException;
-    
+
     /**
      * Create a new {@link MslObject}.
-     * 
+     *
      * @return the {@link MslObject}.
      */
     public MslObject createObject() {
         return createObject(null);
     }
-    
+
     /**
      * Create a new {@link MslObject} populated with the provided map.
      *
@@ -205,12 +204,12 @@ public MslObject createObject() {
     public MslObject createObject(final Map<String,Object> map) {
         return new MslObject(map);
     }
-    
+
     /**
      * Identify the encoder format of the {@link MslObject} of the encoded
      * data. The format will be identified by inspecting the byte stream
      * identifier located in the first byte.
-     * 
+     *
      * @param encoding the encoded data.
      * @return the encoder format.
      * @throws MslEncoderException if the encoder format cannot be identified
@@ -220,30 +219,30 @@ public MslEncoderFormat parseFormat(final byte[] encoding) throws MslEncoderExce
         // Fail if the encoding is too short.
         if (encoding.length < 1)
             throw new MslEncoderException("No encoding identifier found.");
-        
+
         // Identify the encoder format.
         final byte id = encoding[0];
         final MslEncoderFormat format = MslEncoderFormat.getFormat(id);
         if (format == null)
             throw new MslEncoderException("Unidentified encoder format ID: (byte)" + id + ".");
         return format;
     }
-    
+
     /**
      * Parse a {@link MslObject} from encoded data. The encoder format will be
      * determined by inspecting the byte stream identifier located in the first
      * byte.
-     * 
+     *
      * @param encoding the encoded data to parse.
      * @return the {@link MslObject}.
      * @throws MslEncoderException if the encoder format is not supported or
      *         there is an error parsing the encoded data.
      */
     public abstract MslObject parseObject(final byte[] encoding) throws MslEncoderException;
-    
+
     /**
      * Encode a {@link MslObject} into the specified encoder format.
-     * 
+     *
      * @param object the {@link MslObject} to encode.
      * @param format the encoder format.
      * @return the encoded data.
@@ -254,7 +253,7 @@ public MslEncoderFormat parseFormat(final byte[] encoding) throws MslEncoderExce
 
     /**
      * Create a new {@link MslArray}.
-     * 
+     *
      * @return the {@link MslArray}.
      */
     public MslArray createArray() {
@@ -263,7 +262,7 @@ public MslArray createArray() {
 
     /**
      * Create a new {@link MslArray} populated with the provided values.
-     * 
+     *
      * @param collection the collection of values. May be {@code null}.
      * @return the {@link MslArray}.
      * @throws IllegalArgumentException if one of the values is of an

diff --git a/core/src/main/java/com/netflix/msl/io/ThriftyUtf8Reader.java b/core/src/main/java/com/netflix/msl/io/ThriftyUtf8Reader.java
@@ -1,6 +1,6 @@
 /**
- * Copyright (c) 2017 Netflix, Inc.  All rights reserved.
- * 
+ * Copyright (c) 2017-2018 Netflix, Inc.  All rights reserved.
+ *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
@@ -24,16 +24,16 @@
  * necessary to decode the character, and does not close the underlying input
  * stream. This ensures any unneeded bytes remain on the input stream, which
  * can then be reused.</p>
- * 
+ *
  * <p>Based on Andy Clark's
  * {@code com.sun.org.apache.xerces.internal.impl.io.UTF8Reader}.</p>
- * 
+ *
  * @author Wesley Miaw <[email protected]>
  */
 public class ThriftyUtf8Reader extends Reader {
     /** Default byte buffer size (8192). */
     public static final int DEFAULT_BUFFER_SIZE = 8192;
-    
+
     /** Input stream. */
     private final InputStream fInputStream;
     /** Byte buffer. */
@@ -42,11 +42,11 @@ public class ThriftyUtf8Reader extends Reader {
     private int fOffset = 0;
     /** Surrogate character. */
     private int fSurrogate = -1;
-    
+
+    /**
+     * Create a new reader that decodes characters read from the provided
+     * input stream.
+     *
+     * @param inputStream the backing input stream.
+     */
     public ThriftyUtf8Reader(final InputStream inputStream) {
         fInputStream = inputStream;
     }
-    
+
     @Override
     public int read() throws IOException {
         // decode character
@@ -459,13 +459,54 @@ public int read(final char ch[], final int offset, int length) throws IOExceptio
     }
 
     @Override
-    public void reset() {
+    public long skip(final long n) throws IOException {
+        // Reader.skip() is specified to reject negative counts.
+        if (n < 0)
+            throw new IllegalArgumentException("skip value is negative");
+
+        // Don't pass skip down to the backing input stream since we're being
+        // asked to skip characters and not bytes. Decode characters into a
+        // scratch buffer and discard them instead.
+        final char[] ch = new char[fBuffer.length];
+        long remaining = n;
+        while (remaining > 0) {
+            final int length = (int)Math.min(ch.length, remaining);
+            final int count = read(ch, 0, length);
+            // Stop as soon as read() reports no characters.
+            if (count <= 0)
+                break;
+            remaining -= count;
+        }
+
+        // Number of characters actually skipped.
+        return n - remaining;
+    }
+
+    /**
+     * Tell whether this stream supports the mark() operation. The answer is
+     * whatever the backing input stream reports for itself.
+     *
+     * @return the value of {@code markSupported()} on the backing input
+     *         stream.
+     */
+    @Override
+    public boolean markSupported() {
+        return fInputStream.markSupported();
+    }
+
+    @Override
+    public void mark(final int readLimit) throws IOException {
+        // The read limit is in characters but the backing input stream is in
+        // bytes, so to be safe allow 4 bytes per character. Multiply in long
+        // arithmetic: a wrapped int product is not always negative (e.g.
+        // 4 * (1 << 30) wraps to 0), so a sign check alone misses overflow.
+        final long byteLimit = 4L * readLimit;
+        // Clamp anything that does not fit in a non-negative int.
+        final boolean fits = 0 <= byteLimit && byteLimit <= Integer.MAX_VALUE;
+        final int safeLimit = fits ? (int)byteLimit : Integer.MAX_VALUE;
+        fInputStream.mark(safeLimit);
+    }
+
+    @Override
+    public void reset() throws IOException {
+        // Clear this reader's offset and any pending surrogate character,
+        // then rewind the backing input stream to its mark.
+        // NOTE(review): mark() does not record fOffset or fSurrogate, so this
+        // presumably assumes the mark was set with no pending surrogate --
+        // confirm.
         fOffset = 0;
         fSurrogate = -1;
+        fInputStream.reset();
     }
 
     @Override
     public void close() {
+        // Explicitly do not close the backing input stream for our use case.
+        // This is because we are using ThriftyUtf8Reader inside a stream
+        // parser: any unneeded bytes must remain on the input stream so that
+        // it can be reused after this reader is done with it.
     }
 
     /** Throws an exception for expected byte. */