From 02f35857c0787d58cb7616d36236de07ef569b77 Mon Sep 17 00:00:00 2001 From: David Walluck Date: Thu, 20 Feb 2025 16:30:30 -0500 Subject: [PATCH 1/3] Fix URL encoding and decoding The methods `uriEncode` and `uriDecode` did not properly handle percent-encoding. In particular, `uriEncode` didn't properly output two uppercase hex digits and `urlDecode` did not properly handle non-ASCII characters. Aditionally, if no percent-encoding was performed, these methods will now return the original string. Fixes #150 Closes #153 Fixes #154 --- .../com/github/packageurl/PackageURL.java | 63 ++++++++++--------- .../com/github/packageurl/PackageURLTest.java | 13 ++++ 2 files changed, 48 insertions(+), 28 deletions(-) diff --git a/src/main/java/com/github/packageurl/PackageURL.java b/src/main/java/com/github/packageurl/PackageURL.java index 4824b42..5a604b7 100644 --- a/src/main/java/com/github/packageurl/PackageURL.java +++ b/src/main/java/com/github/packageurl/PackageURL.java @@ -21,6 +21,7 @@ */ package com.github.packageurl; +import java.io.ByteArrayOutputStream; import java.io.Serializable; import java.net.URI; import java.net.URISyntaxException; @@ -441,22 +442,25 @@ private String percentEncode(final String input) { } private static String uriEncode(String source, Charset charset) { - if (source == null || source.length() == 0) { + if (source == null || source.isEmpty()) { return source; } - StringBuilder builder = new StringBuilder(); - for (byte b : source.getBytes(charset)) { + boolean changed = false; + StringBuilder builder = new StringBuilder(source.length()); + byte[] bytes = source.getBytes(charset); + + for (byte b : bytes) { if (isUnreserved(b)) { builder.append((char) b); - } - else { - // Substitution: A '%' followed by the hexadecimal representation of the ASCII value of the replaced character + } else { builder.append('%'); - builder.append(Integer.toHexString(b).toUpperCase()); + builder.append(Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16))); + builder.append(Character.toUpperCase(Character.forDigit(b & 0xF, 16))); + changed = true; } } - return builder.toString(); + return changed ? builder.toString() : source; } private static boolean isUnreserved(int c) { @@ -479,34 +483,37 @@ private static boolean isDigit(int c) { * @return a decoded String */ private String percentDecode(final String input) { - if (input == null) { - return null; - } - final String decoded = uriDecode(input); - if (!decoded.equals(input)) { - return decoded; - } - return input; + return uriDecode(input); } public static String uriDecode(String source) { - if (source == null) { + if (source == null || source.isEmpty()) { return source; } - int length = source.length(); - StringBuilder builder = new StringBuilder(); + + boolean changed = false; + byte[] bytes = source.getBytes(StandardCharsets.UTF_8); + int length = bytes.length; + ByteArrayOutputStream buffer = new ByteArrayOutputStream(length); + for (int i = 0; i < length; i++) { - if (source.charAt(i) == '%') { - String str = source.substring(i + 1, i + 3); - char c = (char) Integer.parseInt(str, 16); - builder.append(c); - i += 2; - } - else { - builder.append(source.charAt(i)); + int b = bytes[i]; + + if (b == '%') { + if (i + 2 >= length) { + return null; + } + + int b1 = Character.digit(bytes[++i], 16); + int b2 = Character.digit(bytes[++i], 16); + buffer.write((char) ((b1 << 4) + b2)); + changed = true; + } else { + buffer.write(b); } } - return builder.toString(); + + return changed ? new String(buffer.toByteArray(), StandardCharsets.UTF_8) : source; } /** diff --git a/src/test/java/com/github/packageurl/PackageURLTest.java b/src/test/java/com/github/packageurl/PackageURLTest.java index d7ddf18..5981bac 100644 --- a/src/test/java/com/github/packageurl/PackageURLTest.java +++ b/src/test/java/com/github/packageurl/PackageURLTest.java @@ -55,6 +55,19 @@ public static void setup() throws IOException { json = new JSONArray(jsonTxt); } + @Test + public void testEncoding1() throws MalformedPackageURLException { + PackageURL purl = new PackageURL("maven", "com.google.summit", "summit-ast", "2.2.0\n", null, null); + Assert.assertEquals("pkg:maven/com.google.summit/summit-ast@2.2.0%0A", purl.toString()); + } + + @Test + public void testEncoding2() throws MalformedPackageURLException { + PackageURL purl = new PackageURL("pkg:nuget/%D0%9Cicros%D0%BEft.%D0%95ntit%D1%83Fram%D0%B5work%D0%A1%D0%BEr%D0%B5"); + Assert.assertEquals("Мicrosоft.ЕntitуFramеworkСоrе", purl.getName()); + Assert.assertEquals("pkg:nuget/%D0%9Cicros%D0%BEft.%D0%95ntit%D1%83Fram%D0%B5work%D0%A1%D0%BEr%D0%B5", purl.toString()); + } + @Test public void testConstructorParsing() throws Exception { exception = ExpectedException.none(); From 65829b957eab50f63fe4a244b007cf1cf78d4a2a Mon Sep 17 00:00:00 2001 From: David Walluck Date: Fri, 21 Feb 2025 09:41:22 -0500 Subject: [PATCH 2/3] Run a separate pass to determine whether input needs to be changed --- .../com/github/packageurl/PackageURL.java | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/main/java/com/github/packageurl/PackageURL.java b/src/main/java/com/github/packageurl/PackageURL.java index 5a604b7..e8552f0 100644 --- a/src/main/java/com/github/packageurl/PackageURL.java +++ b/src/main/java/com/github/packageurl/PackageURL.java @@ -34,6 +34,7 @@ import java.util.TreeMap; import java.util.regex.Pattern; import java.util.stream.Collectors; +import java.util.stream.IntStream; /** *

Package-URL (aka purl) is a "mostly universal" URL to describe a package. A purl is a URL composed of seven components:

@@ -446,10 +447,14 @@ private static String uriEncode(String source, Charset charset) { return source; } - boolean changed = false; - StringBuilder builder = new StringBuilder(source.length()); byte[] bytes = source.getBytes(charset); + if (IntStream.range(0, bytes.length).allMatch(i -> isUnreserved(bytes[i]))) { + return source; + } + + StringBuilder builder = new StringBuilder(source.length()); + for (byte b : bytes) { if (isUnreserved(b)) { builder.append((char) b); @@ -457,10 +462,10 @@ private static String uriEncode(String source, Charset charset) { builder.append('%'); builder.append(Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16))); builder.append(Character.toUpperCase(Character.forDigit(b & 0xF, 16))); - changed = true; } } - return changed ? builder.toString() : source; + + return builder.toString(); } private static boolean isUnreserved(int c) { @@ -491,7 +496,10 @@ public static String uriDecode(String source) { return source; } - boolean changed = false; + if (source.indexOf('%') == -1) { + return source; + } + byte[] bytes = source.getBytes(StandardCharsets.UTF_8); int length = bytes.length; ByteArrayOutputStream buffer = new ByteArrayOutputStream(length); @@ -507,13 +515,12 @@ public static String uriDecode(String source) { int b1 = Character.digit(bytes[++i], 16); int b2 = Character.digit(bytes[++i], 16); buffer.write((char) ((b1 << 4) + b2)); - changed = true; } else { buffer.write(b); } } - return changed ? new String(buffer.toByteArray(), StandardCharsets.UTF_8) : source; + return new String(buffer.toByteArray(), StandardCharsets.UTF_8); } /** From f2c891f9b506f8c1c9c8f5da31a5aee9c6d24303 Mon Sep 17 00:00:00 2001 From: David Walluck Date: Sat, 1 Mar 2025 11:21:48 -0500 Subject: [PATCH 3/3] Add loop optimization --- .../com/github/packageurl/PackageURL.java | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/github/packageurl/PackageURL.java b/src/main/java/com/github/packageurl/PackageURL.java index e8552f0..97080d9 100644 --- a/src/main/java/com/github/packageurl/PackageURL.java +++ b/src/main/java/com/github/packageurl/PackageURL.java @@ -34,7 +34,6 @@ import java.util.TreeMap; import java.util.regex.Pattern; import java.util.stream.Collectors; -import java.util.stream.IntStream; /** *

Package-URL (aka purl) is a "mostly universal" URL to describe a package. A purl is a URL composed of seven components:

@@ -448,14 +447,18 @@ private static String uriEncode(String source, Charset charset) { } byte[] bytes = source.getBytes(charset); + int length = bytes.length; + int pos = indexOfFirstUnreservedChar(bytes); - if (IntStream.range(0, bytes.length).allMatch(i -> isUnreserved(bytes[i]))) { + if (pos == -1) { return source; } - StringBuilder builder = new StringBuilder(source.length()); + StringBuilder builder = new StringBuilder(source.substring(0, pos)); + + for (int i = pos; i < length; i++) { + byte b = bytes[i]; - for (byte b : bytes) { if (isUnreserved(b)) { builder.append((char) b); } else { @@ -468,6 +471,20 @@ private static String uriEncode(String source, Charset charset) { return builder.toString(); } + private static int indexOfFirstUnreservedChar(final byte[] bytes) { + final int length = bytes.length; + int pos = -1; + + for (int i = 0; i < length; i++) { + if (!isUnreserved(bytes[i])) { + pos = i; + break; + } + } + + return pos; + } + private static boolean isUnreserved(int c) { return (isAlpha(c) || isDigit(c) || '-' == c || '.' == c || '_' == c || '~' == c); }