Skip to content

Commit

Permalink
internal/mod/module: add support for escaping
Browse files Browse the repository at this point in the history
We want to make sure that module paths can be stored
in arbitrary case-insensitive filesystems without risk
of clashes. The logic here has been adapted from the Go
equivalent.

Signed-off-by: Roger Peppe <[email protected]>
Change-Id: I58a5c082f010a087136cf9535796d6b8226f81f7
Reviewed-on: https://review-eu.gerrithub.io/c/cue-lang/cue/+/1173532
TryBot-Result: CUEcueckoo <[email protected]>
Reviewed-by: Daniel Martí <[email protected]>
Unity-Result: CUE porcuepine <[email protected]>
  • Loading branch information
rogpeppe committed Dec 13, 2023
1 parent 3c187c5 commit 576c564
Show file tree
Hide file tree
Showing 3 changed files with 187 additions and 0 deletions.
68 changes: 68 additions & 0 deletions internal/mod/module/escape.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package module

import (
"fmt"
"strings"
"unicode/utf8"

"cuelang.org/go/internal/mod/semver"
)

// EscapePath converts a module path (given without its major version
// suffix) into its escaped form.
//
// The path is validated with CheckPathWithoutVersion first; if that check
// fails, its error is returned as-is together with an empty string.
func EscapePath(path string) (escaped string, err error) {
	err = CheckPathWithoutVersion(path)
	if err != nil {
		return "", err
	}
	// Per the comments in this package, the path check already rejects
	// capital letters, so escaping should be a no-op for any path that
	// reaches this point. It is applied regardless, as a defensive measure.
	escaped, err = escapeString(path)
	return escaped, err
}

// EscapeVersion converts a module version into its escaped form.
//
// The version must be valid semver syntax (not necessarily canonical),
// must pass the checkElem file-name check, and must not contain an
// exclamation mark. Any violation yields an *InvalidVersionError carrying
// the offending version and an empty result string.
func EscapeVersion(v string) (escaped string, err error) {
	if !semver.IsValid(v) {
		return "", &InvalidVersionError{
			Version: v,
			Err:     fmt.Errorf("version is not in semver syntax"),
		}
	}
	// The version is acceptable only when it passes the element check and
	// is free of '!', which is reserved as the escape marker.
	elemErr := checkElem(v, filePath)
	if elemErr == nil && !strings.Contains(v, "!") {
		return escapeString(v)
	}
	return "", &InvalidVersionError{
		Version: v,
		Err:     fmt.Errorf("disallowed version string"),
	}
}

// escapeString rewrites every ASCII upper-case letter in s as '!' followed
// by the corresponding lower-case letter, leaving all other bytes as they
// are. A string with no upper-case letters is returned unchanged.
//
// It returns an error if s contains '!' or any non-ASCII byte: callers are
// expected to have rejected such input already, and the byte-wise
// escaping below is only correct for pure-ASCII strings.
func escapeString(s string) (escaped string, err error) {
	upper := 0
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case c == '!' || c >= utf8.RuneSelf:
			// Earlier validation should have ruled this out; report it
			// rather than produce an ambiguous escaped form.
			return "", fmt.Errorf("internal error: inconsistency in EscapePath")
		case 'A' <= c && c <= 'Z':
			upper++
		}
	}
	if upper == 0 {
		return s, nil
	}

	// Each upper-case letter expands to two bytes in the output.
	out := make([]byte, 0, len(s)+upper)
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c < 'A' || c > 'Z' {
			out = append(out, c)
			continue
		}
		out = append(out, '!', c-'A'+'a')
	}
	return string(out), nil
}
73 changes: 73 additions & 0 deletions internal/mod/module/module.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,68 @@
// There are no restrictions imposed directly by use of this structure,
// but additional checking functions, most notably Check, verify that
// a particular path, version pair is valid.
//
// # Escaped Paths
//
// Module versions appear as substrings of file system paths (as stored by
// the modcache package).
// In general we cannot rely on file systems to be case-sensitive. Although
// module paths cannot currently contain upper case characters because
// OCI registries forbid that, versions can. That
// is, we cannot rely on the file system to keep foo.com/bar@v1.2.3-PRE and
// foo.com/bar@v1.2.3-pre separate. Windows and macOS don't. Instead, we must
// never require two different casings of a file path.
//
// One possibility would be to make the escaped form be the lowercase
// hexadecimal encoding of the actual path bytes. This would avoid ever
// needing different casings of a file path, but it would be fairly illegible
// to most programmers when those paths appeared in the file system
// (including in file paths in compiler errors and stack traces),
// in web server logs, and so on. Instead, we want a safe escaped form that
// leaves most paths unaltered.
//
// The safe escaped form is to replace every uppercase letter
// with an exclamation mark followed by the letter's lowercase equivalent.
//
// For example,
//
// foo.com/bar@v1.2.3-PRE -> foo.com/bar@v1.2.3-!p!r!e
//
// Versions that avoid upper-case letters are left unchanged.
// Note that because import paths are ASCII-only and avoid various
// problematic punctuation (like : < and >), the escaped form is also ASCII-only
// and avoids the same problematic punctuation.
//
// Neither versions nor module paths allow exclamation marks, so there is no
// need to define how to escape a literal !.
//
// # Unicode Restrictions
//
// Today, paths are disallowed from using Unicode.
//
// Although paths are currently disallowed from using Unicode,
// we would like at some point to allow Unicode letters as well, to assume that
// file systems and URLs are Unicode-safe (storing UTF-8), and apply
// the !-for-uppercase convention for escaping them in the file system.
// But there are at least two subtle considerations.
//
// First, note that not all case-fold equivalent distinct runes
// form an upper/lower pair.
// For example, U+004B ('K'), U+006B ('k'), and U+212A ('K' for Kelvin)
// are three distinct runes that case-fold to each other.
// When we do add Unicode letters, we must not assume that upper/lower
// are the only case-equivalent pairs.
// Perhaps the Kelvin symbol would be disallowed entirely, for example.
// Or perhaps it would escape as "!!k", or perhaps as "(212A)".
//
// Second, it would be nice to allow Unicode marks as well as letters,
// but marks include combining marks, and then we must deal not
// only with case folding but also normalization: both U+00E9 ('é')
// and U+0065 U+0301 ('e' followed by combining acute accent)
// look the same on the page and are treated by some file systems
// as the same path. If we do allow Unicode marks in paths, there
// must be some kind of normalization to allow only one canonical
// encoding of any character used in an import path.
package module

// IMPORTANT NOTE
Expand Down Expand Up @@ -50,10 +112,12 @@ func (m Version) Path() string {
return m.path
}

// Equal reports whether m and m1 have the same path and the same version.
func (m Version) Equal(m1 Version) bool {
	if m.path != m1.path {
		return false
	}
	return m.version == m1.version
}

// BasePath returns the path part of m without its major version suffix.
func (m Version) BasePath() string {
basePath, _, ok := SplitPathVersion(m.path)
if !ok {
Expand All @@ -62,14 +126,23 @@ func (m Version) BasePath() string {
return basePath
}

// Version returns the version part of m exactly as stored.
// The result is expected to be one of: a canonical semver version,
// the literal string "none", or the empty string.
func (m Version) Version() string {
return m.version
}

// IsValid reports whether m is non-zero, which is decided solely by
// whether its path is non-empty; the version part is not consulted.
func (m Version) IsValid() bool {
return m.path != ""
}

// IsCanonical reports whether m is valid (see IsValid) and carries a
// canonical semver version, i.e. its version is neither empty nor the
// special value "none".
func (m Version) IsCanonical() bool {
	if !m.IsValid() {
		return false
	}
	switch m.version {
	case "", "none":
		// Both placeholders stand for "no concrete version".
		return false
	}
	return true
}

// String returns the string form of the Version:
// (Path@Version, or just Path if Version is empty).
func (m Version) String() string {
Expand Down
46 changes: 46 additions & 0 deletions internal/mod/module/module_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,3 +151,49 @@ func TestParseVersion(t *testing.T) {
})
}
}

// escapeVersionTests lists versions that EscapeVersion must accept,
// together with the escaped form each one is expected to produce.
var escapeVersionTests = []struct {
v string
esc string // expected escaped form; empty means same as v
}{
{v: "v1.2.3-alpha"},
{v: "v3"},
{v: "v2.3.1-ABcD", esc: "v2.3.1-!a!bc!d"},
}

// TestEscapeVersion runs EscapeVersion over every entry of
// escapeVersionTests and checks that it succeeds with the expected
// escaped form.
func TestEscapeVersion(t *testing.T) {
	for _, tc := range escapeVersionTests {
		expected := tc.esc
		if expected == "" {
			// An empty esc field means the version must come back unchanged.
			expected = tc.v
		}
		got, err := EscapeVersion(tc.v)
		switch {
		case err != nil:
			t.Errorf("EscapeVersion(%q): unexpected error: %v", tc.v, err)
		case got != expected:
			t.Errorf("EscapeVersion(%q) = %q, want %q", tc.v, got, expected)
		}
	}
}

// TestEscapePath checks that EscapePath rejects every path that the
// checkPathWithoutVersionTests table marks as invalid, and that a simple
// all-lower-case path is returned unchanged.
func TestEscapePath(t *testing.T) {
	// Only table entries that expect an error are relevant here.
	for _, tc := range checkPathWithoutVersionTests {
		if tc.wantErr == "" {
			continue
		}
		if _, err := EscapePath(tc.path); err == nil {
			t.Errorf("EscapePath(%q): succeeded, want error (invalid path)", tc.path)
		}
	}

	// A path with no upper-case letters needs no escaping.
	path := "foo.com/bar"
	switch esc, err := EscapePath(path); {
	case err != nil:
		t.Fatal(err)
	case esc != path:
		t.Fatalf("EscapePath(%q) = %q, want %q", path, esc, path)
	}
}

0 comments on commit 576c564

Please sign in to comment.