-
Notifications
You must be signed in to change notification settings - Fork 294
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
internal/mod/module: add support for escaping
We want to make sure that module paths can be stored in arbitrary case-insensitive filesystems without risk of clashes. The logic here has been adapted from the Go equivalent. Signed-off-by: Roger Peppe <[email protected]> Change-Id: I58a5c082f010a087136cf9535796d6b8226f81f7 Reviewed-on: https://review-eu.gerrithub.io/c/cue-lang/cue/+/1173532 TryBot-Result: CUEcueckoo <[email protected]> Reviewed-by: Daniel Martí <[email protected]> Unity-Result: CUE porcuepine <[email protected]>
- Loading branch information
Showing
3 changed files
with
187 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
package module | ||
|
||
import ( | ||
"fmt" | ||
"strings" | ||
"unicode/utf8" | ||
|
||
"cuelang.org/go/internal/mod/semver" | ||
) | ||
|
||
// EscapePath returns the escaped form of the given module path | ||
// (without the major version suffix). | ||
// It fails if the module path is invalid. | ||
func EscapePath(path string) (escaped string, err error) { | ||
if err := CheckPathWithoutVersion(path); err != nil { | ||
return "", err | ||
} | ||
// Technically there's no need to escape capital letters because CheckPath | ||
// doesn't allow them, but let's be defensive. | ||
return escapeString(path) | ||
} | ||
|
||
// EscapeVersion returns the escaped form of the given module version. | ||
// Versions must be in (possibly non-canonical) semver form and must be valid file names | ||
// and not contain exclamation marks. | ||
func EscapeVersion(v string) (escaped string, err error) { | ||
if !semver.IsValid(v) { | ||
return "", &InvalidVersionError{ | ||
Version: v, | ||
Err: fmt.Errorf("version is not in semver syntax"), | ||
} | ||
} | ||
if err := checkElem(v, filePath); err != nil || strings.Contains(v, "!") { | ||
return "", &InvalidVersionError{ | ||
Version: v, | ||
Err: fmt.Errorf("disallowed version string"), | ||
} | ||
} | ||
return escapeString(v) | ||
} | ||
|
||
func escapeString(s string) (escaped string, err error) { | ||
haveUpper := false | ||
for _, r := range s { | ||
if r == '!' || r >= utf8.RuneSelf { | ||
// This should be disallowed by CheckPath, but diagnose anyway. | ||
// The correctness of the escaping loop below depends on it. | ||
return "", fmt.Errorf("internal error: inconsistency in EscapePath") | ||
} | ||
if 'A' <= r && r <= 'Z' { | ||
haveUpper = true | ||
} | ||
} | ||
|
||
if !haveUpper { | ||
return s, nil | ||
} | ||
|
||
var buf []byte | ||
for _, r := range s { | ||
if 'A' <= r && r <= 'Z' { | ||
buf = append(buf, '!', byte(r+'a'-'A')) | ||
} else { | ||
buf = append(buf, byte(r)) | ||
} | ||
} | ||
return string(buf), nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,68 @@ | |
// There are no restrictions imposed directly by use of this structure, | ||
// but additional checking functions, most notably Check, verify that | ||
// a particular path, version pair is valid. | ||
// | ||
// # Escaped Paths | ||
// | ||
// Module versions appear as substrings of file system paths (as stored by | ||
// the modcache package). | ||
// In general we cannot rely on file systems to be case-sensitive. Although | ||
// module paths cannot currently contain upper case characters because | ||
// OCI registries forbid that, versions can. That | ||
// is, we cannot rely on the file system to keep foo.com/[email protected] and | ||
// foo.com/[email protected] separate. Windows and macOS don't. Instead, we must | ||
// never require two different casings of a file path. | ||
// | ||
// One possibility would be to make the escaped form be the lowercase | ||
// hexadecimal encoding of the actual path bytes. This would avoid ever | ||
// needing different casings of a file path, but it would be fairly illegible | ||
// to most programmers when those paths appeared in the file system | ||
// (including in file paths in compiler errors and stack traces) | ||
// in web server logs, and so on. Instead, we want a safe escaped form that | ||
// leaves most paths unaltered. | ||
// | ||
// The safe escaped form is to replace every uppercase letter | ||
// with an exclamation mark followed by the letter's lowercase equivalent. | ||
// | ||
// For example, | ||
// | ||
// foo.com/[email protected] -> foo.com/[email protected]!p!r!e | ||
// | ||
// Versions that avoid upper-case letters are left unchanged. | ||
// Note that because import paths are ASCII-only and avoid various | ||
// problematic punctuation (like : < and >), the escaped form is also ASCII-only | ||
// and avoids the same problematic punctuation. | ||
// | ||
// Neither versions nor module paths allow exclamation marks, so there is no | ||
// need to define how to escape a literal !. | ||
// | ||
// # Unicode Restrictions | ||
// | ||
// Today, paths are disallowed from using Unicode. | ||
// | ||
// Although paths are currently disallowed from using Unicode, | ||
// we would like at some point to allow Unicode letters as well, to assume that | ||
// file systems and URLs are Unicode-safe (storing UTF-8), and apply | ||
// the !-for-uppercase convention for escaping them in the file system. | ||
// But there are at least two subtle considerations. | ||
// | ||
// First, note that not all case-fold equivalent distinct runes | ||
// form an upper/lower pair. | ||
// For example, U+004B ('K'), U+006B ('k'), and U+212A ('K' for Kelvin) | ||
// are three distinct runes that case-fold to each other. | ||
// When we do add Unicode letters, we must not assume that upper/lower | ||
// are the only case-equivalent pairs. | ||
// Perhaps the Kelvin symbol would be disallowed entirely, for example. | ||
// Or perhaps it would escape as "!!k", or perhaps as "(212A)". | ||
// | ||
// Second, it would be nice to allow Unicode marks as well as letters, | ||
// but marks include combining marks, and then we must deal not | ||
// only with case folding but also normalization: both U+00E9 ('é') | ||
// and U+0065 U+0301 ('e' followed by combining acute accent) | ||
// look the same on the page and are treated by some file systems | ||
// as the same path. If we do allow Unicode marks in paths, there | ||
// must be some kind of normalization to allow only one canonical | ||
// encoding of any character used in an import path. | ||
package module | ||
|
||
// IMPORTANT NOTE | ||
|
@@ -50,10 +112,12 @@ func (m Version) Path() string { | |
return m.path | ||
} | ||
|
||
// Equal reports whether m is equal to m1. | ||
func (m Version) Equal(m1 Version) bool { | ||
return m.path == m1.path && m.version == m1.version | ||
} | ||
|
||
// BasePath returns the path part of m without its major version suffix. | ||
func (m Version) BasePath() string { | ||
basePath, _, ok := SplitPathVersion(m.path) | ||
if !ok { | ||
|
@@ -62,14 +126,23 @@ func (m Version) BasePath() string { | |
return basePath | ||
} | ||
|
||
// Version returns the version part of m. This is either | ||
// a canonical semver version or "none" or the empty string. | ||
func (m Version) Version() string { | ||
return m.version | ||
} | ||
|
||
// IsValid reports whether m is non-zero. | ||
func (m Version) IsValid() bool { | ||
return m.path != "" | ||
} | ||
|
||
// IsCanonical reports whether m is valid and has a canonical | ||
// semver version. | ||
func (m Version) IsCanonical() bool { | ||
return m.IsValid() && m.version != "" && m.version != "none" | ||
} | ||
|
||
// String returns the string form of the Version: | ||
// (Path@Version, or just Path if Version is empty). | ||
func (m Version) String() string { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters