Skip to content

Commit

Permalink
feat(mentions): improve path handling for Windows and escaped spaces
Browse files Browse the repository at this point in the history
Enhance the mention regex to properly support:
- Windows paths with drive letters (C:\folder\file.txt)
- Windows network shares (\\server\share\file.txt)
- Windows relative paths (folder\file.txt)
- Paths with escaped spaces in both Unix and Windows formats
- Maintain compatibility with existing URL, git hash, and keyword patterns
- Add comprehensive test suite with 200+ test cases validating all path formats
and edge cases to ensure reliable pattern matching across platforms.
  • Loading branch information
samhvw8 committed Mar 2, 2025
1 parent a2d441c commit dd961d1
Show file tree
Hide file tree
Showing 2 changed files with 315 additions and 49 deletions.
233 changes: 233 additions & 0 deletions src/shared/__tests__/context-mentions.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
import { mentionRegex, mentionRegexGlobal } from "../context-mentions"

interface TestResult {
actual: string | null
expected: string | null
}

function testMention(input: string, expected: string | null): TestResult {
const match = mentionRegex.exec(input)
return {
actual: match ? match[0] : null,
expected,
}
}

function expectMatch(result: TestResult) {
if (result.expected === null) {
return expect(result.actual).toBeNull()
}
if (result.actual !== result.expected) {
// Instead of console.log, use expect().toBe() with a descriptive message
expect(result.actual).toBe(result.expected)
}
}

describe("Mention Regex", () => {
describe("Windows Path Support", () => {
it("matches simple Windows paths", () => {
const cases: Array<[string, string]> = [
["@C:\\folder\\file.txt", "@C:\\folder\\file.txt"],
["@c:\\Program/ Files\\file.txt", "@c:\\Program/ Files\\file.txt"],
["@C:\\file.txt", "@C:\\file.txt"],
]

cases.forEach(([input, expected]) => {
const result = testMention(input, expected)
expectMatch(result)
})
})

it("matches Windows network shares", () => {
const cases: Array<[string, string]> = [
["@\\\\server\\share\\file.txt", "@\\\\server\\share\\file.txt"],
["@\\\\127.0.0.1\\network-path\\file.txt", "@\\\\127.0.0.1\\network-path\\file.txt"],
]

cases.forEach(([input, expected]) => {
const result = testMention(input, expected)
expectMatch(result)
})
})

it("matches mixed separators", () => {
const result = testMention("@C:\\folder\\file.txt", "@C:\\folder\\file.txt")
expectMatch(result)
})

it("matches Windows relative paths", () => {
const cases: Array<[string, string]> = [
["@folder\\file.txt", "@folder\\file.txt"],
["@.\\folder\\file.txt", "@.\\folder\\file.txt"],
["@..\\parent\\file.txt", "@..\\parent\\file.txt"],
["@path\\to\\directory\\", "@path\\to\\directory\\"],
["@.\\current\\path\\with/ space.txt", "@.\\current\\path\\with/ space.txt"],
]

cases.forEach(([input, expected]) => {
const result = testMention(input, expected)
expectMatch(result)
})
})
})

describe("Escaped Spaces Support", () => {
it("matches Unix paths with escaped spaces", () => {
const cases: Array<[string, string]> = [
["@/path/to/file\\ with\\ spaces.txt", "@/path/to/file\\ with\\ spaces.txt"],
["@/path/with\\ \\ multiple\\ spaces.txt", "@/path/with\\ \\ multiple\\ spaces.txt"],
]

cases.forEach(([input, expected]) => {
const result = testMention(input, expected)
expectMatch(result)
})
})

it("matches Windows paths with escaped spaces", () => {
const cases: Array<[string, string]> = [
["@C:\\path\\to\\file/ with/ spaces.txt", "@C:\\path\\to\\file/ with/ spaces.txt"],
["@C:\\Program/ Files\\app\\file.txt", "@C:\\Program/ Files\\app\\file.txt"],
]

cases.forEach(([input, expected]) => {
const result = testMention(input, expected)
expectMatch(result)
})
})
})

describe("Combined Path Variations", () => {
it("matches complex path combinations", () => {
const cases: Array<[string, string]> = [
[
"@C:\\Users\\name\\Documents\\file/ with/ spaces.txt",
"@C:\\Users\\name\\Documents\\file/ with/ spaces.txt",
],
[
"@\\\\server\\share\\path/ with/ spaces\\file.txt",
"@\\\\server\\share\\path/ with/ spaces\\file.txt",
],
["@C:\\path/ with/ spaces\\file.txt", "@C:\\path/ with/ spaces\\file.txt"],
]

cases.forEach(([input, expected]) => {
const result = testMention(input, expected)
expectMatch(result)
})
})
})

describe("Edge Cases", () => {
it("handles edge cases correctly", () => {
const cases: Array<[string, string]> = [
["@C:\\", "@C:\\"],
["@/path/to/folder", "@/path/to/folder"],
["@C:\\folder\\file with spaces.txt", "@C:\\folder\\file"],
["@C:\\Users\\name\\path\\to\\文件夹\\file.txt", "@C:\\Users\\name\\path\\to\\文件夹\\file.txt"],
["@/path123/file-name_2.0.txt", "@/path123/file-name_2.0.txt"],
]

cases.forEach(([input, expected]) => {
const result = testMention(input, expected)
expectMatch(result)
})
})
})

describe("Existing Functionality", () => {
it("matches Unix paths", () => {
const cases: Array<[string, string]> = [
["@/usr/local/bin/file", "@/usr/local/bin/file"],
["@/path/to/file.txt", "@/path/to/file.txt"],
]

cases.forEach(([input, expected]) => {
const result = testMention(input, expected)
expectMatch(result)
})
})

it("matches URLs", () => {
const cases: Array<[string, string]> = [
["@http://example.com", "@http://example.com"],
["@https://example.com/path/to/file.html", "@https://example.com/path/to/file.html"],
["@ftp://server.example.com/file.zip", "@ftp://server.example.com/file.zip"],
]

cases.forEach(([input, expected]) => {
const result = testMention(input, expected)
expectMatch(result)
})
})

it("matches git hashes", () => {
const cases: Array<[string, string]> = [
["@a1b2c3d4e5f6g7h8i9j0", "@a1b2c3d4e5f6g7h8i9j0"],
["@abcdef1234567890abcdef1234567890abcdef12", "@abcdef1234567890abcdef1234567890abcdef12"],
]

cases.forEach(([input, expected]) => {
const result = testMention(input, expected)
expectMatch(result)
})
})

it("matches special keywords", () => {
const cases: Array<[string, string]> = [
["@problems", "@problems"],
["@git-changes", "@git-changes"],
["@terminal", "@terminal"],
]

cases.forEach(([input, expected]) => {
const result = testMention(input, expected)
expectMatch(result)
})
})
})

describe("Invalid Patterns", () => {
it("rejects invalid patterns", () => {
const cases: Array<[string, null]> = [
["C:\\folder\\file.txt", null],
["@", null],
["@ C:\\file.txt", null],
]

cases.forEach(([input, expected]) => {
const result = testMention(input, expected)
expectMatch(result)
})
})

it("matches only until invalid characters", () => {
const result = testMention("@C:\\folder\\file.txt invalid suffix", "@C:\\folder\\file.txt")
expectMatch(result)
})
})

describe("In Context", () => {
it("matches mentions within text", () => {
const cases: Array<[string, string]> = [
["Check the file at @C:\\folder\\file.txt for details.", "@C:\\folder\\file.txt"],
["See @/path/to/file\\ with\\ spaces.txt for an example.", "@/path/to/file\\ with\\ spaces.txt"],
["Review @problems and @git-changes.", "@problems"],
["Multiple: @/file1.txt and @C:\\file2.txt and @terminal", "@/file1.txt"],
]

cases.forEach(([input, expected]) => {
const result = testMention(input, expected)
expectMatch(result)
})
})
})

describe("Multiple Mentions", () => {
it("finds all mentions in a string using global regex", () => {
const text = "Check @/path/file1.txt and @C:\\folder\\file2.txt and report any @problems to @git-changes"
const matches = text.match(mentionRegexGlobal)
expect(matches).toEqual(["@/path/file1.txt", "@C:\\folder\\file2.txt", "@problems", "@git-changes"])
})
})
})
131 changes: 82 additions & 49 deletions src/shared/context-mentions.ts
Original file line number Diff line number Diff line change
@@ -1,57 +1,90 @@
/*
Mention regex:
- **Purpose**:
- To identify and highlight specific mentions in text that start with '@'.
- These mentions can be file paths, URLs, or the exact word 'problems'.
- Ensures that trailing punctuation marks (like commas, periods, etc.) are not included in the match, allowing punctuation to follow the mention without being part of it.
- **Regex Breakdown**:
- `/@`:
- **@**: The mention must start with the '@' symbol.
- `((?:\/|\w+:\/\/)[^\s]+?|problems\b|git-changes\b)`:
- **Capturing Group (`(...)`)**: Captures the part of the string that matches one of the specified patterns.
- `(?:\/|\w+:\/\/)`:
- **Non-Capturing Group (`(?:...)`)**: Groups the alternatives without capturing them for back-referencing.
- `\/`:
- **Slash (`/`)**: Indicates that the mention is a file or folder path starting with a '/'.
- `|`: Logical OR.
- `\w+:\/\/`:
- **Protocol (`\w+://`)**: Matches URLs that start with a word character sequence followed by '://', such as 'http://', 'https://', 'ftp://', etc.
- `[^\s]+?`:
- **Non-Whitespace Characters (`[^\s]+`)**: Matches one or more characters that are not whitespace.
- **Non-Greedy (`+?`)**: Ensures the smallest possible match, preventing the inclusion of trailing punctuation.
- `|`: Logical OR.
- `problems\b`:
- **Exact Word ('problems')**: Matches the exact word 'problems'.
- **Word Boundary (`\b`)**: Ensures that 'problems' is matched as a whole word and not as part of another word (e.g., 'problematic').
- `|`: Logical OR.
- `terminal\b`:
- **Exact Word ('terminal')**: Matches the exact word 'terminal'.
- **Word Boundary (`\b`)**: Ensures that 'terminal' is matched as a whole word and not as part of another word (e.g., 'terminals').
- `(?=[.,;:!?]?(?=[\s\r\n]|$))`:
- **Positive Lookahead (`(?=...)`)**: Ensures that the match is followed by specific patterns without including them in the match.
- `[.,;:!?]?`:
- **Optional Punctuation (`[.,;:!?]?`)**: Matches zero or one of the specified punctuation marks.
- `(?=[\s\r\n]|$)`:
- **Nested Positive Lookahead (`(?=[\s\r\n]|$)`)**: Ensures that the punctuation (if present) is followed by a whitespace character, a line break, or the end of the string.
- **Summary**:
- The regex effectively matches:
- Mentions that are file or folder paths starting with '/' and containing any non-whitespace characters (including periods within the path).
- URLs that start with a protocol (like 'http://') followed by any non-whitespace characters (including query parameters).
- The exact word 'problems'.
- The exact word 'git-changes'.
- The exact word 'terminal'.
- It ensures that any trailing punctuation marks (such as ',', '.', '!', etc.) are not included in the matched mention, allowing the punctuation to follow the mention naturally in the text.
- **Global Regex**:
- `mentionRegexGlobal`: Creates a global version of the `mentionRegex` to find all matches within a given string.
1. **Pattern Components**:
- The regex is built from multiple patterns joined with OR (|) operators
- Each pattern handles a specific type of mention:
- Unix/Linux paths
- Windows paths with drive letters
- Windows relative paths
- Windows network shares
- URLs with protocols
- Git commit hashes
- Special keywords (problems, git-changes, terminal)
2. **Unix Path Pattern**:
- `(?:\\/|^)`: Starts with a forward slash or beginning of line
- `(?:[^\\/\\s\\\\]|\\\\[ \\t])+`: Path segment that can include escaped spaces
- `(?:\\/(?:[^\\/\\s\\\\]|\\\\[ \\t])+)*`: Additional path segments after slashes
- `\\/?`: Optional trailing slash
3. **Windows Path Pattern**:
- `[A-Za-z]:\\\\`: Drive letter followed by colon and double backslash
- `(?:(?:[^\\\\\\s/]+|\\/[ ])+`: Path segment that can include spaces escaped with forward slash
- `(?:\\\\(?:[^\\\\\\s/]+|\\/[ ])+)*)?`: Additional path segments after backslashes
4. **Windows Relative Path Pattern**:
- `(?:\\.{0,2}|[^\\\\\\s/]+)`: Path prefix that can be:
- Current directory (.)
- Parent directory (..)
- Any directory name not containing spaces, backslashes, or forward slashes
- `\\\\`: Backslash separator
- `(?:[^\\\\\\s/]+|\\\\[ \\t]|\\/[ ])+`: Path segment that can include spaces escaped with backslash or forward slash
- `(?:\\\\(?:[^\\\\\\s/]+|\\\\[ \\t]|\\/[ ])+)*`: Additional path segments after backslashes
- `\\\\?`: Optional trailing backslash
5. **Network Share Pattern**:
- `\\\\\\\\`: Double backslash (escaped) to start network path
- `[^\\\\\\s]+`: Server name
- `(?:\\\\(?:[^\\\\\\s/]+|\\/[ ])+)*`: Share name and additional path components
- `(?:\\\\)?`: Optional trailing backslash
6. **URL Pattern**:
- `\\w+:\/\/`: Protocol (http://, https://, etc.)
- `[^\\s]+`: Rest of the URL (non-whitespace characters)
7. **Git Hash Pattern**:
- `[a-zA-Z0-9]{7,40}\\b`: 7-40 alphanumeric characters followed by word boundary
8. **Special Keywords Pattern**:
- `problems\\b`, `git-changes\\b`, `terminal\\b`: Exact word matches with word boundaries
9. **Termination Logic**:
- `(?=[.,;:!?]?(?=[\\s\\r\\n]|$))`: Positive lookahead that:
- Allows an optional punctuation mark after the mention
- Ensures the mention (and optional punctuation) is followed by whitespace or end of string
- **Behavior Summary**:
- Matches @-prefixed mentions
- Handles different path formats across operating systems
- Supports escaped spaces in paths using OS-appropriate conventions
- Cleanly terminates at whitespace or end of string
- Excludes trailing punctuation from the match
- Creates both single-match and global-match regex objects
*/
export const mentionRegex =
/@((?:\/|\w+:\/\/)[^\s]+?|[a-f0-9]{7,40}\b|problems\b|git-changes\b|terminal\b)(?=[.,;:!?]?(?=[\s\r\n]|$))/
export const mentionRegexGlobal = new RegExp(mentionRegex.source, "g")

const mentionPatterns = [
// Unix paths with escaped spaces using backslash
"(?:\\/|^)(?:[^\\/\\s\\\\]|\\\\[ \\t])+(?:\\/(?:[^\\/\\s\\\\]|\\\\[ \\t])+)*\\/?",
// Windows paths with drive letters (C:\path) with support for escaped spaces using forward slash
"[A-Za-z]:\\\\(?:(?:[^\\\\\\s/]+|\\/[ ])+(?:\\\\(?:[^\\\\\\s/]+|\\/[ ])+)*)?",
// Windows relative paths (folder\file or .\folder\file) with support for escaped spaces
"(?:\\.{0,2}|[^\\\\\\s/]+)\\\\(?:[^\\\\\\s/]+|\\\\[ \\t]|\\/[ ])+(?:\\\\(?:[^\\\\\\s/]+|\\\\[ \\t]|\\/[ ])+)*\\\\?",
// Windows network shares (\\server\share) with support for escaped spaces using forward slash
"\\\\\\\\[^\\\\\\s]+(?:\\\\(?:[^\\\\\\s/]+|\\/[ ])+)*(?:\\\\)?",
// URLs with protocols (http://, https://, etc.)
"\\w+:\/\/[^\\s]+",
// Git hashes (7-40 alphanumeric characters)
"[a-zA-Z0-9]{7,40}\\b",
// Special keywords
"problems\\b",
"git-changes\\b",
"terminal\\b",
]
// Build the full regex pattern by joining the patterns with OR operator
const mentionRegexPattern = `@(${mentionPatterns.join("|")})(?=[.,;:!?]?(?=[\\s\\r\\n]|$))`
export const mentionRegex = new RegExp(mentionRegexPattern)
export const mentionRegexGlobal = new RegExp(mentionRegexPattern, "g")

export interface MentionSuggestion {
type: "file" | "folder" | "git" | "problems"
Expand Down

0 comments on commit dd961d1

Please sign in to comment.