Skip to content

Commit

Permalink
feat: url parse add sanitize (#83)
Browse files Browse the repository at this point in the history
* feat: add sanitizeString function and enhance query parsing (#83)

- Introduced sanitizeString function to clean and sanitize input strings, allowing for customizable character replacement.
- Updated parseQueryString and parseQueryStringArray functions to include an optional sanitize parameter, enabling automatic sanitization of query strings.
- Added comprehensive unit tests for sanitizeString, covering various scenarios including special characters, non-ASCII characters, and empty strings.
- Enhanced documentation with detailed JSDoc comments for the new sanitizeString function and updated existing functions to reflect the new sanitization feature.

* feat: enhance query string parsing with sanitization and additional tests

- Renamed the test for parseQueryStringArray to include a version identifier for clarity.
- Introduced a new test case for parseQueryStringArray that validates sanitization of query parameters, ensuring special characters are handled correctly.
- Updated the parseQueryStringArray function to support an optional sanitizeWithSeparator parameter, allowing for customizable sanitization behavior.
- Enhanced the sanitizeString function to support custom character replacement, improving flexibility in string sanitization.
- Added comprehensive unit tests for sanitizeString, covering various scenarios including custom replacements and mixed character inputs.

* chore: update .gitignore to exclude .DS_Store files
  • Loading branch information
iugo authored Dec 20, 2024
1 parent 924caa9 commit 9742a76
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 4 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
temp
.DS_Store
95 changes: 94 additions & 1 deletion js/url-parse.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
parseQueryPositiveInts,
parseQueryString,
parseQueryStringArray,
sanitizeString,
} from './url-parse.ts';

Deno.test('parseQueryString', () => {
Expand Down Expand Up @@ -64,7 +65,7 @@ Deno.test('parseQueryPositiveInt', () => {
assertThrows(() => parseQueryPositiveInt(i), TypeError); // 非数字字符串
});

Deno.test('parseQueryStringArray', () => {
Deno.test('parseQueryStringArray #1', () => {
const url = new URL('https://example.com/path');
url.searchParams.set('a', 'a,b,c');
url.searchParams.set('b', 'x|y|z');
Expand All @@ -89,6 +90,41 @@ Deno.test('parseQueryStringArray', () => {
);
});

// Checks that parseQueryStringArray, when asked to sanitize, turns every
// special character into a separator and drops the resulting empty items.
Deno.test('parseQueryStringArray with sanitization', () => {
  const url = new URL('https://example.com/path');
  const rawValues: Array<[string, string]> = [
    ['a', '<script>,alert(1),</script>'],
    ['b', 'hello!world,test@email'],
    ['c', '你好!世界,test#123'],
  ];
  for (const [key, value] of rawValues) {
    url.searchParams.set(key, value);
  }

  // With sanitizeWithSeparator enabled: each parameter splits into clean items.
  const expectedByKey: Array<[string, string[]]> = [
    ['a', ['script', 'alert', '1', 'script']],
    ['b', ['hello', 'world', 'test', 'email']],
    ['c', ['你好', '世界', 'test', '123']],
  ];
  for (const [key, expected] of expectedByKey) {
    const parsed = parseQueryStringArray(url.searchParams.get(key), {
      sanitizeWithSeparator: true,
    });
    assertEquals(parsed, expected);
  }

  // Without sanitizeWithSeparator: the unsafe input must be rejected.
  assertThrows(
    () => parseQueryStringArray(url.searchParams.get('a')),
    TypeError,
  );
});

Deno.test('parseQueryNumber', () => {
const url = new URL('https://example.com/path');
url.searchParams.set('a', '123');
Expand Down Expand Up @@ -188,3 +224,60 @@ Deno.test('parseQueryNumbers', () => {
assertThrows(() => parseQueryNumbers(url.searchParams.get('f')), TypeError);
assertThrows(() => parseQueryNumbers(url.searchParams.get('g')), TypeError);
});

// Exercises sanitizeString: default removal, custom replacement text,
// non-ASCII pass-through, empty input and whitespace handling.
Deno.test('sanitizeString', () => {
  // Basic behavior (special characters are replaced with the empty string).
  assertEquals(sanitizeString('hello world'), 'hello world');
  assertEquals(sanitizeString('abc123'), 'abc123');
  assertEquals(sanitizeString('user_name'), 'user_name');
  assertEquals(sanitizeString('hello!world'), 'helloworld');
  // The input must contain '@' and '.', both of which are stripped.
  assertEquals(sanitizeString('test@email.com'), 'testemailcom');

  // Custom replacement text.
  assertEquals(
    sanitizeString('hello!world', { replaceWith: '_' }),
    'hello_world',
  );
  assertEquals(
    sanitizeString('test@email.com', { replaceWith: '.' }),
    'test.email.com',
  );
  assertEquals(
    sanitizeString('<script>alert(1)</script>', { replaceWith: '-' }),
    '-script-alert-1---script-',
  );
  assertEquals(sanitizeString('[test]', { replaceWith: '_' }), '_test_');

  // Non-ASCII characters are preserved.
  assertEquals(sanitizeString('你好世界'), '你好世界');
  assertEquals(sanitizeString('こんにちは'), 'こんにちは');
  assertEquals(sanitizeString('안녕하세요'), '안녕하세요');

  // Mixed ASCII / non-ASCII input with a custom replacement.
  assertEquals(
    sanitizeString('hello@世界', { replaceWith: '_' }),
    'hello_世界',
  );
  assertEquals(
    sanitizeString('test!你好#world', { replaceWith: '-' }),
    'test-你好-world',
  );
  assertEquals(
    sanitizeString('안녕!@#$%^&*하세요', { replaceWith: '.' }),
    '안녕........하세요',
  );

  // Empty string.
  assertEquals(sanitizeString(''), '');

  // Input made up only of special characters.
  assertEquals(sanitizeString('!@#$%^&*()'), '');
  assertEquals(
    sanitizeString('!@#$%^&*()', { replaceWith: '_' }),
    '__________',
  );

  // Whitespace (spaces, tabs, line breaks) is left untouched.
  assertEquals(sanitizeString(' hello world '), ' hello world ');
  assertEquals(sanitizeString('\thello\nworld\r'), '\thello\nworld\r');
});
67 changes: 64 additions & 3 deletions js/url-parse.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,20 +27,63 @@ import { isSafeString, SafeString } from '../ts/string.ts';
*/
export function parseQueryString(
  query: string | null,
  { sanitize = false }: {
    /**
     * Whether to strip unsafe characters from the value (sanitize it)
     * before validation. Defaults to `false` (no stripping).
     */
    sanitize?: boolean;
  } = {},
): SafeString | undefined {
  // A missing parameter is reported as `undefined`, not as an error.
  if (query === null) {
    return undefined;
  }
  // Declared with `let` (and only once) because sanitization below may
  // replace the trimmed value.
  let trimmedQuery = query.trim();
  // The literal text 'undefined' and blank values count as "not provided".
  if (trimmedQuery === 'undefined' || trimmedQuery === '') {
    return undefined;
  }
  if (sanitize) {
    // NOTE(review): sanitizing can leave an empty string (e.g. input '!!!');
    // whether isSafeString accepts '' is not visible here — confirm.
    trimmedQuery = sanitizeString(trimmedQuery);
  }
  if (!isSafeString(trimmedQuery)) {
    // Report the original, untrimmed input so the caller sees what was sent.
    throw new TypeError(`invalid query string: ${query}`);
  }
  return trimmedQuery;
}

/**
 * Sanitize a string by replacing ASCII punctuation and symbol characters.
 *
 * Spaces, ASCII digits, ASCII letters, the underscore, control characters
 * (tab, line breaks, ...) and all non-ASCII characters are left untouched.
 *
 * @param str - The string to sanitize.
 * @param options - Configuration options.
 * @param options.replaceWith - Text inserted in place of every replaced
 * character. It is inserted literally: sequences such as `$&` or `$$` have
 * no special meaning. Defaults to the empty string `''`.
 * @returns The sanitized string.
 */
export function sanitizeString(
  str: string,
  {
    replaceWith = '',
  }: {
    /**
     * Text inserted in place of every replaced character.
     * Defaults to the empty string `''`.
     */
    replaceWith?: string;
  } = {},
): string {
  // Replaced ASCII ranges (everything printable except space, digits,
  // letters and underscore):
  //   0x21-0x2F  ! " # $ % & ' ( ) * + , - . /
  //   0x3A-0x40  : ; < = > ? @
  //   0x5B-0x5E  [ \ ] ^
  //   0x60       `
  //   0x7B-0x7E  { | } ~
  return str.replace(
    /[\x21-\x2F\x3A-\x40\x5B-\x5E\x60\x7B-\x7E]/g,
    // A replacer function is used instead of passing the string directly so
    // that `$`-patterns in `replaceWith` ($&, $$, $`, $') are not expanded.
    () => replaceWith,
  );
}

/**
* Convert URL query parameter value to array of strings
* 将 URL 查询参数值转换为字符串数组
Expand Down Expand Up @@ -70,14 +113,32 @@ export function parseQueryString(
*/
export function parseQueryStringArray(
query: string | null,
{ separator = ',' }: { separator?: string } = {},
{
separator = ',',
sanitizeWithSeparator = false,
}: {
/**
* 用于分割字符串的分隔符
* 默认为逗号 ','
*/
separator?: string;
/**
* 用于替换特殊字符的字符串
* 默认不进行特殊字符替换 (如果出现会报错)
*/
sanitizeWithSeparator?: boolean;
} = {},
): string[] | undefined {
if (query === null || query === 'undefined' || query === '') {
return undefined;
}

const queryString = sanitizeWithSeparator
? sanitizeString(query, { replaceWith: separator })
: query;

try {
const arr = query
const arr = queryString
.split(separator)
.map((v) => parseQueryString(v) ?? '')
.filter(Boolean);
Expand Down

0 comments on commit 9742a76

Please sign in to comment.