From 174916ac7736a7465f10845cd90bebe55e73aa88 Mon Sep 17 00:00:00 2001 From: Jan Jakes Date: Fri, 10 Jan 2025 10:09:42 +0100 Subject: [PATCH] Add support for LIKE BINARY --- tests/WP_SQLite_Driver_Tests.php | 119 ++++++++----- .../sqlite-ast/class-wp-sqlite-driver.php | 46 +++++ ...s-wp-sqlite-pdo-user-defined-functions.php | 163 +++++++++++++----- 3 files changed, 250 insertions(+), 78 deletions(-) diff --git a/tests/WP_SQLite_Driver_Tests.php b/tests/WP_SQLite_Driver_Tests.php index 2e658cb5..13bbfcf9 100644 --- a/tests/WP_SQLite_Driver_Tests.php +++ b/tests/WP_SQLite_Driver_Tests.php @@ -3033,13 +3033,13 @@ public function testTranslatesUtf8SELECT() { $this->assertQuery( 'DELETE FROM _options' ); } - public function testTranslateLikeBinaryAndGlob() { + public function testTranslateLikeBinary() { // Create a temporary table for testing $this->assertQuery( - "CREATE TABLE _tmp_table ( - ID INTEGER PRIMARY KEY AUTO_INCREMENT NOT NULL, - name varchar(20) NOT NULL default '' - );" + 'CREATE TABLE _tmp_table ( + ID INTEGER PRIMARY KEY AUTO_INCREMENT NOT NULL, + name varchar(20) + )' ); // Insert data into the table @@ -3052,70 +3052,111 @@ public function testTranslateLikeBinaryAndGlob() { $this->assertQuery( "INSERT INTO _tmp_table (name) VALUES ('special%chars');" ); $this->assertQuery( "INSERT INTO _tmp_table (name) VALUES ('special_chars');" ); $this->assertQuery( "INSERT INTO _tmp_table (name) VALUES ('special\\chars');" ); + $this->assertQuery( "INSERT INTO _tmp_table (name) VALUES ('aste*risk');" ); + $this->assertQuery( "INSERT INTO _tmp_table (name) VALUES ('question?mark');" ); - // Test case-sensitive LIKE BINARY + // Test exact string $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'first'" ); $this->assertCount( 1, $result ); $this->assertEquals( 'first', $result[0]->name ); - // Test case-sensitive LIKE BINARY with wildcard % + // Test exact string with no matches + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'third'" ); + $this->assertCount( 0, $result ); + + // Test mixed case + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'First'" ); + $this->assertCount( 0, $result ); + + // Test % wildcard $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'f%'" ); $this->assertCount( 1, $result ); $this->assertEquals( 'first', $result[0]->name ); - // Test case-sensitive LIKE BINARY with wildcard _ + // Test % wildcard with no matches + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'x%'" ); + $this->assertCount( 0, $result ); + + // Test "%" character (not a wildcard) + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'special\\%chars'" ); + $this->assertCount( 1, $result ); + $this->assertEquals( 'special%chars', $result[0]->name ); + + // Test _ wildcard $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'f_rst'" ); $this->assertCount( 1, $result ); $this->assertEquals( 'first', $result[0]->name ); - // Test case-insensitive LIKE - $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE 'FIRST'" ); - $this->assertCount( 2, $result ); // Should match both 'first' and 'FIRST' - - // Test mixed case with LIKE BINARY - $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'First'" ); - $this->assertCount( 0, $result ); - - // Test no matches with LIKE BINARY - $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'third'" ); + // Test _ wildcard with no matches + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'x_yz'" ); $this->assertCount( 0, $result ); - // Test GLOB equivalent for case-sensitive matching with wildcard - $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name GLOB 'f*'" ); + // Test "_" character (not a wildcard) + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'special\\_chars'" ); $this->assertCount( 1, $result ); - $this->assertEquals( 'first', $result[0]->name ); + $this->assertEquals( 'special_chars', $result[0]->name ); - // Test GLOB with single character wildcard - $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name GLOB 'f?rst'" ); + // Test escaping of "*" + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'aste*risk'" ); $this->assertCount( 1, $result ); - $this->assertEquals( 'first', $result[0]->name ); + $this->assertEquals( 'aste*risk', $result[0]->name ); - // Test GLOB with no matches - $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name GLOB 'S*'" ); + // Test escaping of "*" with no matches + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'f*'" ); $this->assertCount( 0, $result ); - // Test GLOB case sensitivity with LIKE and GLOB - $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name GLOB 'first';" ); - $this->assertCount( 1, $result ); // Should only match 'first' + // Test escaping of "?" + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'question?mark'" ); + $this->assertCount( 1, $result ); + $this->assertEquals( 'question?mark', $result[0]->name ); - $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name GLOB 'FIRST';" ); - $this->assertCount( 1, $result ); // Should only match 'FIRST' + // Test escaping of "?" with no matches + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'f?rst'" ); + $this->assertCount( 0, $result ); - // Test NULL comparison with LIKE BINARY - $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'first';" ); - $this->assertCount( 1, $result ); - $this->assertEquals( 'first', $result[0]->name ); + // Test escaping of character class + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY '[f]irst'" ); + $this->assertCount( 0, $result ); - $result = $this->assertQuery( 'SELECT * FROM _tmp_table WHERE name LIKE BINARY NULL;' ); - $this->assertCount( 0, $result ); // NULL comparison should return no results + // Test NULL + $result = $this->assertQuery( 'SELECT * FROM _tmp_table WHERE name LIKE BINARY NULL' ); + $this->assertCount( 0, $result ); // Test pattern with special characters using LIKE BINARY - $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY '%special%';" ); + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY '%special%'" ); $this->assertCount( 4, $result ); $this->assertEquals( '%special%', $result[0]->name ); $this->assertEquals( 'special%chars', $result[1]->name ); $this->assertEquals( 'special_chars', $result[2]->name ); - $this->assertEquals( 'specialchars', $result[3]->name ); + $this->assertEquals( 'special\chars', $result[3]->name ); + + // Test escaping - "\t" is a tab character + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'firs\\t'" ); + $this->assertCount( 0, $result ); + + // Test escaping - "\\t" is "t" (input resolves to "\t", which LIKE resolves to "t") + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'firs\\\\t'" ); + $this->assertCount( 1, $result ); + $this->assertEquals( 'first', $result[0]->name ); + + // Test escaping - "\%" is a "%" literal + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'special\\%chars'" ); + $this->assertCount( 1, $result ); + $this->assertEquals( 'special%chars', $result[0]->name ); + + // Test escaping - "\\%" is also a "%" literal + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'special\\\\%chars'" ); + $this->assertCount( 1, $result ); + $this->assertEquals( 'special%chars', $result[0]->name ); + + // Test escaping - "\\\%" is "\" and a wildcard + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'special\\\\\\%chars'" ); + $this->assertCount( 1, $result ); + $this->assertEquals( 'special\\chars', $result[0]->name ); + + // Test LIKE without BINARY + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE 'FIRST'" ); + $this->assertCount( 2, $result ); // Should match both 'first' and 'FIRST' } public function testOnConflictReplace() { diff --git a/wp-includes/sqlite-ast/class-wp-sqlite-driver.php b/wp-includes/sqlite-ast/class-wp-sqlite-driver.php index 30752a07..d18d92a1 100644 --- a/wp-includes/sqlite-ast/class-wp-sqlite-driver.php +++ b/wp-includes/sqlite-ast/class-wp-sqlite-driver.php @@ -1313,6 +1313,12 @@ private function translate( $ast ) { throw $this->not_supported_exception( sprintf( 'data type: %s', $child->value ) ); + case 'predicateOperations': + $token = $ast->get_child_token(); + if ( WP_MySQL_Lexer::LIKE_SYMBOL === $token->id ) { + return $this->translate_like( $ast ); + } + return $this->translate_sequence( $ast->get_children() ); case 'systemVariable': // @TODO: Emulate some system variables, or use reasonable defaults. // See: https://dev.mysql.com/doc/refman/8.4/en/server-system-variable-reference.html @@ -1346,6 +1352,13 @@ private function translate_token( WP_MySQL_Token $token ) { return '"' . trim( $token->value, '`"' ) . '"'; case WP_MySQL_Lexer::AUTO_INCREMENT_SYMBOL: return 'AUTOINCREMENT'; + case WP_MySQL_Lexer::BINARY_SYMBOL: + /* + * There is no "BINARY expr" equivalent in SQLite. We can look for + * the BINARY keyword in particular cases (with REGEXP, LIKE, etc.) + * and then remove it from the translated output here. + */ + return null; default: return $token->value; } @@ -1370,6 +1383,39 @@ private function translate_sequence( array $nodes, string $separator = ' ' ): ?s return implode( $separator, $parts ); } + private function translate_like( WP_Parser_Node $node ): string { + $tokens = $node->get_descendant_tokens(); + $is_binary = isset( $tokens[1] ) && WP_MySQL_Lexer::BINARY_SYMBOL === $tokens[1]->id; + + if ( true === $is_binary ) { + $children = $node->get_children(); + return sprintf( + 'GLOB _helper_like_to_glob_pattern(%s)', + $this->translate( $children[1] ) + ); + } + + /* + * @TODO: Implement the ESCAPE '...' clause. + */ + + /* + * @TODO: Implement more correct LIKE behavior. + * + * While SQLite supports the LIKE operator, it seems to differ from the + * MySQL behavior in some ways: + * + * 1. In SQLite, LIKE is case-insensitive only for ASCII characters + * ('a' LIKE 'A' is TRUE but 'æ' LIKE 'Æ' is FALSE) + * 2. In MySQL, LIKE interprets some escape sequences. See the contents + * of the "_helper_like_to_glob_pattern" function. + * + * We'll probably need to overload the like() function: + * https://www.sqlite.org/lang_corefunc.html#like + */ + return $this->translate_sequence( $node->get_children() ); + } + private function get_sqlite_create_table_statement( string $table_name, ?string $new_table_name = null ): array { // 1. Get table info. $table_info = $this->execute_sqlite_query( diff --git a/wp-includes/sqlite/class-wp-sqlite-pdo-user-defined-functions.php b/wp-includes/sqlite/class-wp-sqlite-pdo-user-defined-functions.php index 6f0d83df..010e5a92 100644 --- a/wp-includes/sqlite/class-wp-sqlite-pdo-user-defined-functions.php +++ b/wp-includes/sqlite/class-wp-sqlite-pdo-user-defined-functions.php @@ -46,45 +46,48 @@ public function __construct( $pdo ) { * @var array */ private $functions = array( - 'month' => 'month', - 'monthnum' => 'month', - 'year' => 'year', - 'day' => 'day', - 'hour' => 'hour', - 'minute' => 'minute', - 'second' => 'second', - 'week' => 'week', - 'weekday' => 'weekday', - 'dayofweek' => 'dayofweek', - 'dayofmonth' => 'dayofmonth', - 'unix_timestamp' => 'unix_timestamp', - 'now' => 'now', - 'md5' => 'md5', - 'curdate' => 'curdate', - 'rand' => 'rand', - 'from_unixtime' => 'from_unixtime', - 'localtime' => 'now', - 'localtimestamp' => 'now', - 'isnull' => 'isnull', - 'if' => '_if', - 'regexp' => 'regexp', - 'field' => 'field', - 'log' => 'log', - 'least' => 'least', - 'greatest' => 'greatest', - 'get_lock' => 'get_lock', - 'release_lock' => 'release_lock', - 'ucase' => 'ucase', - 'lcase' => 'lcase', - 'unhex' => 'unhex', - 'inet_ntoa' => 'inet_ntoa', - 'inet_aton' => 'inet_aton', - 'datediff' => 'datediff', - 'locate' => 'locate', - 'utc_date' => 'utc_date', - 'utc_time' => 'utc_time', - 'utc_timestamp' => 'utc_timestamp', - 'version' => 'version', + 'month' => 'month', + 'monthnum' => 'month', + 'year' => 'year', + 'day' => 'day', + 'hour' => 'hour', + 'minute' => 'minute', + 'second' => 'second', + 'week' => 'week', + 'weekday' => 'weekday', + 'dayofweek' => 'dayofweek', + 'dayofmonth' => 'dayofmonth', + 'unix_timestamp' => 'unix_timestamp', + 'now' => 'now', + 'md5' => 'md5', + 'curdate' => 'curdate', + 'rand' => 'rand', + 'from_unixtime' => 'from_unixtime', + 'localtime' => 'now', + 'localtimestamp' => 'now', + 'isnull' => 'isnull', + 'if' => '_if', + 'regexp' => 'regexp', + 'field' => 'field', + 'log' => 'log', + 'least' => 'least', + 'greatest' => 'greatest', + 'get_lock' => 'get_lock', + 'release_lock' => 'release_lock', + 'ucase' => 'ucase', + 'lcase' => 'lcase', + 'unhex' => 'unhex', + 'inet_ntoa' => 'inet_ntoa', + 'inet_aton' => 'inet_aton', + 'datediff' => 'datediff', + 'locate' => 'locate', + 'utc_date' => 'utc_date', + 'utc_time' => 'utc_time', + 'utc_timestamp' => 'utc_timestamp', + 'version' => 'version', + + // Internal helper functions. + '_helper_like_to_glob_pattern' => '_helper_like_to_glob_pattern', ); /** @@ -759,4 +762,86 @@ public function utc_timestamp() { public function version() { return '5.5'; } + + /** + * A helper to covert LIKE pattern to a GLOB pattern for "LIKE BINARY" support. + + * @TODO: Some of the MySQL string specifics described below are likely to + * affect also other patterns than just "LIKE BINARY". We should + * consider applying some of the conversions more broadly. + * + * @param string $pattern + * @return string + */ + public function _helper_like_to_glob_pattern( $pattern ) { + if ( null === $pattern ) { + return null; + } + + /* + * 1. Normalize escaping of "%" and "_" characters. + * + * MySQL has unusual handling for "\%" and "\_" in all string literals. + * While other sequences follow the C-style escaping ("\?" is "?", etc.), + * "\%" resolves to "\%" and "\_" resolves to "\_" (unlike in C strings). + * + * This means that "\%" behaves like "\\%", and "\_" behaves like "\\_". + * To preserve this behavior, we need to add a second backslash in cases + * where only one is used. To do so correctly, we need to: + * + * 1. Skip all double backslash patterns (as "\\" resolves to "\"). + * 2. Add an extra backslash when "\%" or "\_" follows right after. + * + * This may be related to: https://bugs.mysql.com/bug.php?id=84118 + */ + $pattern = preg_replace( '/(^|[^\\\\](?:\\\\{2}))*(\\\\[%_])/', '$1\\\\$2', $pattern ); + + /* + * 2. Unescape C-style escape sequences. + * + * MySQL string literals are represented using C-style encoded strings, + * but the GLOB pattern in SQLite doesn't support such escaping. + */ + $pattern = stripcslashes( $pattern ); + + /* + * 3. Escape characters that have special meaning in GLOB patterns. + * + * We need to: + * 1. Escape "]" as "[]]" to avoid interpreting "[...]" as a character class. + * 2. Escape "*" as "[*]" (must be after 1 to avoid being escaped). + * 3. Escape "?" as "[?]" (must be after 1 to avoid being escaped). + */ + $pattern = str_replace( ']', '[]]', $pattern ); + $pattern = str_replace( '*', '[*]', $pattern ); + $pattern = str_replace( '?', '[?]', $pattern ); + + /* + * 4. Convert LIKE wildcards to GLOB wildcards ("%" -> "*", "_" -> "?"). + * + * We need to convert them only when they don't follow any backslashes, + * or when they follow an even number of backslashes (as "\\" is "\"). + */ + $pattern = preg_replace( '/(^|[^\\\\](?:\\\\{2})*)%/', '$1*', $pattern ); + $pattern = preg_replace( '/(^|[^\\\\](?:\\\\{2})*)_/', '$1?', $pattern ); + + /* + * 5. Unescape LIKE escape sequences. + * + * While in MySQL LIKE patterns, a backslash is usually used to escape + * special characters ("%", "_", and "\"), it works with all characters. + * + * That is: + * SELECT '\\x' prints '\x', but LIKE '\\x' is equivalent to LIKE 'x'. + * + * This is true also for multi-byte characters: + * SELECT '\\©' prints '\©', but LIKE '\\©' is equivalent to LIKE '©'. + * + * However, the multi-byte behavior is likely to depend on the charset. + * For now, we'll assume UTF-8 and thus the "u" modifier for the regex. + */ + $pattern = preg_replace( '/\\\\(.)/u', '$1', $pattern ); + + return $pattern; + } }