diff --git a/src/DateFormatPattern.php b/src/DateFormatPattern.php
new file mode 100644
index 0000000..9f16b0a
--- /dev/null
+++ b/src/DateFormatPattern.php
@@ -0,0 +1,81 @@
+
+     * Where _value_ is either a literal or an array where `0` is a pattern character and `1` its length.
+     */
+    public static function tokenize(string $pattern): array
+    {
+        static $cache = [];
+
+        return $cache[$pattern] ??= self::do_tokenize($pattern);
+    }
+
+    /**
+     * @param string $pattern
+     *     A date format pattern; for example, "yyyy.MM.dd G 'at' HH:mm:ss zzz".
+     *
+     * @return array
+     *     Where _value_ is either a literal or an array where `0` is a pattern character and `1` its length.
+     */
+    private static function do_tokenize(string $pattern): array
+    {
+        $tokens = [];
+        $is_literal = false;
+        $literal = '';
+        $z = mb_strlen($pattern);
+
+        for ($i = 0; $i < $z; ++$i) {
+            $c = mb_substr($pattern, $i, 1);
+
+            if ($c === self::QUOTE) {
+                // Two adjacent single vertical quotes (''), which represent a literal single quote,
+                // either inside or outside a quoted text.
+                if (mb_substr($pattern, $i + 1, 1) === self::QUOTE) {
+                    $i++;
+                    $literal .= self::QUOTE;
+                } else {
+                    // A lone quote opens or closes a quoted (literal) section
+                    $is_literal = !$is_literal;
+                }
+            } elseif ($is_literal) {
+                $literal .= $c;
+            } elseif (ctype_alpha($c)) {
+                if ($literal !== '') { // not `if ($literal)`: the literal "0" is falsy and would be dropped
+                    $tokens[] = $literal;
+                    $literal = '';
+                }
+
+                for ($j = $i + 1; $j < $z; ++$j) {
+                    $nc = mb_substr($pattern, $j, 1);
+                    if ($nc !== $c) {
+                        break;
+                    }
+                }
+                $tokens[] = [ $c, $j - $i ];
+                $i = $j - 1; // resume on the last repeated character; the loop's ++$i then moves past it
+            } else {
+                $literal .= $c;
+            }
+        }
+
+        // Flush a trailing literal (also reached on a malformed quote); compared to '' because "0" is falsy
+        if ($literal !== '') {
+            $tokens[] = $literal;
+        }
+
+        return $tokens;
+    }
+}
diff --git a/src/DateTimeFormatter.php b/src/DateTimeFormatter.php
index e80b4ee..756b925 100644
--- a/src/DateTimeFormatter.php
+++ b/src/DateTimeFormatter.php
@@ -4,6 +4,7 @@
 
 use DateTimeImmutable;
 use DateTimeInterface;
+use InvalidArgumentException;
 use RuntimeException;
 
 use function ceil;
@@ -66,68 +67,6 @@ class DateTimeFormatter implements Formatter
     ];
 
 
-    /**
-     * Parses the datetime format pattern.
-     *
-     * @return array
-     *     Where _value_ is either a literal or an array where `0` is a formatter method and `1` a length.
-     */
-    private static function tokenize(string $pattern): array
-    {
-        static $formats = [];
-
-        if (isset($formats[$pattern])) {
-            return $formats[$pattern];
-        }
-
-        $tokens = [];
-        $is_literal = false;
-        $literal = '';
-
-        for ($i = 0, $n = strlen($pattern); $i < $n; ++$i) {
-            $c = $pattern[$i];
-
-            if ($c === "'") {
-                if ($i < $n - 1 && $pattern[$i + 1] === "'") {
-                    $tokens[] = "'";
-                    $i++;
-                } else {
-                    if ($is_literal) {
-                        $tokens[] = $literal;
-                        $literal = '';
-                        $is_literal = false;
-                    } else {
-                        $is_literal = true;
-                        $literal = '';
-                    }
-                }
-            } else {
-                if ($is_literal) {
-                    $literal .= $c;
-                } else {
-                    for ($j = $i + 1; $j < $n; ++$j) {
-                        if ($pattern[$j] !== $c) {
-                            break;
-                        }
-                    }
-
-                    $l = $j - $i;
-                    $p = str_repeat($c, $l);
-
-                    $tokens[] = isset(self::$formatters[$c]) ? [ self::$formatters[$c], $l ] : $p;
-
-                    $i = $j - 1;
-                }
-            }
-        }
-
-        if ($literal) {
-            $tokens[] = $literal;
-        }
-
-        return $formats[$pattern] = $tokens;
-    }
-
     /**
      * Pad a numeric value with zero on its left.
      */
@@ -181,16 +120,19 @@ public function format(
         $datetime,
         string|DateTimeFormatLength|DateTimeFormatId $pattern_or_length_or_id
     ): string {
-        $datetime = $this->ensure_datetime($datetime);
-        $datetime = new DateTimeAccessor($datetime);
+        $accessor = new DateTimeAccessor($this->ensure_datetime($datetime));
         $pattern = $this->resolve_pattern($pattern_or_length_or_id);
-        $tokens = self::tokenize($pattern);
+        $tokens = DateFormatPattern::tokenize($pattern);
 
         $rc = '';
 
         foreach ($tokens as $token) {
             if (is_array($token)) { // a callback: method name, repeating chars
-                $token = $this->{$token[0]}($datetime, $token[1]);
+                [ $c, $l ] = $token;
+
+                $function = self::$formatters[$c] ??
+                    throw new InvalidArgumentException("Invalid date pattern character '$c' used in '$pattern'");
+                $token = $this->$function($accessor, $l);
             }
 
             $rc .= $token;
@@ -200,20 +142,11 @@ public function format(
     }
 
     /**
-     * Resolves the specified pattern, which can be a width, a skeleton or an actual pattern.
+     * Resolves the specified pattern, which can be a width, a skeleton, or an actual pattern.
      */
     protected function resolve_pattern(
         string|DateTimeFormatLength|DateTimeFormatId $pattern_or_length_or_id
     ): string {
-        if (is_string($pattern_or_length_or_id) && $pattern_or_length_or_id[0] === ':') {
-            trigger_error(
-                "Prefixing date time format ids with ':' is no longer supported, use DateTimeFormatId instead",
-                E_USER_DEPRECATED
-            );
-
-            $pattern_or_length_or_id = DateTimeFormatId::from(substr($pattern_or_length_or_id, 1));
-        }
-
         if ($pattern_or_length_or_id instanceof DateTimeFormatLength) {
             $length = $pattern_or_length_or_id->value;
             $calendar = $this->calendar;
diff --git a/tests/DateTimePatternTest.php b/tests/DateTimePatternTest.php
new file mode 100644
index 0000000..963a2ae
--- /dev/null
+++ b/tests/DateTimePatternTest.php
@@ -0,0 +1,39 @@
+assertEquals($expected, $actual);
+    }
+
+    public static function provide_tokenize(): array
+    {
+        return [
+
+            [ 'G', [ [ 'G', 1 ] ] ],
+            [ 'GG', [ [ 'G', 2 ] ] ],
+            [ 'GGG', [ [ 'G', 3 ] ] ],
+            [ 'GGGG', [ [ 'G', 4 ] ] ],
+            [ 'GGGGG', [ [ 'G', 5 ] ] ],
+            [ 'E d', [ [ 'E', 1 ], ' ', [ 'd', 1 ] ] ],
+            [ 'E h:mm a', [ [ 'E', 1 ], ' ', [ 'h', 1 ], ':', [ 'm', 2 ], ' ', [ 'a', 1] ] ],
+            [ 'E d/M/y', [ [ 'E', 1 ], ' ', [ 'd', 1 ], '/', [ 'M', 1 ], '/', [ 'y', 1] ] ],
+            [ "E 'd/M/'y", [ [ 'E', 1 ], " d/M/", [ 'y', 1] ] ],
+            [ "'week' W 'of' MMMM", [ "week ", [ 'W', 1 ], " of ", [ 'M', 4 ] ] ],
+            [ "EEE, MMM d, ''yy", [ [ 'E', 3 ], ", ", [ 'M', 3 ], " ", [ 'd', 1 ], ", '", [ 'y', 2 ] ] ],
+            [ "h:mm a", [ [ 'h', 1 ], ":", [ 'm', 2 ], " ", [ 'a', 1 ] ] ],
+            [ "hh 'o''clock' a, zzzz", [ [ 'h', 2 ], " o'clock ", [ 'a', 1 ], ', ', [ 'z', 4 ] ] ],
+
+        ];
+    }
+}