Skip to content

Commit

Permalink
Extract DateTimePattern
Browse files Browse the repository at this point in the history
  • Loading branch information
olvlvl committed Aug 10, 2024
1 parent 24c1032 commit 0f57020
Show file tree
Hide file tree
Showing 3 changed files with 129 additions and 76 deletions.
81 changes: 81 additions & 0 deletions src/DateFormatPattern.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
<?php

namespace ICanBoogie\CLDR;

/**
 * Splits a date format pattern into literal text and pattern-character runs.
 *
 * @link https://www.unicode.org/reports/tr35/tr35-72/tr35-dates.html#Date_Format_Patterns
 */
final class DateFormatPattern
{
    private const QUOTE = "'";

    /**
     * Tokenizes a pattern, memoizing the result per pattern string.
     *
     * @param string $pattern
     *     A date format pattern; for example, "yyyy.MM.dd G 'at' HH:mm:ss zzz".
     *
     * @return array<string|array{ string, int }>
     *     Where _value_ is either a literal or an array where `0` is a pattern character and `1` its length.
     */
    public static function tokenize(string $pattern): array
    {
        // Function-local cache keyed by the pattern string; it is never evicted.
        static $cache = [];

        return $cache[$pattern] ??= self::do_tokenize($pattern);
    }

    /**
     * Performs the actual tokenization, without caching.
     *
     * Quoted text and any non-letter characters are accumulated into a single literal, which is
     * flushed right before the next pattern-character run, and once more at the end of the pattern.
     *
     * @param string $pattern
     *     A date format pattern; for example, "yyyy.MM.dd G 'at' HH:mm:ss zzz".
     *
     * @return array<string|array{ string, int }>
     *     Where _value_ is either a literal or an array where `0` is a pattern character and `1` its length.
     */
    private static function do_tokenize(string $pattern): array
    {
        // Split once instead of calling mb_substr() for every index.
        $chars = mb_str_split($pattern);
        $length = count($chars);

        $tokens = [];
        $is_literal = false;
        $literal = '';
        $i = 0;

        while ($i < $length) {
            $c = $chars[$i];

            if ($c === self::QUOTE) {
                if (($chars[$i + 1] ?? null) === self::QUOTE) {
                    // Two adjacent single vertical quotes (''), which represent a literal single quote,
                    // either inside or outside a quoted text.
                    $literal .= self::QUOTE;
                    $i += 2;
                } else {
                    // A lone quote opens or closes a quoted section.
                    $is_literal = !$is_literal;
                    $i++;
                }

                continue;
            }

            if ($is_literal || !self::is_pattern_character($c)) {
                $literal .= $c;
                $i++;

                continue;
            }

            // Compare against '' explicitly: the literal "0" is falsy in PHP and would otherwise be
            // skipped here and emitted late or dropped altogether.
            if ($literal !== '') {
                $tokens[] = $literal;
                $literal = '';
            }

            // Measure the run of identical pattern characters starting at $i.
            $run = 1;

            while ($i + $run < $length && $chars[$i + $run] === $c) {
                $run++;
            }

            $tokens[] = [ $c, $run ];
            $i += $run;
        }

        // If the pattern ends with literal (could also be a malformed quote)
        if ($literal !== '') {
            $tokens[] = $literal;
        }

        return $tokens;
    }

    /**
     * Whether a character starts a pattern-character run.
     *
     * The length guard keeps multi-byte characters away from `ctype_alpha()`, which inspects bytes.
     */
    private static function is_pattern_character(string $c): bool
    {
        return strlen($c) === 1 && ctype_alpha($c);
    }
}
85 changes: 9 additions & 76 deletions src/DateTimeFormatter.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

use DateTimeImmutable;
use DateTimeInterface;
use InvalidArgumentException;
use RuntimeException;

use function ceil;
Expand Down Expand Up @@ -66,68 +67,6 @@ class DateTimeFormatter implements Formatter

];

/**
 * Parses the datetime format pattern.
 *
 * Results are memoized per pattern string in a function-local static array.
 *
 * @return array<string|array{0: string, 1: int}>
 *     Where _value_ is either a literal or an array where `0` is a formatter method and `1` a length.
 */
private static function tokenize(string $pattern): array
{
    static $formats = [];

    if (isset($formats[$pattern])) {
        return $formats[$pattern];
    }

    $tokens = [];
    $is_literal = false;
    $literal = '';

    for ($i = 0, $n = strlen($pattern); $i < $n; ++$i) {
        $c = $pattern[$i];

        if ($c === "'") {
            if ($i < $n - 1 && $pattern[$i + 1] === "'") {
                // Escaped quote (''). Inside quoted text it belongs to the pending literal;
                // pushing it as its own token would place it before the text that precedes it.
                if ($is_literal) {
                    $literal .= "'";
                } else {
                    $tokens[] = "'";
                }

                $i++;
            } elseif ($is_literal) {
                // Closing quote: emit the quoted text.
                $tokens[] = $literal;
                $literal = '';
                $is_literal = false;
            } else {
                // Opening quote: start collecting quoted text.
                $is_literal = true;
                $literal = '';
            }

            continue;
        }

        if ($is_literal) {
            $literal .= $c;

            continue;
        }

        // Find the end of the run of identical characters starting at $i.
        for ($j = $i + 1; $j < $n; ++$j) {
            if ($pattern[$j] !== $c) {
                break;
            }
        }

        $l = $j - $i;

        // Characters with a registered formatter become [ method, length ];
        // anything else is kept verbatim.
        $tokens[] = isset(self::$formatters[$c])
            ? [ self::$formatters[$c], $l ]
            : str_repeat($c, $l);

        $i = $j - 1; // the loop header adds the final +1
    }

    // Text left over from an unterminated quote. Compare against '' explicitly,
    // since the literal "0" is falsy and would be dropped.
    if ($literal !== '') {
        $tokens[] = $literal;
    }

    return $formats[$pattern] = $tokens;
}

/**
* Pad a numeric value with zero on its left.
*/
Expand Down Expand Up @@ -181,16 +120,19 @@ public function format(
$datetime,
string|DateTimeFormatLength|DateTimeFormatId $pattern_or_length_or_id
): string {
$datetime = $this->ensure_datetime($datetime);
$datetime = new DateTimeAccessor($datetime);
$accessor = new DateTimeAccessor($this->ensure_datetime($datetime));
$pattern = $this->resolve_pattern($pattern_or_length_or_id);
$tokens = self::tokenize($pattern);
$tokens = DateFormatPattern::tokenize($pattern);

$rc = '';

foreach ($tokens as $token) {
if (is_array($token)) { // a callback: method name, repeating chars
$token = $this->{$token[0]}($datetime, $token[1]);
[ $c, $l ] = $token;

$function = self::$formatters[$c] ??
throw new InvalidArgumentException("Invalid date pattern character '$c' used in '$pattern'");
$token = $this->$function($accessor, $l);
}

$rc .= $token;
Expand All @@ -200,20 +142,11 @@ public function format(
}

/**
* Resolves the specified pattern, which can be a width, a skeleton or an actual pattern.
* Resolves the specified pattern, which can be a width, a skeleton, or an actual pattern.
*/
protected function resolve_pattern(
string|DateTimeFormatLength|DateTimeFormatId $pattern_or_length_or_id
): string {
if (is_string($pattern_or_length_or_id) && $pattern_or_length_or_id[0] === ':') {
trigger_error(
"Prefixing date time format ids with ':' is no longer supported, use DateTimeFormatId instead",
E_USER_DEPRECATED
);

$pattern_or_length_or_id = DateTimeFormatId::from(substr($pattern_or_length_or_id, 1));
}

if ($pattern_or_length_or_id instanceof DateTimeFormatLength) {
$length = $pattern_or_length_or_id->value;
$calendar = $this->calendar;
Expand Down
39 changes: 39 additions & 0 deletions tests/DateTimePatternTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
<?php

namespace Test\ICanBoogie\CLDR;

use ICanBoogie\CLDR\DateFormatPattern;
use PHPUnit\Framework\Attributes\DataProvider;
use PHPUnit\Framework\TestCase;

/**
 * Checks that DateFormatPattern::tokenize() splits patterns into literals and
 * `[ pattern character, length ]` pairs.
 */
final class DateTimePatternTest extends TestCase
{
    /**
     * @param array<string|array{ string, int }> $expected
     */
    #[DataProvider("provide_tokenize")]
    public function test_tokenize(string $pattern, array $expected): void
    {
        $actual = DateFormatPattern::tokenize($pattern);

        // Strict comparison, so a numeric string cannot pass for an integer length.
        $this->assertSame($expected, $actual);
    }

    /**
     * @return array<array{ string, array<string|array{ string, int }> }>
     *     Each entry is a pattern followed by its expected tokens.
     */
    public static function provide_tokenize(): array
    {
        return [

            [ 'G', [ [ 'G', 1 ] ] ],
            [ 'GG', [ [ 'G', 2 ] ] ],
            [ 'GGG', [ [ 'G', 3 ] ] ],
            [ 'GGGG', [ [ 'G', 4 ] ] ],
            [ 'GGGGG', [ [ 'G', 5 ] ] ],
            [ 'E d', [ [ 'E', 1 ], ' ', [ 'd', 1 ] ] ],
            // Every separator in the pattern must come back as a literal, including a single space.
            [ 'E h:mm a', [ [ 'E', 1 ], ' ', [ 'h', 1 ], ':', [ 'm', 2 ], ' ', [ 'a', 1 ] ] ],
            [ 'E d/M/y', [ [ 'E', 1 ], ' ', [ 'd', 1 ], '/', [ 'M', 1 ], '/', [ 'y', 1 ] ] ],
            [ "E 'd/M/'y", [ [ 'E', 1 ], " d/M/", [ 'y', 1 ] ] ],
            [ "'week' W 'of' MMMM", [ "week ", [ 'W', 1 ], " of ", [ 'M', 4 ] ] ],
            [ "EEE, MMM d, ''yy", [ [ 'E', 3 ], ", ", [ 'M', 3 ], " ", [ 'd', 1 ], ", '", [ 'y', 2 ] ] ],
            [ "h:mm a", [ [ 'h', 1 ], ":", [ 'm', 2 ], " ", [ 'a', 1 ] ] ],
            // A multi-byte separator, written as an escape so it cannot be lost or confused with
            // a plain space, is kept as one literal.
            [ "h:mm\u{202F}a", [ [ 'h', 1 ], ":", [ 'm', 2 ], "\u{202F}", [ 'a', 1 ] ] ],
            [ "hh 'o''clock' a, zzzz", [ [ 'h', 2 ], " o'clock ", [ 'a', 1 ], ', ', [ 'z', 4 ] ] ],

        ];
    }
}

0 comments on commit 0f57020

Please sign in to comment.