Skip to content

Commit

Permalink
Merge pull request #29 from bolt/fix/mb_str_replace
Browse files Browse the repository at this point in the history
Replace `mb_str_replace` with a different implementation that is much more memory-efficient
  • Loading branch information
bobdenotter authored Oct 4, 2020
2 parents 75cdb71 + 9588a91 commit 728d109
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 32 deletions.
132 changes: 101 additions & 31 deletions src/Str.php
Original file line number Diff line number Diff line change
Expand Up @@ -310,52 +310,123 @@ public static function generatePassword($length = 12)
}

/**
 * Replace text within a portion of a multi-byte string.
 *
 * Performs a multi-byte safe `{@link substr_replace()}` operation, replacing the part of a copy of `string`
 * delimited by the `start` and (optionally) `length` parameters with the string given in `replacement`.
 * Offsets and lengths are counted in characters of the given `encoding`, not in bytes.
 *
 * @see http://php.net/manual/en/function.substr-replace.php
 * @see http://php.net/manual/en/function.mb-substr.php
 * @see https://gist.github.com/antichris/1dd951752f3da125d382420be21d5b16
 *
 * @param mixed $string The input string.
 *
 * An array of strings can be provided, in which case the replacements will occur on each string in turn. In
 * this case, the `replacement`, `start`, `length` and `encoding` parameters may be provided either as scalar
 * values to be applied to each input string in turn, or as arrays, in which case the corresponding array
 * element will be used for each input string. Parameter arrays shorter than `string` are padded (with an
 * empty replacement, a start of 0, a length of NULL and the internal encoding respectively); surplus
 * elements of longer parameter arrays are ignored, so the result always has one element per input string.
 *
 * @param mixed $replacement The replacement string.
 *
 * @param mixed $start If `start` is positive, the replacing will begin at the `start`'th offset into
 * `string`. An offset beyond the end of `string` is clamped to its end.
 *
 * If `start` is negative, the replacing will begin at the `start`'th character from the end of `string`,
 * clamped to the beginning of `string`.
 *
 * @param mixed $length [optional]
 *
 * If given and is positive, it represents the length of the portion of `string` which is to be replaced. If
 * it is negative, it represents the number of characters from the end of `string` at which to stop
 * replacing. If it is not given or equals <b>NULL</b> or an empty string, the replacing ends at the end of
 * `string`. If `length` is zero then this function will have the effect of inserting `replacement` into
 * `string` at the given `start` offset.
 *
 * @param mixed $encoding [optional]
 *
 * The `encoding` parameter is the character encoding. If it is omitted (or empty), the internal character
 * encoding value will be used.
 *
 * @return mixed The result string is returned. If `string` is an array then an array is returned.
 */
public static function mb_substr_replace($string, $replacement, $start, $length = null, $encoding = null)
{
    if (! $encoding) {
        $encoding = mb_internal_encoding();
    }

    if (is_array($string)) {
        $stringCount = count($string);

        // Bring every parameter to exactly one entry per input string. Truncating matters as much as
        // padding: array_map() pads the *shorter* arrays with null, so a longer parameter array would
        // otherwise produce extra results computed from a null subject.
        $align = static function ($value, $filler) use ($stringCount) {
            if (! is_array($value)) {
                return array_fill(0, $stringCount, $value);
            }

            return array_pad(array_slice($value, 0, $stringCount), $stringCount, $filler);
        };

        // Recurse once per input string with the matching scalar parameters.
        return array_map(
            __METHOD__,
            $string,
            $align($replacement, ''),
            $align($start, 0),
            $align($length, null),
            $align($encoding, mb_internal_encoding())
        );
    }

    // Normalise scalars once so the mb_* calls below always receive the types they expect.
    $string = (string) $string;
    $replacement = (string) $replacement;
    $start = (int) $start;

    $stringLength = mb_strlen($string, $encoding);

    // Character offset where the replaced portion begins, clamped to [0, $stringLength].
    $begin = $start < 0
        ? max(0, $stringLength + $start)
        : min($start, $stringLength);

    // Character offset where the replaced portion ends; never before $begin, never past the end.
    if ($length === null || $length === '') {
        $end = $stringLength;
    } else {
        $length = (int) $length;
        $end = $length < 0
            ? max($begin, $stringLength + $length)
            : min($begin + $length, $stringLength);
    }

    $leader = $begin > 0
        ? mb_substr($string, 0, $begin, $encoding)
        : '';

    $trailer = $end < $stringLength
        ? mb_substr($string, $end, null, $encoding)
        : '';

    return $leader . $replacement . $trailer;
}

public static function placeholders(string $string, array $replacements, bool $caseInsensitive = false): string
Expand Down Expand Up @@ -383,11 +454,10 @@ function ($matches) use ($replacements) {
* @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
* @see https://github.com/voku/portable-utf8/
*
* @param array $ignore An array of words not to capitalize.
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
*
* @return string The titleized string.
*/
public static function titleCase(
string $str,
Expand Down Expand Up @@ -433,7 +503,7 @@ public static function titleCase(
/** @noinspection RegExpDuplicateAlternationBranch - false-positive - https://youtrack.jetbrains.com/issue/WI-51002 */
$str = (string) \preg_replace_callback(
'~\\b (_*) (?: # 1. Leading underscore and
( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ | # 2. file path or
( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ | # 2. file path or
[-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' ) # URL, domain, or email
|
( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' ) # 3. or small word (case-insensitive)
Expand Down
1 change: 0 additions & 1 deletion tests/StrTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,6 @@ public function testPlaceholder()
'You are the 🍏 to my 🍑.',
Str::placeholders('You are the {foo} to my {bar}.', ['foo' => '🍏', 'bar' => '🍑'], true)
);

}

public function testTitleCase()
Expand Down

0 comments on commit 728d109

Please sign in to comment.