Skip to content

Commit

Permalink
Merge pull request #3 from caxy/feature-nonpartial_word_diffing
Browse files Browse the repository at this point in the history
Feature nonpartial word diffing
  • Loading branch information
mgersten-caxy committed Jun 2, 2014
2 parents d22deb9 + 8e453dd commit 213afcc
Showing 1 changed file with 36 additions and 2 deletions.
38 changes: 36 additions & 2 deletions lib/Caxy/HtmlDiff/HtmlDiff.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class HtmlDiff
// NOTE(review): the code that fills and reads the opening/closing tag lists is not visible in this excerpt.
private $specialCaseOpeningTags = array();
private $specialCaseClosingTags = array();
// Initial list of tag names; the constructor calls setSpecialCaseTags() with its own argument.
// NOTE(review): presumably these are inline formatting tags that get special diff handling — confirm.
private $specialCaseTags = array('strong', 'b', 'i', 'big', 'small', 'u', 'sub', 'sup', 'strike', 's', 'p');
// Characters allowed inside a word: isPartOfWord() strips them before its alphanumeric check.
private $specialCaseChars = array('.', ',', '(', ')', '\'');
// Initial value of the grouping flag. NOTE(review): where it is consumed is not visible here.
private $groupDiffs = true;

public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCaseTags = array(), $groupDiffs = true)
Expand All @@ -26,6 +27,31 @@ public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCas

$this->setSpecialCaseTags($specialCaseTags);
}

/**
 * Replaces the whole list of special-case characters.
 *
 * The given array is stored as-is (keys included); no validation or
 * de-duplication is performed.
 *
 * @param array $chars Characters that may be treated as part of a word.
 */
public function setSpecialCaseChars(array $chars)
{
    $this->specialCaseChars = $chars;
}

/**
 * Returns the current list of special-case characters.
 *
 * @return array The characters as currently stored on this instance.
 */
public function getSpecialCaseChars()
{
    return $this->specialCaseChars;
}

/**
 * Appends a character to the special-case list unless it is already there.
 *
 * @param string $char Character to register.
 */
public function addSpecialCaseChar($char)
{
    // Strict comparison: with loose comparison unrelated values can be
    // reported as "already present" (e.g. integer 0 vs '.' on PHP < 8),
    // which would silently skip the add.
    if (in_array($char, $this->specialCaseChars, true)) {
        return;
    }

    $this->specialCaseChars[] = $char;
}

/**
 * Removes a character from the special-case list.
 *
 * Every occurrence is removed (the list may contain duplicates when it was
 * supplied through setSpecialCaseChars()), and the list is re-indexed so it
 * keeps sequential keys. Nothing changes when the character is not present.
 *
 * @param string $char Character to unregister.
 */
public function removeSpecialCaseChar($char)
{
    // Strict matching so that only identical values are removed.
    $matchingKeys = array_keys($this->specialCaseChars, $char, true);
    if (count($matchingKeys) === 0) {
        return;
    }

    foreach ($matchingKeys as $key) {
        unset($this->specialCaseChars[$key]);
    }

    // unset() leaves gaps in the keys; restore a sequential list.
    $this->specialCaseChars = array_values($this->specialCaseChars);
}

public function setSpecialCaseTags(array $tags = array())
{
Expand Down Expand Up @@ -173,13 +199,18 @@ private function splitInputsToWords()
$this->oldWords = $this->convertHtmlToListOfWords( $this->explode( $this->oldText ) );
$this->newWords = $this->convertHtmlToListOfWords( $this->explode( $this->newText ) );
}

/**
 * Tells whether a piece of text can belong to a word.
 *
 * All special-case characters are stripped first; what remains must pass
 * ctype_alnum(). Text made up solely of special-case characters therefore
 * yields false, because ctype_alnum() rejects the empty string.
 *
 * @param string $text Text to examine.
 *
 * @return bool
 */
private function isPartOfWord($text)
{
    $withoutSpecialChars = str_replace($this->specialCaseChars, '', $text);

    return ctype_alnum($withoutSpecialChars);
}

private function convertHtmlToListOfWords($characterString)
{
$mode = 'character';
$current_word = '';
$words = array();
foreach ($characterString as $character) {
foreach ($characterString as $i => $character) {
switch ($mode) {
case 'character':
if ( $this->isStartOfTag( $character ) ) {
Expand All @@ -195,7 +226,10 @@ private function convertHtmlToListOfWords($characterString)
$current_word = $character;
$mode = 'whitespace';
} else {
if ( ctype_alnum( $character ) && ( strlen($current_word) == 0 || ctype_alnum( $current_word ) ) ) {
if (
(ctype_alnum($character) && (strlen($current_word) == 0 || $this->isPartOfWord($current_word))) ||
(in_array($character, $this->specialCaseChars) && isset($characterString[$i+1]) && $this->isPartOfWord($characterString[$i+1]))
) {
$current_word .= $character;
} else {
$words[] = $current_word;
Expand Down

0 comments on commit 213afcc

Please sign in to comment.