-
Notifications
You must be signed in to change notification settings - Fork 43
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Bugfix: DiffMatchPatch Surrogate Pairs Support #581
Changes from all commits
4998e56
a447f84
4968497
6f32ca5
8fa87b6
bc96dd3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -112,6 +112,9 @@ CFIndex diff_commonPrefix(CFStringRef text1, CFStringRef text2) { | |
char2 = CFStringGetCharacterFromInlineBuffer(&text2_inlineBuffer, i); | ||
|
||
if (char1 != char2) { | ||
if ( CFStringIsSurrogateLowCharacter(char1) || CFStringIsSurrogateHighCharacter(char1) ) { | ||
i = MAX(i - 1, 0); | ||
} | ||
return i; | ||
} | ||
} | ||
|
@@ -142,7 +145,11 @@ CFIndex diff_commonSuffix(CFStringRef text1, CFStringRef text2) { | |
char2 = CFStringGetCharacterFromInlineBuffer(&text2_inlineBuffer, (text2_length - i)); | ||
|
||
if (char1 != char2) { | ||
return i - 1; | ||
if ( CFStringIsSurrogateLowCharacter(char1) || CFStringIsSurrogateHighCharacter(char1) ) { | ||
return MIN(i - 2, 0); | ||
} | ||
|
||
return i - 1; | ||
} | ||
} | ||
return n; | ||
|
@@ -652,6 +659,10 @@ CFIndex diff_cleanupSemanticScore(CFStringRef one, CFStringRef two) { | |
CFStringGetCharacterAtIndex(one, (CFStringGetLength(one) - 1)); | ||
UniChar char2 = | ||
CFStringGetCharacterAtIndex(two, 0); | ||
Boolean char1IsSurrogate = | ||
CFStringIsSurrogateLowCharacter(char1) || CFStringIsSurrogateHighCharacter(char1); | ||
Boolean char2IsSurrogate = | ||
CFStringIsSurrogateLowCharacter(char2) || CFStringIsSurrogateHighCharacter(char1); | ||
Boolean nonAlphaNumeric1 = | ||
!CFCharacterSetIsCharacterMember(alphaNumericSet, char1); | ||
Boolean nonAlphaNumeric2 = | ||
|
@@ -668,7 +679,7 @@ CFIndex diff_cleanupSemanticScore(CFStringRef one, CFStringRef two) { | |
lineBreak1 && diff_regExMatch(one, &blankLineEndRegEx); | ||
Boolean blankLine2 = | ||
lineBreak2 && diff_regExMatch(two, &blankLineStartRegEx); | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we remove the whitespace changes? It's making it hard to scan and see if any actual changes occurred here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Only two new lines there...
I've relocated all the things because it was next to impossible to understand what was going on. Lemme see if I can revert that... relatively easily! There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @dmsnell there you go! The changes themselves are very low footprint. And the beauty of it? If there are no surrogate pairs anywhere... this will run the same way it always did. |
||
if (blankLine1 || blankLine2) { | ||
// Five points for blank lines. | ||
return 5; | ||
|
@@ -681,7 +692,7 @@ CFIndex diff_cleanupSemanticScore(CFStringRef one, CFStringRef two) { | |
} else if (whitespace1 || whitespace2) { | ||
// Two points for whitespace. | ||
return 2; | ||
} else if (nonAlphaNumeric1 || nonAlphaNumeric2) { | ||
} else if ((nonAlphaNumeric1 && !char1IsSurrogate) || (nonAlphaNumeric2 && !char2IsSurrogate)) { | ||
// One point for non-alphanumeric. | ||
return 1; | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Did you see google/diff-match-patch#69 (comment), where new bugs appeared by using this approach?