Skip to content

Commit

Permalink
Apply grammar rules to token replacement values
Browse files Browse the repository at this point in the history
Added support for the “grammaticalization” system in osrm-text-instructions. The Russian grammar file is now converted into a plist and used to inflect road names according to the cases specified in the instructions.
  • Loading branch information
1ec5 committed Oct 11, 2017
1 parent 08de875 commit b0438a8
Show file tree
Hide file tree
Showing 4 changed files with 146 additions and 13 deletions.
129 changes: 120 additions & 9 deletions OSRMTextInstructions/OSRMTextInstructions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,55 @@ import MapboxDirections

// Instruction strings loaded from the Instructions.plist resource in the bundle that contains
// OSRMInstructionFormatter; the bundle lookup automatically resolves the localized copy of the file.
// Both force-unwraps crash if the resource is missing or cannot be read as a dictionary.
let OSRMTextInstructionsStrings = NSDictionary(contentsOfFile: Bundle(for: OSRMInstructionFormatter.self).path(forResource: "Instructions", ofType: "plist")!)!
// Grammar rules loaded from the Grammar.plist resource in the bundle that contains
// OSRMInstructionFormatter. Unlike the instruction strings, this resource is optional:
// the value is nil when no Grammar.plist can be found or when the file cannot be read.
let OSRMTextInstructionsGrammar: NSDictionary? = {
    let formatterBundle = Bundle(for: OSRMInstructionFormatter.self)
    if let grammarPath = formatterBundle.path(forResource: "Grammar", ofType: "plist") {
        return NSDictionary(contentsOfFile: grammarPath)
    }
    return nil
}()

extension NSRegularExpression.Options {
    /**
     Creates the options that correspond to a string of JavaScript regular expression flags.

     Each character of `javaScriptFlags` is treated as one flag. `i`, `m` and `u` are translated
     to an option; every other character (including `g` and `y`) contributes nothing.

     - parameter javaScriptFlags: Flags as written after a JavaScript regular expression literal, for example `"gi"`.
     */
    init(javaScriptFlags: String) {
        // Flags that are absent from this table are deliberately ignored.
        let equivalents: [Character: NSRegularExpression.Options] = [
            "i": .caseInsensitive,
            "m": .anchorsMatchLines,
            // Character classes are always Unicode-aware in ICU regular expressions.
            "u": .useUnicodeWordBoundaries
        ]

        let combined = javaScriptFlags.characters.reduce(NSRegularExpression.Options()) { (accumulated, flag) in
            guard let equivalent = equivalents[flag] else {
                return accumulated
            }
            return accumulated.union(equivalent)
        }
        self.init(rawValue: combined.rawValue)
    }
}

// A text type that can contain `{token}` placeholders. Both `String` and `NSAttributedString`
// are extended to conform in this file.
protocol Tokenized {
// The text type produced by token replacement and inflection.
associatedtype T

/**
Replaces `{tokens}` in the receiver using the given closure.
*/
func replacingTokens(using interpolator: ((TokenType) -> T)) -> T
// Variant of the requirement above whose closure also receives the text written after a colon
// inside the token (`{token:variant}`), or nil when the token has no such suffix.
func replacingTokens(using interpolator: ((TokenType, String?) -> T)) -> T

/**
Returns the receiver inflected into the given grammatical variant.

The conforming implementations in this file look the rules up in the Grammar.plist dictionary
under `version` and then `variant`, and return the receiver unchanged when no rules are found.
*/
func inflected(into variant: String, version: String) -> T
}

extension String: Tokenized {
/**
 The receiver with its first character uppercased and every other character left untouched.
 */
public var sentenceCased: String {
    let initial = String(characters.prefix(1)).uppercased()
    let remainder = String(characters.dropFirst())
    return initial + remainder
}

public func replacingTokens(using interpolator: ((TokenType) -> String)) -> String {
public func replacingTokens(using interpolator: ((TokenType, String?) -> String)) -> String {
let scanner = Scanner(string: self)
scanner.charactersToBeSkipped = nil
var result = ""
Expand All @@ -38,9 +71,17 @@ extension String: Tokenized {
continue
}

var variant: NSString?
if scanner.scanString(":", into: nil) {
guard scanner.scanUpTo("}", into: &variant) else {
result += ":"
continue
}
}

if scanner.scanString("}", into: nil) {
if let tokenType = TokenType(description: token! as String) {
result += interpolator(tokenType)
result += interpolator(tokenType, variant as String?)
} else {
result += "{\(token!)}"
}
Expand All @@ -59,10 +100,34 @@ extension String: Tokenized {
}
return result
}

/**
 Returns the string inflected into the given grammatical variant.

 The rules are read from `OSRMTextInstructionsGrammar`, first by `version` and then by `variant`.
 Each rule is a pair of strings: a regular expression pattern followed by a replacement template.
 The rules are applied in order, each one to the output of the previous one.

 - parameter variant: Key of the rule list within the version's grammar, for example `"accusative"`.
 - parameter version: Top-level key of the grammar dictionary, for example `"v5"`.
 - returns: The inflected string, or the receiver unchanged when the grammar, the version or the variant is unavailable.
 */
func inflected(into variant: String, version: String) -> String {
    guard let grammar = OSRMTextInstructionsGrammar?[version] as? [String: Any] else {
        return self
    }

    guard let rules = grammar[variant] as? [[String]] else {
        return self
    }

    // The text is padded with one space on each side while the rules run and trimmed again at the
    // end -- presumably so that patterns can anchor on a surrounding space; confirm against the grammar files.
    var grammaticalReplacement = " \(self) "
    var regularExpressionOptions: NSRegularExpression.Options = []
    if let meta = OSRMTextInstructionsGrammar?["meta"] as? [String: String],
        let flags = meta["regExpFlags"] {
        regularExpressionOptions = NSRegularExpression.Options(javaScriptFlags: flags)
    }

    for rule in rules {
        let regularExpression = try! NSRegularExpression(pattern: rule[0], options: regularExpressionOptions)
        // NSRange is measured in UTF-16 code units. Counting characters (grapheme clusters) instead
        // yields a range that is too short whenever the text contains emoji or decomposed characters,
        // which would leave the end of the string out of reach of the rule.
        let entireRange = NSRange(location: 0, length: grammaticalReplacement.utf16.count)
        grammaticalReplacement = regularExpression.stringByReplacingMatches(in: grammaticalReplacement, options: [], range: entireRange, withTemplate: rule[1])
    }

    return grammaticalReplacement.trimmingCharacters(in: .whitespaces)
}
}

extension NSAttributedString: Tokenized {
public func replacingTokens(using interpolator: ((TokenType) -> NSAttributedString)) -> NSAttributedString {
public func replacingTokens(using interpolator: ((TokenType, String?) -> NSAttributedString)) -> NSAttributedString {
let scanner = Scanner(string: string)
scanner.charactersToBeSkipped = nil
let result = NSMutableAttributedString()
Expand All @@ -78,12 +143,21 @@ extension NSAttributedString: Tokenized {

var token: NSString?
guard scanner.scanUpTo("}", into: &token) else {
result.append(NSAttributedString(string: "}"))
continue
}

var variant: NSString?
if scanner.scanString(":", into: nil) {
guard scanner.scanUpTo("}", into: &variant) else {
result.append(NSAttributedString(string: "}"))
continue
}
}

if scanner.scanString("}", into: nil) {
if let tokenType = TokenType(description: token! as String) {
result.append(interpolator(tokenType))
result.append(interpolator(tokenType, variant as String?))
}
} else {
result.append(NSAttributedString(string: token! as String))
Expand All @@ -101,6 +175,34 @@ extension NSAttributedString: Tokenized {
}
return result as NSAttributedString
}

/**
 Returns the attributed string inflected into the given grammatical variant.

 The rules are read from `OSRMTextInstructionsGrammar`, first by `version` and then by `variant`.
 Each rule is a pair of strings: a regular expression pattern followed by a replacement template.

 - parameter variant: Key of the rule list within the version's grammar.
 - parameter version: Top-level key of the grammar dictionary.
 - returns: The inflected attributed string, or the receiver itself when the grammar, the version or the variant is unavailable.
 */
@nonobjc func inflected(into variant: String, version: String) -> NSAttributedString {
    guard let grammar = OSRMTextInstructionsGrammar?[version] as? [String: Any],
        let rules = grammar[variant] as? [[String]] else {
        return self
    }

    // Translate the optional JavaScript flags stored with the grammar into NSRegularExpression options.
    var expressionOptions: NSRegularExpression.Options = []
    if let meta = OSRMTextInstructionsGrammar?["meta"] as? [String: String], let flags = meta["regExpFlags"] {
        expressionOptions = NSRegularExpression.Options(javaScriptFlags: flags)
    }

    // Work on a copy of the receiver padded with a single space on each side.
    let padded = NSMutableAttributedString(string: " ")
    padded.append(self)
    padded.append(NSAttributedString(string: " "))

    // Apply the rules in order, editing the backing string in place.
    rules.forEach { rule in
        let expression = try! NSRegularExpression(pattern: rule[0], options: expressionOptions)
        let entireRange = NSRange(location: 0, length: padded.mutableString.length)
        expression.replaceMatches(in: padded.mutableString, options: [], range: entireRange, withTemplate: rule[1])
    }

    // Remove the runs of spaces left at either end.
    let trimmingRange = NSRange(location: 0, length: padded.mutableString.length)
    padded.mutableString.replaceOccurrences(of: "^ +| +$", with: "", options: .regularExpression, range: trimmingRange)
    return padded
}
}

public class OSRMInstructionFormatter: Formatter {
Expand Down Expand Up @@ -323,15 +425,21 @@ public class OSRMInstructionFormatter: Formatter {
let attributedName = NSAttributedString(string: name, attributes: attrs)
let attributedRef = NSAttributedString(string: ref, attributes: attrs)
let phrase = NSAttributedString(string: self.phrase(named: .nameWithCode), attributes: attrs)
wayName = phrase.replacingTokens(using: { (tokenType) -> NSAttributedString in
wayName = phrase.replacingTokens(using: { (tokenType, variant) -> NSAttributedString in
var replacement: NSAttributedString
switch tokenType {
case .wayName:
return modifyValueByKey?(.wayName, attributedName) ?? attributedName
replacement = attributedName
case .code:
return modifyValueByKey?(.code, attributedRef) ?? attributedRef
replacement = attributedRef
default:
fatalError("Unexpected token type \(tokenType) in name-and-ref phrase")
}

if let variant = variant {
replacement = replacement.inflected(into: variant, version: version)
}
return modifyValueByKey?(tokenType, replacement) ?? replacement
})
} else if let ref = ref, isMotorway, let decimalRange = ref.rangeOfCharacter(from: .decimalDigits), !decimalRange.isEmpty {
let attributedRef = NSAttributedString(string: ref, attributes: attrs)
Expand Down Expand Up @@ -411,7 +519,7 @@ public class OSRMInstructionFormatter: Formatter {
if step.finalHeading != nil { bearing = Int(step.finalHeading! as Double) }

// Replace tokens
let result = NSAttributedString(string: instruction, attributes: attrs).replacingTokens { (tokenType) -> NSAttributedString in
let result = NSAttributedString(string: instruction, attributes: attrs).replacingTokens { (tokenType, variant) -> NSAttributedString in
var replacement: String
switch tokenType {
case .code: replacement = step.codes?.first ?? ""
Expand All @@ -430,6 +538,9 @@ public class OSRMInstructionFormatter: Formatter {
if tokenType == .wayName {
return wayName // already modified above
} else {
if let variant = variant {
replacement = replacement.inflected(into: variant, version: version)
}
let attributedReplacement = NSAttributedString(string: replacement, attributes: attrs)
return modifyValueByKey?(tokenType, attributedReplacement) ?? attributedReplacement
}
Expand Down
2 changes: 1 addition & 1 deletion OSRMTextInstructionsTests/OSRMTextInstructionsTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class OSRMTextInstructionsTests: XCTestCase {
let fixtureOptions = json["options"] as! [String: String]

let expectedValue = (json["phrases"] as! [String: String])["en"]
let actualValue = phrase?.replacingTokens(using: { (tokenType) -> String in
let actualValue = phrase?.replacingTokens(using: { (tokenType, variant) -> String in
var replacement: String?
switch tokenType {
case .firstInstruction:
Expand Down
12 changes: 10 additions & 2 deletions OSRMTextInstructionsTests/TokenTests.swift
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import XCTest
import OSRMTextInstructions
@testable import OSRMTextInstructions

class TokenTests: XCTestCase {
func testReplacingTokens() {
XCTAssertEqual("Dead Beef", "Dead Beef".replacingTokens { _ in "" })
XCTAssertEqual("Food", "F{ref}{ref}d".replacingTokens { _ in "o" })

XCTAssertEqual("Take the left stairs to the 20th floor", "Take the {modifier} stairs to the {nth} floor".replacingTokens { (tokenType) -> String in
XCTAssertEqual("Take the left stairs to the 20th floor", "Take the {modifier} stairs to the {nth} floor".replacingTokens { (tokenType, variant) -> String in
switch tokenType {
case .modifier:
return "left"
Expand All @@ -19,8 +19,16 @@ class TokenTests: XCTestCase {
})

XCTAssertEqual("{👿}", "{👿}".replacingTokens { _ in "👼" })
XCTAssertEqual("{👿:}", "{👿:}".replacingTokens { _ in "👼" })
XCTAssertEqual("{👿:💣}", "{👿:💣}".replacingTokens { _ in "👼" })
XCTAssertEqual("{", "{".replacingTokens { _ in "🕳" })
XCTAssertEqual("{💣", "{💣".replacingTokens { _ in "🕳" })
XCTAssertEqual("}", "}".replacingTokens { _ in "🕳" })
}

/// Checks that a Russian street name is inflected into the accusative case.
/// The assertion only runs when the formatter's bundle prefers a localization starting with "ru".
func testInflectingStrings() {
    let formatterBundle = Bundle(for: OSRMInstructionFormatter.self)
    let prefersRussian = formatterBundle.preferredLocalizations.contains { (localization) -> Bool in
        return localization.starts(with: "ru")
    }
    guard prefersRussian else {
        return
    }

    XCTAssertEqual("Бармалееву улицу", "Бармалеева улица".inflected(into: "accusative", version: "v5"))
}
}
16 changes: 15 additions & 1 deletion json2plist.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
# Transform select osrm-text-instructions language files from json to plist
git submodule init
git submodule update
cd "./osrm-text-instructions/languages/translations/" || exit 1

cd "./osrm-text-instructions/languages/translations/" || exit 1
for file in ./*; do
if [ "$file" = "./en.json" ]; then
LANGUAGE="Base"
Expand All @@ -18,4 +18,18 @@ for file in ./*; do
plutil -convert xml1 "./${file}" -o "${LANGUAGE_DIR}/Instructions.plist"
done

# Convert every grammar JSON file into a Grammar.plist inside the matching <language>.lproj folder.
cd "../grammar/" || exit 1
for file in ./*; do
    # When the directory is empty the glob stays a literal "./*"; skip it rather than
    # creating a "*.lproj" folder and handing a nonexistent file to plutil.
    [ -e "$file" ] || continue

    if [ "$file" = "./en.json" ]; then
        LANGUAGE="Base"
    else
        # Quote the path so a name containing whitespace or glob characters is not split or
        # expanded; basename also strips the .json suffix.
        LANGUAGE=$(basename "$file" .json)
    fi

    LANGUAGE_DIR="${BUILT_PRODUCTS_DIR:-../../../OSRMTextInstructions/}/${UNLOCALIZED_RESOURCES_FOLDER_PATH:-}/${LANGUAGE}.lproj"
    mkdir -p "${LANGUAGE_DIR}"
    plutil -convert xml1 "./${file}" -o "${LANGUAGE_DIR}/Grammar.plist"
done

cd - || exit 1

0 comments on commit b0438a8

Please sign in to comment.