Apply grammar rules to token replacement values

Added support for the “grammaticalization” system in osrm-text-instructions. The Russian grammar file is now converted into a plist and used to inflect road names according to the cases specified in the instructions.
Project-OSRM · Oct 11, 2017 · b0438a8 · b0438a8
1 parent 08de875
commit b0438a8
Show file tree

Hide file tree

Showing 4 changed files with 146 additions and 13 deletions.
diff --git a/OSRMTextInstructions/OSRMTextInstructions.swift b/OSRMTextInstructions/OSRMTextInstructions.swift
@@ -3,22 +3,55 @@ import MapboxDirections
 
 // Will automatically read localized Instructions.plist
 let OSRMTextInstructionsStrings = NSDictionary(contentsOfFile: Bundle(for: OSRMInstructionFormatter.self).path(forResource: "Instructions", ofType: "plist")!)!
+// Contents of the bundle's Grammar.plist, or nil when the resource is missing or cannot be read as a dictionary.
+let OSRMTextInstructionsGrammar: NSDictionary? = {
+    let bundle = Bundle(for: OSRMInstructionFormatter.self)
+    return bundle.path(forResource: "Grammar", ofType: "plist").flatMap { grammarPath in
+        NSDictionary(contentsOfFile: grammarPath)
+    }
+}()
+
+/// Translates JavaScript regular-expression flags into their `NSRegularExpression.Options` equivalents.
+extension NSRegularExpression.Options {
+    /**
+     Creates an option set from a JavaScript flag string such as `"gi"`.
+
+     `i` maps to `.caseInsensitive`, `m` to `.anchorsMatchLines`, and `u` to `.useUnicodeWordBoundaries`.
+     Every other character, including `g` and `y`, contributes no option.
+     */
+    init(javaScriptFlags: String) {
+        var translated: NSRegularExpression.Options = []
+        for character in javaScriptFlags.characters {
+            if character == "i" {
+                translated.insert(.caseInsensitive)
+            } else if character == "m" {
+                translated.insert(.anchorsMatchLines)
+            } else if character == "u" {
+                // Character classes are always Unicode-aware in ICU regular expressions.
+                translated.insert(.useUnicodeWordBoundaries)
+            }
+        }
+        self.init(rawValue: translated.rawValue)
+    }
+}
 
 protocol Tokenized {
     associatedtype T
 
     /**
      Replaces `{tokens}` in the receiver using the given closure.
      */
-    func replacingTokens(using interpolator: ((TokenType) -> T)) -> T
+    func replacingTokens(using interpolator: ((TokenType, String?) -> T)) -> T
+
+    func inflected(into variant: String, version: String) -> T
 }
 
 extension String: Tokenized {
     public var sentenceCased: String {
         return String(characters.prefix(1)).uppercased() + String(characters.dropFirst())
     }
 
-    public func replacingTokens(using interpolator: ((TokenType) -> String)) -> String {
+    public func replacingTokens(using interpolator: ((TokenType, String?) -> String)) -> String {
         let scanner = Scanner(string: self)
         scanner.charactersToBeSkipped = nil
         var result = ""
@@ -38,9 +71,17 @@ extension String: Tokenized {
                 continue
             }
 
+            var variant: NSString?
+            if scanner.scanString(":", into: nil) {
+                guard scanner.scanUpTo("}", into: &variant) else {
+                    result += ":"
+                    continue
+                }
+            }
+
             if scanner.scanString("}", into: nil) {
                 if let tokenType = TokenType(description: token! as String) {
-                    result += interpolator(tokenType)
+                    result += interpolator(tokenType, variant as String?)
                 } else {
                     result += "{\(token!)}"
                 }
@@ -59,10 +100,34 @@ extension String: Tokenized {
         }
         return result
     }
+
+    /// Returns the receiver rewritten by the `variant` rules that `OSRMTextInstructionsGrammar` holds under `version`.
+    /// Each rule is a `[pattern, template]` pair applied in order; the receiver is returned as is when no rules are found.
+    func inflected(into variant: String, version: String) -> String {
+        guard let grammar = OSRMTextInstructionsGrammar?[version] as? [String: Any],
+            let rules = grammar[variant] as? [[String]] else {
+            return self
+        }
+
+        var regularExpressionOptions: NSRegularExpression.Options = []
+        if let meta = OSRMTextInstructionsGrammar?["meta"] as? [String: String], let flags = meta["regExpFlags"] {
+            regularExpressionOptions = NSRegularExpression.Options(javaScriptFlags: flags)
+        }
+
+        // The text is padded with a space on each side while the rules run; the padding is trimmed before returning.
+        var grammaticalReplacement = " \(self) "
+        for rule in rules where rule.count >= 2 {
+            guard let regularExpression = try? NSRegularExpression(pattern: rule[0], options: regularExpressionOptions) else { continue }
+            // NSRange is measured in UTF-16 code units, which can exceed the Character count (emoji, combining marks).
+            let range = NSRange(location: 0, length: grammaticalReplacement.utf16.count)
+            grammaticalReplacement = regularExpression.stringByReplacingMatches(in: grammaticalReplacement, options: [], range: range, withTemplate: rule[1])
+        }
+        return grammaticalReplacement.trimmingCharacters(in: .whitespaces)
+    }
 }
 
 extension NSAttributedString: Tokenized {
-    public func replacingTokens(using interpolator: ((TokenType) -> NSAttributedString)) -> NSAttributedString {
+    public func replacingTokens(using interpolator: ((TokenType, String?) -> NSAttributedString)) -> NSAttributedString {
         let scanner = Scanner(string: string)
         scanner.charactersToBeSkipped = nil
         let result = NSMutableAttributedString()
@@ -78,12 +143,21 @@ extension NSAttributedString: Tokenized {
 
             var token: NSString?
             guard scanner.scanUpTo("}", into: &token) else {
+                result.append(NSAttributedString(string: "}"))
                 continue
             }
 
+            var variant: NSString?
+            if scanner.scanString(":", into: nil) {
+                guard scanner.scanUpTo("}", into: &variant) else {
+                    result.append(NSAttributedString(string: "}"))
+                    continue
+                }
+            }
+
             if scanner.scanString("}", into: nil) {
                 if let tokenType = TokenType(description: token! as String) {
-                    result.append(interpolator(tokenType))
+                    result.append(interpolator(tokenType, variant as String?))
                 }
             } else {
                 result.append(NSAttributedString(string: token! as String))
@@ -101,6 +175,34 @@ extension NSAttributedString: Tokenized {
         }
         return result as NSAttributedString
     }
+
+    /// Returns a copy of the receiver rewritten by the `variant` rules that `OSRMTextInstructionsGrammar` holds under `version`.
+    /// The receiver itself is returned when no rules are found; rules that are malformed or fail to compile are skipped.
+    @nonobjc func inflected(into variant: String, version: String) -> NSAttributedString {
+        guard let grammar = OSRMTextInstructionsGrammar?[version] as? [String: Any],
+            let rules = grammar[variant] as? [[String]] else {
+            return self
+        }
+
+        var regularExpressionOptions: NSRegularExpression.Options = []
+        if let meta = OSRMTextInstructionsGrammar?["meta"] as? [String: String], let flags = meta["regExpFlags"] {
+            regularExpressionOptions = NSRegularExpression.Options(javaScriptFlags: flags)
+        }
+
+        // The text is padded with a space on each side while the rules run; the padding is stripped again below.
+        let grammaticalReplacement = NSMutableAttributedString(string: " ")
+        grammaticalReplacement.append(self)
+        grammaticalReplacement.append(NSAttributedString(string: " "))
+        let text = grammaticalReplacement.mutableString
+
+        for rule in rules where rule.count >= 2 {
+            guard let regularExpression = try? NSRegularExpression(pattern: rule[0], options: regularExpressionOptions) else { continue }
+            regularExpression.replaceMatches(in: text, options: [], range: NSRange(location: 0, length: text.length), withTemplate: rule[1])
+        }
+
+        text.replaceOccurrences(of: "^ +| +$", with: "", options: .regularExpression, range: NSRange(location: 0, length: text.length))
+        return grammaticalReplacement
+    }
 }
 
 public class OSRMInstructionFormatter: Formatter {
@@ -323,15 +425,21 @@ public class OSRMInstructionFormatter: Formatter {
                 let attributedName = NSAttributedString(string: name, attributes: attrs)
                 let attributedRef = NSAttributedString(string: ref, attributes: attrs)
                 let phrase = NSAttributedString(string: self.phrase(named: .nameWithCode), attributes: attrs)
-                wayName = phrase.replacingTokens(using: { (tokenType) -> NSAttributedString in
+                wayName = phrase.replacingTokens(using: { (tokenType, variant) -> NSAttributedString in
+                    var replacement: NSAttributedString
                     switch tokenType {
                     case .wayName:
-                        return modifyValueByKey?(.wayName, attributedName) ?? attributedName
+                        replacement = attributedName
                     case .code:
-                        return modifyValueByKey?(.code, attributedRef) ?? attributedRef
+                        replacement = attributedRef
                     default:
                         fatalError("Unexpected token type \(tokenType) in name-and-ref phrase")
                     }
+
+                    if let variant = variant {
+                        replacement = replacement.inflected(into: variant, version: version)
+                    }
+                    return modifyValueByKey?(tokenType, replacement) ?? replacement
                 })
             } else if let ref = ref, isMotorway, let decimalRange = ref.rangeOfCharacter(from: .decimalDigits), !decimalRange.isEmpty {
                 let attributedRef = NSAttributedString(string: ref, attributes: attrs)
@@ -411,7 +519,7 @@ public class OSRMInstructionFormatter: Formatter {
         if step.finalHeading != nil { bearing = Int(step.finalHeading! as Double) }
 
         // Replace tokens
-        let result = NSAttributedString(string: instruction, attributes: attrs).replacingTokens { (tokenType) -> NSAttributedString in
+        let result = NSAttributedString(string: instruction, attributes: attrs).replacingTokens { (tokenType, variant) -> NSAttributedString in
             var replacement: String
             switch tokenType {
             case .code: replacement = step.codes?.first ?? ""
@@ -430,6 +538,9 @@ public class OSRMInstructionFormatter: Formatter {
             if tokenType == .wayName {
                 return wayName // already modified above
             } else {
+                if let variant = variant {
+                    replacement = replacement.inflected(into: variant, version: version)
+                }
                 let attributedReplacement = NSAttributedString(string: replacement, attributes: attrs)
                 return modifyValueByKey?(tokenType, attributedReplacement) ?? attributedReplacement
             }

diff --git a/OSRMTextInstructionsTests/OSRMTextInstructionsTests.swift b/OSRMTextInstructionsTests/OSRMTextInstructionsTests.swift
@@ -48,7 +48,7 @@ class OSRMTextInstructionsTests: XCTestCase {
                     let fixtureOptions = json["options"] as! [String: String]
 
                     let expectedValue = (json["phrases"] as! [String: String])["en"]
-                    let actualValue = phrase?.replacingTokens(using: { (tokenType) -> String in
+                    let actualValue = phrase?.replacingTokens(using: { (tokenType, variant) -> String in
                         var replacement: String?
                         switch tokenType {
                         case .firstInstruction:

diff --git a/OSRMTextInstructionsTests/TokenTests.swift b/OSRMTextInstructionsTests/TokenTests.swift
@@ -1,12 +1,12 @@
 import XCTest
-import OSRMTextInstructions
+@testable import OSRMTextInstructions
 
 class TokenTests: XCTestCase {
     func testReplacingTokens() {
         XCTAssertEqual("Dead Beef", "Dead Beef".replacingTokens { _ in "" })
         XCTAssertEqual("Food", "F{ref}{ref}d".replacingTokens { _ in "o" })
 
-        XCTAssertEqual("Take the left stairs to the 20th floor", "Take the {modifier} stairs to the {nth} floor".replacingTokens { (tokenType) -> String in
+        XCTAssertEqual("Take the left stairs to the 20th floor", "Take the {modifier} stairs to the {nth} floor".replacingTokens { (tokenType, variant) -> String in
             switch tokenType {
             case .modifier:
                 return "left"
@@ -19,8 +19,16 @@ class TokenTests: XCTestCase {
         })
 
         XCTAssertEqual("{👿}", "{👿}".replacingTokens { _ in "👼" })
+        XCTAssertEqual("{👿:}", "{👿:}".replacingTokens { _ in "👼" })
+        XCTAssertEqual("{👿:💣}", "{👿:💣}".replacingTokens { _ in "👼" })
         XCTAssertEqual("{", "{".replacingTokens { _ in "🕳" })
         XCTAssertEqual("{💣", "{💣".replacingTokens { _ in "🕳" })
         XCTAssertEqual("}", "}".replacingTokens { _ in "🕳" })
     }
+
+    func testInflectingStrings() {
+        // The assertion only runs when one of the bundle's preferred localizations starts with "ru".
+        guard Bundle(for: OSRMInstructionFormatter.self).preferredLocalizations.contains(where: { $0.starts(with: "ru") }) else { return }
+        XCTAssertEqual("Бармалееву улицу", "Бармалеева улица".inflected(into: "accusative", version: "v5"))
+    }
 }
diff --git a/json2plist.sh b/json2plist.sh
@@ -3,8 +3,8 @@
 # Transform select osrm-text-instructions language files from json to plist
 git submodule init
 git submodule update
-cd "./osrm-text-instructions/languages/translations/" || exit 1
 
+cd "./osrm-text-instructions/languages/translations/" || exit 1
 for file in ./*; do
     if [ "$file" = "./en.json" ]; then
       LANGUAGE="Base"
@@ -18,4 +18,18 @@ for file in ./*; do
     plutil -convert xml1 "./${file}" -o "${LANGUAGE_DIR}/Instructions.plist"
 done
 
+# Convert every file in the grammar directory into a Grammar.plist inside the matching .lproj directory.
+cd "../grammar/" || exit 1
+for file in ./*; do
+    [ -e "$file" ] || continue  # an unmatched glob is left as the literal "./*"
+    if [ "$file" = "./en.json" ]; then
+      LANGUAGE="Base"
+    else
+      LANGUAGE=$(basename "$file" .json)  # quoted so names with spaces or glob characters survive
+    fi
+    LANGUAGE_DIR="${BUILT_PRODUCTS_DIR:-../../../OSRMTextInstructions/}/${UNLOCALIZED_RESOURCES_FOLDER_PATH:-}/${LANGUAGE}.lproj"
+    mkdir -p "${LANGUAGE_DIR}"
+    plutil -convert xml1 "./${file}" -o "${LANGUAGE_DIR}/Grammar.plist"
+done
+
 cd - || exit 1