diff --git a/api.h b/api.h index e23e052..fee0733 100644 --- a/api.h +++ b/api.h @@ -339,6 +339,15 @@ varnam_get_all_tokens( varray **tokens ); +/* Copies all word breakers in the symbol table to list. +Word breakers are used in the libvarnam-ibus project to denote the end of a word. +However, each scheme file can use a different set of characters as word breakers, as +specified in the scheme file. For an example, see ml-inscript. +*/ +int +varnam_word_breakers(varnam *handle, char *list, int max_count); + + /** * Enable logging. * diff --git a/schemes/ml b/schemes/ml index 6017f25..6e04715 100644 --- a/schemes/ml +++ b/schemes/ml @@ -17,6 +17,13 @@ virama "~" => "്" infer_dead_consonants true +word_breakers "." => ".", + "," => ",", + "?" => "?", + "!" => "!", + "(" => "(", + ")" => ")" + vowels "a" => "അ", [["a"], "aa", "A"] => ["ആ", "ാ"], "i" => ["ഇ", "ി"], diff --git a/schemes/ml-inscript b/schemes/ml-inscript index b4ae9c7..82d0621 100644 --- a/schemes/ml-inscript +++ b/schemes/ml-inscript @@ -16,7 +16,18 @@ infer_dead_consonants false $zwnj = "\u{200c}" $zwj = "\u{200d}" +#word_breakers are symbols that denote the end +#of the word the user is typing. When a word +#breaker is encountered, IBus commits the typed word +#and begins a new word. +word_breakers "." => ".", + "," => ",", + "?" => "?", + "!" => "!", + "(" => "(", + ")" => ")" + vowels "D" => "അ", "E" => "ആ", "F" => "ഇ", @@ -82,7 +93,18 @@ consonants "k" => "ക", "J" => "റ", "#" => "്ര", "&" => "ക്ഷ", - "=" => "ൃ" + "=" => "ൃ", + "ൻ" => "ൻ", + "ൺ" => "ൺ", + "ൽ" => "ൽ", + "ൾ" => "ൾ", + "ർ" => "ർ" +#The above chillu mappings are necessary due to a bug. +#Inscript treats an atomic chillu as a token. +#However, the token is not in the vst symbols table. +#This somehow makes varnam assign the type '10' (VARNAM_TOKEN_OTHER) to the chillu. +#If a word contains tokens of type 10, it is not learned. 
+#So the absurd nonsensical chillu mappings stay there for the time being numbers "1" => "൧", "2" => "൨", @@ -97,8 +119,10 @@ numbers "1" => "൧", symbols "_" => "ഃ" -others "]" => $zwj, - "\\" => $zwnj +#non-joiner "\\" => $zwnj +joiner "]" => $zwj + + @@ -117,4 +141,4 @@ others "]" => $zwj, - \ No newline at end of file +
diff --git a/symbol-table.c b/symbol-table.c
index f748fed..9f4e16b 100644
--- a/symbol-table.c
+++ b/symbol-table.c
@@ -629,6 +629,65 @@ vst_add_metadata (varnam *handle, const char* key, const char* value)
     return VARNAM_SUCCESS;
 }
 
+/**
+ * Appends the pattern of every word breaker symbol in the symbol table to list.
+ *
+ * Each pattern is handed to strbuf_add as is, in the order the query returns
+ * the rows; no separator is added here.
+ *
+ * @param handle varnam handle whose symbol table database is queried
+ * @param list   string buffer that receives the patterns
+ * @return VARNAM_SUCCESS once every row has been read; VARNAM_ERROR when a
+ *         SQLite call fails, with the message stored through set_last_error
+ */
+int
+vst_get_word_breakers(varnam *handle, strbuf *list)
+{
+    int rc;
+    sqlite3 *db;
+    sqlite3_stmt *stmt = NULL;
+    const char *pattern;
+    const char *sql = "select pattern from symbols where type=?1";
+
+    db = handle->internal->db;
+
+    rc = sqlite3_prepare_v2(db, sql, -1, &stmt, NULL);
+    if(rc != SQLITE_OK)
+    {
+        set_last_error(handle, "Failed to prepare statement : %s", sqlite3_errmsg(db));
+        sqlite3_finalize(stmt);
+        return VARNAM_ERROR;
+    }
+
+    rc = sqlite3_bind_int(stmt, 1, VARNAM_WORD_BREAKER);
+    if(rc != SQLITE_OK)
+    {
+        /* Read the error message before finalizing, so that it still
+           describes the failed bind and not the finalize call */
+        set_last_error(handle, "Could not bind : %s", sqlite3_errmsg(db));
+        sqlite3_finalize(stmt);
+        return VARNAM_ERROR;
+    }
+
+    while((rc = sqlite3_step(stmt)) == SQLITE_ROW)
+    {
+        /* sqlite3_column_text yields NULL for a NULL column value */
+        pattern = (const char*) sqlite3_column_text(stmt, 0);
+        if(pattern != NULL)
+            strbuf_add(list, pattern);
+    }
+
+    if(rc != SQLITE_DONE)
+    {
+        set_last_error(handle, "%s", sqlite3_errmsg(db));
+        sqlite3_finalize(stmt);
+        return VARNAM_ERROR;
+    }
+
+    sqlite3_finalize(stmt);
+    return VARNAM_SUCCESS;
+}
+
 int
 vst_load_scheme_details(varnam *handle, vscheme_details *output)
 {
diff --git a/symbol-table.h b/symbol-table.h index d1100d5..e8e9d7d 100644 --- a/symbol-table.h +++ b/symbol-table.h @@ -107,4 +107,7 @@ vst_stamp_version (varnam *handle); int vst_load_scheme_details(varnam *handle, vscheme_details *output); +int +vst_get_word_breakers(varnam *handle, strbuf *list); + #endif
diff --git a/varnam.c b/varnam.c index 1267733..e1d5ad3 100644 --- 
a/varnam.c
+++ b/varnam.c
@@ -524,6 +524,53 @@ varnam_get_all_scheme_details()
     return schemeDetails;
 }
 
+/**
+ * Copies the word breakers obtained from vst_get_word_breakers into a caller
+ * supplied buffer. Intended for use with ibus.
+ *
+ * word_breakers is passed by value, so this function cannot hand a moved
+ * allocation back to the caller. The buffer is therefore never reallocated;
+ * a buffer that is too small is reported as an error instead.
+ *
+ * @param handle        varnam handle to read the word breakers from
+ * @param word_breakers destination buffer, filled with a NUL terminated string
+ * @param allocated     size of word_breakers in bytes, terminating NUL included
+ * @return VARNAM_SUCCESS when the buffer was filled; VARNAM_ERROR on invalid
+ *         arguments, when the word breakers could not be read, or when they
+ *         do not fit into allocated bytes
+ */
+int
+varnam_word_breakers(varnam *handle, char *word_breakers, int allocated)
+{
+    int rc;
+    size_t required;
+    const char *breakers;
+    strbuf *list;
+
+    if(handle == NULL || word_breakers == NULL || allocated <= 0)
+        return VARNAM_ERROR;
+
+    list = get_pooled_string(handle);
+
+    rc = vst_get_word_breakers(handle, list);
+    if(rc != VARNAM_SUCCESS)
+    {
+        set_last_error(handle, "Could not obtain word breakers");
+        return VARNAM_ERROR;
+    }
+
+    breakers = strbuf_to_s(list);
+    required = strlen(breakers) + 1; /* + 1 for the terminating NUL */
+    if(required > (size_t) allocated)
+    {
+        set_last_error(handle, "Buffer of %d bytes is too small to hold the word breakers", allocated);
+        return VARNAM_ERROR;
+    }
+
+    memcpy(word_breakers, breakers, required);
+    return VARNAM_SUCCESS;
+}
+
 int
 varnam_get_scheme_details(varnam *handle, vscheme_details **details)
 {
diff --git a/varnamc b/varnamc index df455ae..bbad1bd 100755 --- a/varnamc +++ b/varnamc @@ -830,13 +830,13 @@ end def non_joiner(hash) _ensure_sanity(hash) - _create_token(hash, Varnam::VARNAM_TOKEN_NON_JOINER); + _create_token(hash, Varnam::VARNAM_TOKEN_NON_JOINER) $overridden_default_symbols.push Varnam::VARNAM_TOKEN_NON_JOINER end def joiner(hash) _ensure_sanity(hash) - _create_token(hash, Varnam::VARNAM_TOKEN_JOINER); + _create_token(hash, Varnam::VARNAM_TOKEN_JOINER) $overridden_default_symbols.push Varnam::VARNAM_TOKEN_JOINER end @@ -1390,5 +1390,11 @@ def exceptions_stem(hash, options={}) end end end + +def word_breakers(options={}, hash) + _ensure_sanity(hash) + _create_token(hash, Varnam::VARNAM_WORD_BREAKER, options) +end + do_action
diff --git a/varnamruby.rb b/varnamruby.rb index 678942e..1198215 100644 --- a/varnamruby.rb +++ b/varnamruby.rb @@ -84,7 +84,8 @@ module Varnam VARNAM_TOKEN_OTHER = 10 VARNAM_TOKEN_NON_JOINER = 11 VARNAM_TOKEN_JOINER = 12 - + VARNAM_WORD_BREAKER = 13 + VARNAM_MATCH_EXACT = 1 VARNAM_MATCH_POSSIBILITY = 2
diff --git a/vtypes.h b/vtypes.h index 5655fc9..a580981 100644 --- a/vtypes.h +++ b/vtypes.h @@ -38,6 +38,7 @@ #define 
VARNAM_TOKEN_OTHER 10 #define VARNAM_TOKEN_NON_JOINER 11 #define VARNAM_TOKEN_JOINER 12 +#define VARNAM_WORD_BREAKER 13 /* token flags */ #define VARNAM_TOKEN_FLAGS_MORE_MATCHES_FOR_PATTERN (1 << 0)