diff --git a/scripts/strings_extractor.py b/scripts/strings_extractor.py index f9dc4da1..c7d71dd1 100755 --- a/scripts/strings_extractor.py +++ b/scripts/strings_extractor.py @@ -201,9 +201,6 @@ def smart_split(text): class TemplateSyntaxError(Exception): pass -plain_strings = [] -translatable_strings = [] - class TranslatableString: _string = '' context = '' @@ -212,25 +209,6 @@ class TranslatableString: def __repr__(self): return "String('%s', '%s', '%s')" % (self._string, self.context, self.plural) - -def get_translatable_filter_args(token): - """ - Find the filter expressions in token and extract the strings in it. - """ - matches = filter_re.finditer(token) - upto = 0 - var_obj = False - for match in matches: - l10nable = match.group("l10nable") - - if l10nable: - # Make sure it's a quoted string - if l10nable.startswith('"') and l10nable.endswith('"') \ - or l10nable.startswith("'") and l10nable.endswith("'"): - ts = TranslatableString() - ts._string = l10nable[1:-1] - translatable_strings.append(ts) - class Token(object): def __init__(self, token_type, contents): # token_type must be TOKEN_TEXT, TOKEN_VAR, TOKEN_BLOCK or TOKEN_COMMENT. @@ -241,9 +219,57 @@ def __str__(self): ({TOKEN_TEXT: 'Text', TOKEN_VAR: 'Var', TOKEN_BLOCK: 'Block', TOKEN_COMMENT: 'Comment'}[self.token_type], self.contents[:20].replace('\n', '')) - def get_contextual_strings(self): +def create_token(token_string, in_tag): + """ + Convert the given token string into a new Token object and return it. + If in_tag is True, we are processing something that matched a tag, + otherwise it should be treated as a literal string. + """ + if in_tag: + if token_string.startswith(VARIABLE_TAG_START): + token = Token(TOKEN_VAR, token_string[len(VARIABLE_TAG_START):-len(VARIABLE_TAG_END)].strip()) + elif token_string.startswith(BLOCK_TAG_START): + token = Token(TOKEN_BLOCK, token_string[len(BLOCK_TAG_START):-len(BLOCK_TAG_END)].strip()) + elif token_string.startswith(COMMENT_TAG_START): + token = Token(TOKEN_COMMENT, '') + else: + token = Token(TOKEN_TEXT, token_string) + return token + +def tokenize(template_string): + + in_tag = False + result = [] + for bit in tag_re.split(template_string): + if bit: + result.append(create_token(bit, in_tag)) + in_tag = not in_tag + return result + +class TranslationOutputter: + translatable_strings = [] + + def get_translatable_filter_args(self, token): + """ + Find the filter expressions in token and extract the strings in it. + """ + matches = filter_re.finditer(token) + upto = 0 + var_obj = False + for match in matches: + l10nable = match.group("l10nable") + + if l10nable: + # Make sure it's a quoted string + if l10nable.startswith('"') and l10nable.endswith('"') \ + or l10nable.startswith("'") and l10nable.endswith("'"): + ts = TranslatableString() + ts._string = l10nable[1:-1] + self.translatable_strings.append(ts) + + def get_contextual_strings(self, token): split = [] - _bits = smart_split(self.contents) + _bits = smart_split(token.contents) _bit = _bits.next() if _bit =="i18n" or _bit == "i18n_var": # {% i18n "A one %1, a two %2, a three %3" var1 var2 var3 %} @@ -258,7 +284,7 @@ def get_contextual_strings(self): translatable_string = TranslatableString() translatable_string._string = _bit[1:-1] - translatable_strings.append(translatable_string) + self.translatable_strings.append(translatable_string) elif _bit =="i18nc" or _bit == "i18nc_var": # {% i18nc "An email send operation failed." "%1 Failed!" var1 %} # {% i18nc_var "An email send operation failed." "%1 Failed!" var1 as result %} @@ -274,7 +300,7 @@ def get_contextual_strings(self): translatable_string.context = _bit[1:-1] _bit = _bits.next() translatable_string._string = _bit[1:-1] - translatable_strings.append(translatable_string) + self.translatable_strings.append(translatable_string) elif _bit =="i18np" or _bit =="i18np_var": # {% i18np "An email send operation failed." "%1 email send operations failed. Error : % 2." count count errorMsg %} # {% i18np_var "An email send operation failed." "%1 email send operations failed. Error : % 2." count count errorMsg as result %} @@ -290,7 +316,7 @@ def get_contextual_strings(self): translatable_string._string = _bit[1:-1] _bit = _bits.next() translatable_string.plural = _bit[1:-1] - translatable_strings.append(translatable_string) + self.translatable_strings.append(translatable_string) elif _bit =="i18ncp" or _bit =="i18ncp_var": # {% i18np "The user tried to send an email, but that failed." "An email send operation failed." "%1 email send operation failed." count count %} # {% i18np_var "The user tried to send an email, but that failed." "An email send operation failed." "%1 email send operation failed." count count as result %} @@ -309,7 +335,7 @@ def get_contextual_strings(self): translatable_string._string = _bit[1:-1] _bit = _bits.next() translatable_string.plural = _bit[1:-1] - translatable_strings.append(translatable_string) + self.translatable_strings.append(translatable_string) else: return @@ -317,55 +343,23 @@ def get_contextual_strings(self): if (_bit == "as"): return - get_translatable_filter_args(_bit) + self.get_translatable_filter_args(_bit) - - - def get_plain_strings(self): + def get_plain_strings(self, token): split = [] - bits = iter(smart_split(self.contents)) + bits = iter(smart_split(token.contents)) for bit in bits: - get_translatable_filter_args(bit) - -def create_token(token_string, in_tag): - """ - Convert the given token string into a new Token object and return it. - If in_tag is True, we are processing something that matched a tag, - otherwise it should be treated as a literal string. - """ - if in_tag: - if token_string.startswith(VARIABLE_TAG_START): - token = Token(TOKEN_VAR, token_string[len(VARIABLE_TAG_START):-len(VARIABLE_TAG_END)].strip()) - elif token_string.startswith(BLOCK_TAG_START): - token = Token(TOKEN_BLOCK, token_string[len(BLOCK_TAG_START):-len(BLOCK_TAG_END)].strip()) - elif token_string.startswith(COMMENT_TAG_START): - token = Token(TOKEN_COMMENT, '') - else: - token = Token(TOKEN_TEXT, token_string) - return token - -def tokenize(template_string): - - in_tag = False - result = [] - for bit in tag_re.split(template_string): - if bit: - result.append(create_token(bit, in_tag)) - in_tag = not in_tag - return result - -class TranslationOutputter: - - def translate(self, template_file, outputfile): - template_string = template_file.read() - for token in tokenize(template_string): - if token.token_type == TOKEN_VAR or token.token_type == TOKEN_BLOCK: - token.get_plain_strings() - if token.token_type == TOKEN_BLOCK: - token.get_contextual_strings() - global translatable_strings - self.createOutput(os.path.relpath(template_file.name), translatable_strings, outputfile) - translatable_strings = [] - - def createOutput(self, template_filename, translatable_strings, outputfile): - pass + self.get_translatable_filter_args(bit) + + def translate(self, template_file, outputfile): + template_string = template_file.read() + self.translatable_strings = [] + for token in tokenize(template_string): + if token.token_type == TOKEN_VAR or token.token_type == TOKEN_BLOCK: + self.get_plain_strings(token) + if token.token_type == TOKEN_BLOCK: + self.get_contextual_strings(token) + self.createOutput(os.path.relpath(template_file.name), self.translatable_strings, outputfile) + + def createOutput(self, template_filename, translatable_strings, outputfile): + pass