Skip to content

Commit

Permalink
Merge pull request #98 from movermeyer/movermeyer/merge_before_lookup
Browse files Browse the repository at this point in the history
Merge data files before lookup
  • Loading branch information
movermeyer authored Dec 13, 2021
2 parents 123e959 + d71b940 commit d1bdfef
Show file tree
Hide file tree
Showing 15 changed files with 117 additions and 71 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Re-add the `ParentLocales` component, this time as a shared component, [#91](https://github.com/ruby-i18n/ruby-cldr/pull/91)
- Changed the keys and values of `ParentLocales` component to be symbols, [#101](https://github.com/ruby-i18n/ruby-cldr/pull/101)
- Fixed bug with fallbacks for locales that had more than two segments, [#101](https://github.com/ruby-i18n/ruby-cldr/pull/101)
- Merge all the related data files before doing lookups, [#98](https://github.com/ruby-i18n/ruby-cldr/pull/98)

---

Expand Down
4 changes: 4 additions & 0 deletions lib/cldr/export/data.rb
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ def locales
def components
self.constants.sort - [:Base, :Export]
end

def paths_by_root
@paths ||= Dir[File.join(dir, "**", "*.xml")].sort.group_by { |path| Nokogiri::XML(File.read(path)).root.name }
end
end
end
end
Expand Down
5 changes: 0 additions & 5 deletions lib/cldr/export/data/aliases.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,6 @@ def alias_for(alias_tag)
ret
end
end

def path
@path ||= "#{Cldr::Export::Data.dir}/supplemental/supplementalMetadata.xml"
end

end
end
end
Expand Down
42 changes: 39 additions & 3 deletions lib/cldr/export/data/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ module Data
class Base < Hash
attr_reader :locale

@@doc_cache = {}

def initialize(locale)
@locale = locale
end
Expand Down Expand Up @@ -53,11 +55,45 @@ def xpath(sources)
end

def doc
@doc ||= Nokogiri::XML(File.read(path))
@@doc_cache[paths.hash] ||= merge_paths(paths)
end

def paths
@paths ||= begin
if locale
Dir[File.join(Cldr::Export::Data.dir, "*", "#{Cldr::Export.from_i18n(locale)}.xml")].sort & Cldr::Export::Data.paths_by_root["ldml"]
else
Cldr::Export::Data.paths_by_root["supplementalData"]
end
end
end

def path
@path ||= "#{Cldr::Export::Data.dir}/main/#{Cldr::Export.from_i18n(locale)}.xml"
private

def merge_paths(paths_to_merge)
# Some parts (`ldml`, `ldmlBCP47` amd `supplementalData`) of CLDR data require that you merge all the
# files with the same root element before doing lookups.
# Ref: https://www.unicode.org/reports/tr35/tr35.html#XML_Format
#
# The return of this method is a merged XML Nokogiri document.
# Note that it technically is no longer compliant with the CLDR `ldml.dtd`, since:
# * it has repeated elements
# * the <identity> elements no longer refer to the filename
#
# However, this is not an issue, since #select will find all of the matches from each of the repeated elements,
# and the <identity> elements are not important to us / make no sense when combined together.
return Nokogiri::XML('') if paths_to_merge.empty?

rest = paths_to_merge[1..paths_to_merge.size - 1]
rest.inject(Nokogiri::XML(File.read(paths_to_merge.first))) do |result, path|
next_doc = Nokogiri::XML(File.read(path))

next_doc.root.children.each do |child|
result.root.add_child(child)
end

result
end
end
end
end
Expand Down
8 changes: 2 additions & 6 deletions lib/cldr/export/data/country_codes.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,15 @@ def initialize

private

def country_codes
def country_codes
doc.xpath("//codeMappings/*").each_with_object({}) do |node, hash|
if node.name == "territoryCodes"
type = node.attribute('type').to_s.to_sym
hash[type] = {}
hash[type]["numeric"] = node[:numeric] if node[:numeric]
hash[type]["alpha3"] = node[:alpha3] if node[:alpha3]
end
end
end

def path
@path ||= "#{Cldr::Export::Data.dir}/supplemental/supplementalData.xml"
end
end
end
end
Expand Down
5 changes: 0 additions & 5 deletions lib/cldr/export/data/likely_subtags.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,6 @@ def subtags
ret
end
end

def path
@path ||= "#{Cldr::Export::Data.dir}/supplemental/likelySubtags.xml"
end

end
end
end
Expand Down
7 changes: 1 addition & 6 deletions lib/cldr/export/data/numbering_systems.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,7 @@ def numbering_systems
ret
end
end

def path
@path ||= "#{Cldr::Export::Data.dir}/supplemental/numberingSystems.xml"
end

end
end
end
end
end
26 changes: 10 additions & 16 deletions lib/cldr/export/data/rbnf.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,16 @@ def initialize(*args)
end

def rule_groups
if File.exist?(path)
select("rbnf/rulesetGrouping").map do |grouping_node|
{
:type => grouping_node.attribute("type").value,
:ruleset => (grouping_node / "ruleset").map do |ruleset_node|
rule_set(ruleset_node)
end
}
end
else
{}
grouping_nodes = select("rbnf/rulesetGrouping")
return {} if grouping_nodes.empty?

grouping_nodes.map do |grouping_node|
{
:type => grouping_node.attribute("type").value,
:ruleset => (grouping_node / "ruleset").map do |ruleset_node|
rule_set(ruleset_node)
end
}
end
end

Expand Down Expand Up @@ -61,11 +60,6 @@ def cast_value(val)
def fix_rule(rule)
rule.gsub(/\A'/, '').gsub("←", '<').gsub("→", '>')
end

def path
@path ||= "#{Cldr::Export::Data.dir}/rbnf/#{Cldr::Export.from_i18n(locale)}.xml"
end

end
end
end
Expand Down
6 changes: 3 additions & 3 deletions lib/cldr/export/data/rbnf_root.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ def initialize

private

def path
@path ||= "#{Cldr::Export::Data.dir}/rbnf/root.xml"
def paths
@paths ||= [File.join(Cldr::Export::Data.dir, "rbnf", "root.xml")]
end

end
end
end
end
end
5 changes: 0 additions & 5 deletions lib/cldr/export/data/region_currencies.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,6 @@ def currency(node)
result
end
end

def path
@path ||= "#{Cldr::Export::Data.dir}/supplemental/supplementalData.xml"
end

end
end
end
Expand Down
4 changes: 2 additions & 2 deletions lib/cldr/export/data/segments_root.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ def rules(node)
end
end

def path
@path ||= "#{Cldr::Export::Data.dir}/segments/root.xml"
def paths
@paths ||= ["#{Cldr::Export::Data.dir}/segments/root.xml"]
end

def cast_value(value)
Expand Down
13 changes: 0 additions & 13 deletions lib/cldr/export/data/subdivisions.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,6 @@ def subdivisions
result
end
end

def doc
begin
super
rescue Errno::ENOENT
@doc = Nokogiri::XML('')
end
end

def path
@path ||= "#{Cldr::Export::Data.dir}/subdivisions/#{Cldr::Export.from_i18n(locale)}.xml"
end

end
end
end
Expand Down
4 changes: 2 additions & 2 deletions lib/cldr/export/data/transforms.rb
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ def fix_rule(rule)
gsub("↔", '<>')
end

def path
transform_file
def paths
[transform_file]
end

end
Expand Down
5 changes: 0 additions & 5 deletions lib/cldr/export/data/variables.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,6 @@ def fix_var_name(var_name)
def split_value_list(value_list)
value_list.strip.split(/[\s]+/)
end

def path
@path ||= "#{Cldr::Export::Data.dir}/supplemental/supplementalMetadata.xml"
end

end
end
end
Expand Down
53 changes: 53 additions & 0 deletions test/export/data/base_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# encoding: utf-8

require File.expand_path(File.join(File.dirname(__FILE__) + '/../../test_helper'))

class TestBase < Test::Unit::TestCase
test "#paths finds all the language-dependent data files" do
expected = [
"annotations/af.xml",
"annotationsDerived/af.xml",
"casing/af.xml",
"collation/af.xml",
"main/af.xml",
"rbnf/af.xml",
"subdivisions/af.xml",
].map {|f| File.join(Cldr::Export::Data.dir, f)}
assert_equal expected, Cldr::Export::Data::Base.new('af').send(:paths)
end

test "#paths finds all the supplemental data files" do
expected_non_transform_files = [
"supplemental/attributeValueValidity.xml",
"supplemental/characters.xml",
"supplemental/coverageLevels.xml",
"supplemental/dayPeriods.xml",
"supplemental/genderList.xml",
"supplemental/languageGroup.xml",
"supplemental/languageInfo.xml",
"supplemental/likelySubtags.xml",
"supplemental/metaZones.xml",
"supplemental/numberingSystems.xml",
"supplemental/ordinals.xml",
"supplemental/pluralRanges.xml",
"supplemental/plurals.xml",
"supplemental/rgScope.xml",
"supplemental/subdivisions.xml",
"supplemental/supplementalData.xml",
"supplemental/supplementalMetadata.xml",
"supplemental/windowsZones.xml",
"validity/currency.xml",
"validity/language.xml",
"validity/region.xml",
"validity/script.xml",
"validity/subdivision.xml",
"validity/unit.xml",
"validity/variant.xml",
].map {|f| File.join(Cldr::Export::Data.dir, f)}

supplemental_data_paths = Cldr::Export::Data::Base.new(nil).send(:paths)

assert_equal expected_non_transform_files, supplemental_data_paths.reject {|p| p.include?("transforms/")}
assert_not_empty supplemental_data_paths.select {|p| p.include?("transforms/")}
end
end

0 comments on commit d1bdfef

Please sign in to comment.