diff --git a/CHANGELOG.md b/CHANGELOG.md index 1273f28d..34171b0a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Re-add the `ParentLocales` component, this time as a shared component, [#91](https://github.com/ruby-i18n/ruby-cldr/pull/91) - Changed the keys and values of `ParentLocales` component to be symbols, [#101](https://github.com/ruby-i18n/ruby-cldr/pull/101) - Fixed bug with fallbacks for locales that had more than two segments, [#101](https://github.com/ruby-i18n/ruby-cldr/pull/101) +- Merge all the related data files before doing lookups, [#98](https://github.com/ruby-i18n/ruby-cldr/pull/98) --- diff --git a/lib/cldr/export/data.rb b/lib/cldr/export/data.rb index 67da83c2..e59c8e11 100644 --- a/lib/cldr/export/data.rb +++ b/lib/cldr/export/data.rb @@ -51,6 +51,10 @@ def locales def components self.constants.sort - [:Base, :Export] end + + def paths_by_root + @paths ||= Dir[File.join(dir, "**", "*.xml")].sort.group_by { |path| Nokogiri::XML(File.read(path)).root.name } + end end end end diff --git a/lib/cldr/export/data/aliases.rb b/lib/cldr/export/data/aliases.rb index dfa000cb..e9bdf915 100644 --- a/lib/cldr/export/data/aliases.rb +++ b/lib/cldr/export/data/aliases.rb @@ -36,11 +36,6 @@ def alias_for(alias_tag) ret end end - - def path - @path ||= "#{Cldr::Export::Data.dir}/supplemental/supplementalMetadata.xml" - end - end end end diff --git a/lib/cldr/export/data/base.rb b/lib/cldr/export/data/base.rb index d3eb3fdd..7c4e01f6 100644 --- a/lib/cldr/export/data/base.rb +++ b/lib/cldr/export/data/base.rb @@ -8,6 +8,8 @@ module Data class Base < Hash attr_reader :locale + @@doc_cache = {} + def initialize(locale) @locale = locale end @@ -53,11 +55,45 @@ def xpath(sources) end def doc - @doc ||= Nokogiri::XML(File.read(path)) + @@doc_cache[paths.hash] ||= merge_paths(paths) + end + + def paths + @paths ||= begin + if locale + 
Dir[File.join(Cldr::Export::Data.dir, "*", "#{Cldr::Export.from_i18n(locale)}.xml")].sort & Cldr::Export::Data.paths_by_root["ldml"] + else + Cldr::Export::Data.paths_by_root["supplementalData"] + end + end end - def path - @path ||= "#{Cldr::Export::Data.dir}/main/#{Cldr::Export.from_i18n(locale)}.xml" + private + + def merge_paths(paths_to_merge) + # Some parts (`ldml`, `ldmlBCP47` and `supplementalData`) of CLDR data require that you merge all the + # files with the same root element before doing lookups. + # Ref: https://www.unicode.org/reports/tr35/tr35.html#XML_Format + # + # The return of this method is a merged XML Nokogiri document. + # Note that it technically is no longer compliant with the CLDR `ldml.dtd`, since: + # * it has repeated elements + # * the elements no longer refer to the filename + # + # However, this is not an issue, since #select will find all of the matches from each of the repeated elements, + # and the elements are not important to us / make no sense when combined together. + return Nokogiri::XML('') if paths_to_merge.empty? 
+ + rest = paths_to_merge[1..paths_to_merge.size - 1] + rest.inject(Nokogiri::XML(File.read(paths_to_merge.first))) do |result, path| + next_doc = Nokogiri::XML(File.read(path)) + + next_doc.root.children.each do |child| + result.root.add_child(child) + end + + result + end end end end diff --git a/lib/cldr/export/data/country_codes.rb b/lib/cldr/export/data/country_codes.rb index 9b8c8014..9a78f4bd 100644 --- a/lib/cldr/export/data/country_codes.rb +++ b/lib/cldr/export/data/country_codes.rb @@ -9,7 +9,7 @@ def initialize private - def country_codes + def country_codes doc.xpath("//codeMappings/*").each_with_object({}) do |node, hash| if node.name == "territoryCodes" type = node.attribute('type').to_s.to_sym @@ -17,11 +17,7 @@ def country_codes hash[type]["numeric"] = node[:numeric] if node[:numeric] hash[type]["alpha3"] = node[:alpha3] if node[:alpha3] end - end - end - - def path - @path ||= "#{Cldr::Export::Data.dir}/supplemental/supplementalData.xml" + end end end end diff --git a/lib/cldr/export/data/likely_subtags.rb b/lib/cldr/export/data/likely_subtags.rb index a1c273cc..04b83dd2 100644 --- a/lib/cldr/export/data/likely_subtags.rb +++ b/lib/cldr/export/data/likely_subtags.rb @@ -18,11 +18,6 @@ def subtags ret end end - - def path - @path ||= "#{Cldr::Export::Data.dir}/supplemental/likelySubtags.xml" - end - end end end diff --git a/lib/cldr/export/data/numbering_systems.rb b/lib/cldr/export/data/numbering_systems.rb index eef6cf75..6b4de4b1 100644 --- a/lib/cldr/export/data/numbering_systems.rb +++ b/lib/cldr/export/data/numbering_systems.rb @@ -30,12 +30,7 @@ def numbering_systems ret end end - - def path - @path ||= "#{Cldr::Export::Data.dir}/supplemental/numberingSystems.xml" - end - end end end -end \ No newline at end of file +end diff --git a/lib/cldr/export/data/rbnf.rb b/lib/cldr/export/data/rbnf.rb index c82ed92a..c9eab329 100644 --- a/lib/cldr/export/data/rbnf.rb +++ b/lib/cldr/export/data/rbnf.rb @@ -11,17 +11,16 @@ def initialize(*args) end def 
rule_groups - if File.exist?(path) - select("rbnf/rulesetGrouping").map do |grouping_node| - { - :type => grouping_node.attribute("type").value, - :ruleset => (grouping_node / "ruleset").map do |ruleset_node| - rule_set(ruleset_node) - end - } - end - else - {} + grouping_nodes = select("rbnf/rulesetGrouping") + return {} if grouping_nodes.empty? + + grouping_nodes.map do |grouping_node| + { + :type => grouping_node.attribute("type").value, + :ruleset => (grouping_node / "ruleset").map do |ruleset_node| + rule_set(ruleset_node) + end + } end end @@ -61,11 +60,6 @@ def cast_value(val) def fix_rule(rule) rule.gsub(/\A'/, '').gsub("←", '<').gsub("→", '>') end - - def path - @path ||= "#{Cldr::Export::Data.dir}/rbnf/#{Cldr::Export.from_i18n(locale)}.xml" - end - end end end diff --git a/lib/cldr/export/data/rbnf_root.rb b/lib/cldr/export/data/rbnf_root.rb index 113a4542..9d2b0308 100644 --- a/lib/cldr/export/data/rbnf_root.rb +++ b/lib/cldr/export/data/rbnf_root.rb @@ -9,11 +9,11 @@ def initialize private - def path - @path ||= "#{Cldr::Export::Data.dir}/rbnf/root.xml" + def paths + @paths ||= [File.join(Cldr::Export::Data.dir, "rbnf", "root.xml")] end end end end -end \ No newline at end of file +end diff --git a/lib/cldr/export/data/region_currencies.rb b/lib/cldr/export/data/region_currencies.rb index 0c08db74..51334314 100644 --- a/lib/cldr/export/data/region_currencies.rb +++ b/lib/cldr/export/data/region_currencies.rb @@ -34,11 +34,6 @@ def currency(node) result end end - - def path - @path ||= "#{Cldr::Export::Data.dir}/supplemental/supplementalData.xml" - end - end end end diff --git a/lib/cldr/export/data/segments_root.rb b/lib/cldr/export/data/segments_root.rb index 9260217e..4e12aa67 100644 --- a/lib/cldr/export/data/segments_root.rb +++ b/lib/cldr/export/data/segments_root.rb @@ -41,8 +41,8 @@ def rules(node) end end - def path - @path ||= "#{Cldr::Export::Data.dir}/segments/root.xml" + def paths + @paths ||= ["#{Cldr::Export::Data.dir}/segments/root.xml"] 
end def cast_value(value) diff --git a/lib/cldr/export/data/subdivisions.rb b/lib/cldr/export/data/subdivisions.rb index 181e2bf9..05ec3a3a 100644 --- a/lib/cldr/export/data/subdivisions.rb +++ b/lib/cldr/export/data/subdivisions.rb @@ -16,19 +16,6 @@ def subdivisions result end end - - def doc - begin - super - rescue Errno::ENOENT - @doc = Nokogiri::XML('') - end - end - - def path - @path ||= "#{Cldr::Export::Data.dir}/subdivisions/#{Cldr::Export.from_i18n(locale)}.xml" - end - end end end diff --git a/lib/cldr/export/data/transforms.rb b/lib/cldr/export/data/transforms.rb index d123132f..2e84fc2f 100644 --- a/lib/cldr/export/data/transforms.rb +++ b/lib/cldr/export/data/transforms.rb @@ -66,8 +66,8 @@ def fix_rule(rule) gsub("↔", '<>') end - def path - transform_file + def paths + [transform_file] end end diff --git a/lib/cldr/export/data/variables.rb b/lib/cldr/export/data/variables.rb index 7a0f30de..65f4eeeb 100644 --- a/lib/cldr/export/data/variables.rb +++ b/lib/cldr/export/data/variables.rb @@ -31,11 +31,6 @@ def fix_var_name(var_name) def split_value_list(value_list) value_list.strip.split(/[\s]+/) end - - def path - @path ||= "#{Cldr::Export::Data.dir}/supplemental/supplementalMetadata.xml" - end - end end end diff --git a/test/export/data/base_test.rb b/test/export/data/base_test.rb new file mode 100644 index 00000000..6d9152a1 --- /dev/null +++ b/test/export/data/base_test.rb @@ -0,0 +1,53 @@ +# encoding: utf-8 + +require File.expand_path(File.join(File.dirname(__FILE__) + '/../../test_helper')) + +class TestBase < Test::Unit::TestCase + test "#paths finds all the language-dependent data files" do + expected = [ + "annotations/af.xml", + "annotationsDerived/af.xml", + "casing/af.xml", + "collation/af.xml", + "main/af.xml", + "rbnf/af.xml", + "subdivisions/af.xml", + ].map {|f| File.join(Cldr::Export::Data.dir, f)} + assert_equal expected, Cldr::Export::Data::Base.new('af').send(:paths) + end + + test "#paths finds all the supplemental data files" do + 
expected_non_transform_files = [ + "supplemental/attributeValueValidity.xml", + "supplemental/characters.xml", + "supplemental/coverageLevels.xml", + "supplemental/dayPeriods.xml", + "supplemental/genderList.xml", + "supplemental/languageGroup.xml", + "supplemental/languageInfo.xml", + "supplemental/likelySubtags.xml", + "supplemental/metaZones.xml", + "supplemental/numberingSystems.xml", + "supplemental/ordinals.xml", + "supplemental/pluralRanges.xml", + "supplemental/plurals.xml", + "supplemental/rgScope.xml", + "supplemental/subdivisions.xml", + "supplemental/supplementalData.xml", + "supplemental/supplementalMetadata.xml", + "supplemental/windowsZones.xml", + "validity/currency.xml", + "validity/language.xml", + "validity/region.xml", + "validity/script.xml", + "validity/subdivision.xml", + "validity/unit.xml", + "validity/variant.xml", + ].map {|f| File.join(Cldr::Export::Data.dir, f)} + + supplemental_data_paths = Cldr::Export::Data::Base.new(nil).send(:paths) + + assert_equal expected_non_transform_files, supplemental_data_paths.reject {|p| p.include?("transforms/")} + assert_not_empty supplemental_data_paths.select {|p| p.include?("transforms/")} + end +end