-
Notifications
You must be signed in to change notification settings - Fork 33
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
replace manual code lists with generated lists
* Manually building hashes for code lists was leading to out of date lists * switching to loading all the lists from generated data files * Thanks to Vivek for prompting me to get around to this
- Loading branch information
Showing
14 changed files
with
243 additions
and
1,205 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#!/usr/bin/ruby
# coding: utf-8

# Command-line wrapper around ONIX::CodeListExtractor.
#
# Takes exactly two arguments: the path to the ONIX code list XSD and the
# directory the extractor should write its output into. With any other
# number of arguments the usage string is printed and the script exits
# with status 1.

USAGE = "./onix_extract_codelists ONIX_BookProduct_CodeLists.xsd some_dir"

require 'rubygems'

# Make the library that sits next to this script loadable without installing it.
lib_dir = File.dirname(__FILE__) + '/../lib'
$LOAD_PATH.unshift(lib_dir)
require "onix"

if ARGV.size != 2
  puts USAGE
  exit(1)
end

xsd_path   = ARGV.shift
output_dir = ARGV.shift

ONIX::CodeListExtractor.new(xsd_path).run(output_dir)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
# coding: utf-8 | ||
|
||
# FileUtils is used by CodeListExtractor#run. Requiring it here is idempotent
# and keeps this file loadable on its own.
require "fileutils"

module ONIX

  # A utility class that processes the code list XSD from the ONIX spec and
  # creates a set of TSV files. The generated files are used by this library
  # to make hashes of the code lists available to users.
  #
  # Each generated file holds one code list, one code per line, in the form:
  #
  #   code<TAB>description<TAB>long description
  #
  class CodeListExtractor

    # Creates a new extractor. Expects the path to a copy of the code lists
    # file from the spec (called ONIX_BookProduct_CodeLists.xsd on my system).
    #
    # Raises ArgumentError if filename is not an existing regular file.
    #
    def initialize(filename)
      raise ArgumentError, "#{filename} not found" unless File.file?(filename)

      @filename = filename
    end

    # Generate a set of TSV files in the given directory. Creates the directory
    # (including missing parents) if it doesn't exist and will overwrite
    # existing files.
    #
    # Files are named after the list number, zero padded to three digits
    # (list 7 is written to 007.tsv).
    #
    def run(dir)
      # mkdir_p is a no-op when the directory is already there
      FileUtils.mkdir_p(dir)

      each_list do |number, tsv|
        file = number.to_s.rjust(3, "0") + ".tsv"
        File.open(File.join(dir, file), "w") { |f| f.write(tsv) }
      end
    end

    private

    # The raw content of the XSD file, read once and memoised.
    #
    def data
      @data ||= File.read(@filename)
    end

    # The parsed XSD. Namespaces are stripped once, at parse time, so the
    # XPath queries below can use bare element names.
    #
    def document
      @document ||= begin
        doc = Nokogiri::XML(data)
        doc.remove_namespaces! unless doc.namespaces.empty?
        doc
      end
    end

    # Yields the list number and the TSV data for every simpleType whose name
    # contains "List<number>" with a number greater than zero.
    #
    def each_list
      document.xpath("//simpleType").each do |node|
        # node["name"] is nil for an anonymous simpleType. nil.to_s has no
        # match, and nil.to_i is 0, so such nodes are skipped instead of
        # raising NoMethodError.
        number = node["name"].to_s[/List(\d+)/, 1].to_i
        yield number, list_data(number) if number > 0
      end
    end

    # Builds the TSV content for a single list: one line per enumeration,
    # holding the code, the first documentation entry and the last
    # documentation entry.
    #
    def list_data(num)
      nodes = document.xpath("//simpleType[@name='List#{num}']/restriction/enumeration")
      lines = nodes.map do |node|
        code = node["value"]
        docs = node.xpath("./annotation/documentation")
        "#{code}\t#{tsv_field(docs.first)}\t#{tsv_field(docs.last)}\n"
      end
      lines.join
    end

    # Text of a documentation node made safe for a single TSV cell: tabs and
    # line breaks (with any whitespace around them) collapse to one space so
    # they cannot be mistaken for field or record separators, and surrounding
    # whitespace is stripped. A missing node yields an empty string.
    #
    def tsv_field(node)
      return "" if node.nil?

      node.text.gsub(/\s*[\t\r\n]\s*/, " ").strip
    end

  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
# coding: utf-8 | ||
|
||
# Lists includes Singleton. Requiring it here is idempotent and keeps this
# file loadable on its own.
require "singleton"

module ONIX

  # Provides the ONIX code lists as hashes that map a code to its description.
  #
  # Each list is read from a TSV file in data/codes (relative to this file),
  # named after the list number zero padded to three digits (list 7 is read
  # from 007.tsv). A list is read the first time it is requested and then kept
  # in memory, so repeated calls return the same Hash object.
  #
  class Lists
    include Singleton

    # retrieve a hash with the specified code list
    #
    #   ONIX::Lists.list(7)
    #   => { "BB" => "Hardback", ... }
    #
    def self.list(number)
      instance.list(number)
    end

    # Shortcut to retrieve a common code list (list 28)
    #
    def self.audience_code
      instance.list(28)
    end

    # Shortcut to retrieve a common code list (list 17)
    #
    def self.contributor_role
      instance.list(17)
    end

    # Shortcut to retrieve a common code list (list 91)
    #
    def self.country_code
      instance.list(91)
    end

    # Shortcut to retrieve a common code list (list 74)
    #
    def self.language_code
      instance.list(74)
    end

    # Shortcut to retrieve a common code list (list 22)
    #
    def self.language_role
      instance.list(22)
    end

    # Shortcut to retrieve a common code list (list 1)
    #
    def self.notification_type
      instance.list(1)
    end

    # Shortcut to retrieve a common code list (list 65)
    #
    def self.product_availability
      instance.list(65)
    end

    # Shortcut to retrieve a common code list (list 7)
    #
    def self.product_form
      instance.list(7)
    end

    # Shortcut to retrieve a common code list (list 78)
    #
    def self.product_form_detail
      instance.list(78)
    end

    # return a hash with the data for a single code list.
    #
    # number should be a fixnum specifying the list to retrieve
    #
    #   ONIX::Lists.instance.list(7)
    #   => { "BB" => "Hardback", ... }
    #
    # Raises a SystemCallError (e.g. Errno::ENOENT) if there is no data file
    # for the requested list.
    #
    def list(number)
      cache[number] ||= build_hash(number)
    end

    private

    # Parses the TSV data for a list into a hash of code => description.
    #
    # Codes made up entirely of digits become Integer keys (so "01" is stored
    # under 1); every other code is kept as a String. The match has to be
    # anchored: String#to_i returns 0 for anything that doesn't start with a
    # digit, so an unanchored /\d+/ would turn codes such as "A01" or "B101"
    # into the key 0 and collapse the whole list into a single entry.
    #
    def build_hash(number)
      val = {}
      data(number).each_line do |line|
        # the third column (long description) is not exposed
        code, desc, _ldesc = line.chomp.split("\t", 3)
        next if code.nil? || code.empty? # blank line

        code = code.to_i if code =~ /\A\d+\z/
        val[code] = desc
      end
      val
    end

    # Lists that have already been loaded, keyed by list number.
    #
    def cache
      @cache ||= {}
    end

    # Full path to the TSV file that holds the given list.
    #
    def path(number)
      code_dir = File.dirname(__FILE__) + "/../../data/codes"
      filename = number.to_s.rjust(3, "0") + ".tsv"
      File.join(code_dir, filename)
    end

    # Raw content of the TSV file for the given list.
    #
    def data(number)
      File.read(path(number))
    end

    public

    # These are here for backwards compatibility with the onix gem <= 0.8.3.
    # They are evaluated while this file is loaded, so the nine lists below
    # are read from disk at require time.
    AUDIENCE_CODE        = ONIX::Lists.audience_code
    CONTRIBUTOR_ROLE     = ONIX::Lists.contributor_role
    COUNTRY_CODE         = ONIX::Lists.country_code
    LANGUAGE_CODE        = ONIX::Lists.language_code
    LANGUAGE_ROLE        = ONIX::Lists.language_role
    NOTIFICATION_TYPE    = ONIX::Lists.notification_type
    PRODUCT_AVAILABILITY = ONIX::Lists.product_availability
    PRODUCT_FORM         = ONIX::Lists.product_form
    PRODUCT_FORM_DETAIL  = ONIX::Lists.product_form_detail

  end
end
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.