Skip to content

Commit

Permalink
replace manual code lists with generated lists
Browse files Browse the repository at this point in the history
* Manually building hashes for code lists was leading to out of date
  lists
* switching to loading all the lists from generated data files
* Thanks to Vivek for prompting me to get around to this
  • Loading branch information
yob committed Oct 18, 2010
1 parent 7e2d133 commit ad02e58
Show file tree
Hide file tree
Showing 14 changed files with 243 additions and 1,205 deletions.
16 changes: 16 additions & 0 deletions bin/onix_extract_codelists
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/usr/bin/ruby
# coding: utf-8

# Command line entry point for ONIX::CodeListExtractor. Takes the path to the
# code lists XSD and the directory the extracted lists should be written to.
USAGE = "./onix_extract_codelists ONIX_BookProduct_CodeLists.xsd some_dir"

require 'rubygems'
# Make the library that ships alongside this script loadable.
$LOAD_PATH.unshift(File.dirname(__FILE__) + '/../lib')
require "onix"

# Exactly two arguments are expected; anything else prints the usage string
# and exits with a non-zero status.
if ARGV.size != 2
  puts USAGE
  exit(1)
end

xsd_path = ARGV.shift
extractor = ONIX::CodeListExtractor.new(xsd_path)

output_dir = ARGV.shift
extractor.run(output_dir)
12 changes: 3 additions & 9 deletions lib/onix.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

require 'bigdecimal'
require 'cgi'
require 'singleton'
require 'roxml'
require 'andand'

Expand Down Expand Up @@ -82,18 +83,11 @@ def self.two_digit
require File.join(File.dirname(__FILE__), "onix", "reader")
require File.join(File.dirname(__FILE__), "onix", "writer")

# lists
require File.join(File.dirname(__FILE__), "onix", "lists", "product_form")
require File.join(File.dirname(__FILE__), "onix", "lists", "product_availability")
require File.join(File.dirname(__FILE__), "onix", "lists", "country_code")
require File.join(File.dirname(__FILE__), "onix", "lists", "language_code")
require File.join(File.dirname(__FILE__), "onix", "lists", "language_role")
require File.join(File.dirname(__FILE__), "onix", "lists", "notification_type")
require File.join(File.dirname(__FILE__), "onix", "lists", "product_form_detail")

# product wrappers
require File.join(File.dirname(__FILE__), "onix", "simple_product")
require File.join(File.dirname(__FILE__), "onix", "apa_product")

# misc
require File.join(File.dirname(__FILE__), "onix", "lists")
require File.join(File.dirname(__FILE__), "onix", "normaliser")
require File.join(File.dirname(__FILE__), "onix", "code_list_extractor")
69 changes: 69 additions & 0 deletions lib/onix/code_list_extractor.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# coding: utf-8

# FileUtils is used by CodeListExtractor#run to create the output directory.
require 'fileutils'

module ONIX

  # A utility class that processes the code list XSD from the ONIX spec and
  # creates a set of TSV files. The generated files are used by this library
  # to make hashes of the code lists available to users.
  #
  # One file is written per code list, named after the list number padded to
  # three digits (e.g. 007.tsv). Each line holds three tab separated values:
  # the code, the text of the first documentation node and the text of the
  # last documentation node of that code's annotation.
  #
  # NOTE(review): Nokogiri is used here but not required in this file;
  # presumably it is loaded elsewhere before this class runs - confirm.
  #
  class CodeListExtractor

    # Creates a new extractor. Expects the path to a copy of the code lists
    # file from the spec (called ONIX_BookProduct_CodeLists.xsd on my system).
    #
    # Raises ArgumentError if filename is not an existing file.
    #
    def initialize(filename)
      raise ArgumentError, "#{filename} not found" unless File.file?(filename)

      @filename = filename
    end

    # generate a set of TSV files in the given directory. Creates the directory
    # if it doesn't exist and will overwrite existing files.
    #
    def run(dir)
      FileUtils.mkdir_p(dir) unless File.directory?(dir)

      each_list do |number, tsv|
        filename = number.to_s.rjust(3, "0") + ".tsv"
        File.open(File.join(dir, filename), "w") { |f| f.write(tsv) }
      end
    end

    private

    # The raw content of the XSD file, read once and memoised.
    #
    def data
      @data ||= File.open(@filename) { |f| f.read }
    end

    # The parsed XSD. Namespaces are stripped once, when the document is first
    # built, so the xpath queries below can use plain element names.
    #
    def document
      @document ||= begin
        doc = Nokogiri::XML(data)
        doc.remove_namespaces! if doc.namespaces.size > 0
        doc
      end
    end

    # Yields the list number and the TSV data for every simpleType whose name
    # contains "List" followed by a number greater than zero. simpleType nodes
    # without a name attribute, or with a non matching name, are skipped.
    #
    def each_list
      document.xpath("//simpleType").each do |node|
        # node["name"] is nil for anonymous types; to_s keeps the lookup safe
        list_number = node["name"].to_s[/List(\d+)/, 1].to_i
        yield list_number, list_data(list_number) if list_number > 0
      end
    end

    # Builds the TSV content for a single list: one line per enumeration
    # with the code and the text of its first and last documentation nodes.
    #
    def list_data(num)
      nodes = document.xpath("//simpleType[@name='List#{num}']/restriction/enumeration")
      nodes.each_with_object("") do |node, tsv|
        code = node.xpath("./@value").first.value
        docs = node.xpath("./annotation/documentation")
        tsv << "#{code}\t#{docs.first.text}\t#{docs.last.text}\n"
      end
    end

  end
end
122 changes: 122 additions & 0 deletions lib/onix/lists.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# coding: utf-8

module ONIX

  # Provides the ONIX code lists as hashes, loaded on demand from the
  # generated TSV files in data/codes (one file per list, named after the
  # list number padded to three digits).
  #
  # Each list is read from disk the first time it is requested and cached on
  # the singleton instance afterwards.
  #
  class Lists
    include Singleton

    # retrieve a hash with the specified code list
    #
    #   ONIX::Lists.list(7)
    #   => { "BB" => "Hardback", ... }
    #
    def self.list(number)
      self.instance.list(number)
    end

    # Shortcut to retrieve a common code list (list 28)
    #
    def self.audience_code
      self.instance.list(28)
    end

    # Shortcut to retrieve a common code list (list 17)
    #
    def self.contributor_role
      self.instance.list(17)
    end

    # Shortcut to retrieve a common code list (list 91)
    #
    def self.country_code
      self.instance.list(91)
    end

    # Shortcut to retrieve a common code list (list 74)
    #
    def self.language_code
      self.instance.list(74)
    end

    # Shortcut to retrieve a common code list (list 22)
    #
    def self.language_role
      self.instance.list(22)
    end

    # Shortcut to retrieve a common code list (list 1)
    #
    def self.notification_type
      self.instance.list(1)
    end

    # Shortcut to retrieve a common code list (list 65)
    #
    def self.product_availability
      self.instance.list(65)
    end

    # Shortcut to retrieve a common code list (list 7)
    #
    def self.product_form
      self.instance.list(7)
    end

    # Shortcut to retrieve a common code list (list 78)
    #
    def self.product_form_detail
      self.instance.list(78)
    end

    # return a hash with the data for a single code list.
    #
    # number should be a fixnum specifying the list to retrieve
    #
    #   ONIX::Lists.instance.list(7)
    #   => { "BB" => "Hardback", ... }
    #
    # Codes made up entirely of digits are returned as integer keys, all
    # other codes as strings. The values are the descriptions from the second
    # column of the data file.
    #
    # Raises Errno::ENOENT if there is no data file for the requested list.
    #
    def list(number)
      cache[number] ||= build_hash(number)
    end

    private

    # Parses the TSV data for a list into a code => description hash.
    #
    def build_hash(number)
      val = {}
      data(number).each_line do |line|
        # chomp first so the line terminator never ends up in a field
        code, desc, _ldesc = *line.chomp.split("\t")
        next if code.nil? # blank line

        # Only convert codes that are entirely numeric. An unanchored match
        # would also hit codes like "B101", and "B101".to_i is 0, which
        # collapses every alphanumeric code onto the same key.
        code = code.to_i if code.match(/\A\d+\z/)
        val[code] = desc
      end
      val
    end

    # Lists that have already been loaded, keyed by list number.
    #
    def cache
      @cache ||= {}
    end

    # Path to the TSV file that holds the given list.
    #
    def path(number)
      code_dir = File.dirname(__FILE__) + "/../../data/codes"
      filename = number.to_s.rjust(3, "0") + ".tsv"
      File.join(code_dir, filename)
    end

    # Raw content of the TSV file for the given list.
    #
    def data(number)
      File.open(path(number)) { |f| f.read }
    end

    # These are here for backwards compatibility with the onix gem <= 0.8.3.
    # They are evaluated while the class is being defined, so the data files
    # for these lists are read when this file is loaded.
    AUDIENCE_CODE = ONIX::Lists.audience_code
    CONTRIBUTOR_ROLE = ONIX::Lists.contributor_role
    COUNTRY_CODE = ONIX::Lists.country_code
    LANGUAGE_CODE = ONIX::Lists.language_code
    LANGUAGE_ROLE = ONIX::Lists.language_role
    NOTIFICATION_TYPE = ONIX::Lists.notification_type
    PRODUCT_AVAILABILITY = ONIX::Lists.product_availability
    PRODUCT_FORM = ONIX::Lists.product_form
    PRODUCT_FORM_DETAIL = ONIX::Lists.product_form_detail

  end
end
17 changes: 0 additions & 17 deletions lib/onix/lists/audience_code.rb

This file was deleted.

99 changes: 0 additions & 99 deletions lib/onix/lists/contributor_role.rb

This file was deleted.

Loading

0 comments on commit ad02e58

Please sign in to comment.