Skip to content

Commit

Permalink
[#32] Parse 7XX fields as mappings
Browse files Browse the repository at this point in the history
  • Loading branch information
danmichaelo committed Jul 6, 2017
1 parent 16b2531 commit 3cc7175
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 3 deletions.
13 changes: 11 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ MARC21XML RDF
``681`` Subject Example Tracing Note ``skos:example``
``682`` Deleted Heading Information ``skos:changeNote``
``688`` Application History Note ``skos:historyNote``
``7XX`` Heading Linking Entries ``skos:xxxMatch`` (see below)
========================================================== ===================================================================

Notes:
Expand All @@ -268,7 +269,15 @@ Notes:
classification scheme has been defined in the config.

* SKOS relations are generated from 5XX fields if the fields contain a ``$0``
subfield containing either the control number or the URI of the related record.
subfield containing either a control number or a URI for the related record.
The relationship type is ``skos:broader`` if ``$w=g``, ``skos:narrower`` if ``$w=h``,
and ``skos:related`` otherwise.
If ``$w=r`` and ``$4`` contains a URI, that URI is used as the relationship type.
If ``$w=r`` and ``$4`` contains a URI, that URI is used as the relationship type.

* Mappings/relationships are generated for 7XX headings if the fields contain a ``$0``
subfield containing either the control number or the URI of the related record.
If ``$0`` contains a control number, a URI pattern for the vocabulary
(found in indicator 2 or ``$2``) must be defined in ``mc2skos.record.CONFIG``.
If ``$4`` contains a URI, that URI is used as the relationship type.
Otherwise, if ``$4`` contains one of the ISO 25964 relations (``=EQ``, ``~EQ``, ``BM``, ``NM`` or ``RM``), the corresponding
SKOS relation is used. Otherwise, the default value ``skos:closeMatch`` is used.
3 changes: 3 additions & 0 deletions examples/noubomn-c000011.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
dcterms:identifier "REAL000011" ;
dcterms:modified "2016-06-23"^^xsd:date ;
owl:deprecated true ;
skos:closeMatch <http://id.loc.gov/authorities/subjects/sh85086566> ;
skos:exactMatch <http://data.ub.uio.no/humord/c08221>,
<http://www.wikidata.org/entity/Q159341> ;
skos:inScheme <http://data.ub.uio.no/realfagstermer/> ;
skos:prefLabel "Mugg"@nb .

15 changes: 15 additions & 0 deletions examples/noubomn-c000011.xml
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,19 @@
<datafield ind1=" " ind2=" " tag="150">
<subfield code="a">Mugg</subfield>
</datafield>
<datafield ind1=" " ind2="7" tag="750">
<subfield code="a">Muggsopp</subfield>
<subfield code="0">(No-TrBIB)HUME08221</subfield>
<subfield code="2">humord</subfield>
<subfield code="4">=EQ</subfield>
</datafield>
<datafield ind1=" " ind2="7" tag="750">
<subfield code="a">muggsopp</subfield>
<subfield code="0">http://www.wikidata.org/entity/Q159341</subfield>
<subfield code="4">http://www.w3.org/2004/02/skos/core#exactMatch</subfield>
</datafield>
<datafield ind1=" " ind2="0" tag="750">
<subfield code="a">Molds (Fungi)</subfield>
<subfield code="0">sh85086566</subfield>
</datafield>
</record>
43 changes: 42 additions & 1 deletion mc2skos/record.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from datetime import datetime
import logging
from iso639 import languages
from rdflib import URIRef
from rdflib.namespace import SKOS

from .constants import Constants
Expand Down Expand Up @@ -696,7 +697,7 @@ def parse(self, options):
elif sf_w == 'h':
relation = SKOS.narrower
elif sf_w == 'r' and is_uri(sf_4):
relation = sf_4
relation = URIRef(sf_4)
else:
relation = SKOS.related

Expand Down Expand Up @@ -754,3 +755,43 @@ def parse(self, options):
# madsrdf:historyNote
for entry in self.record.all('mx:datafield[@tag="688"]'):
self.historyNote.append(entry.stringify())

# 7XX: Heading Linking Entries
for heading in self.get_terms('7'):
sf_4 = heading['node'].text('mx:subfield[@code="4"]')
sf_0 = heading['node'].text('mx:subfield[@code="0"]')

if sf_4 is not None and is_uri(sf_4):
relation = URIRef(sf_4)
else:
relation = {
'=EQ': SKOS.exactMatch,
'~EQ': SKOS.closeMatch,
'BM': SKOS.broadMatch,
'NM': SKOS.narrowMatch,
'RM': SKOS.relatedMatch,
}.get(sf_4)

relation = relation or SKOS.closeMatch # default
if is_uri(sf_0):
self.relations.append({
'uri': sf_0,
'relation': relation,
})
else:
scheme_code = {
'0': 'a', # Library of Congress Subject Headings
'1': 'b', # LC subject headings for children's literature
'2': 'c', # Medical Subject Headings
'3': 'd', # National Agricultural Library subject authority file
'4': 'n', # Source not specified
'5': 'k', # Canadian Subject Headings
'6': 'v', # Répertoire de vedettes-matière
'7': heading['node'].text('mx:subfield[@code="2"]'), # Source specified in subfield $2
}.get(heading['node'].get('ind2'))

self.append_relation(
ConceptScheme(scheme_code, AuthorityRecord),
relation,
control_number=sf_0
)

0 comments on commit 3cc7175

Please sign in to comment.