Skip to content

Commit

Permalink
Merge pull request #1045 from libris/feature/lxl-2483-extended-place-…
Browse files Browse the repository at this point in the history
…descriptions-2

Feature/lxl 2483 extended place descriptions 2
  • Loading branch information
olovy authored Nov 23, 2021
2 parents 0a7dda1 + b98a9a1 commit 86bacf8
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class QueryRunner {
"skos" : "http://www.w3.org/2004/02/skos/core#",
"wd" : "http://www.wikidata.org/entity/",
"wdt" : "http://www.wikidata.org/prop/direct/",
"wdtn" : "http://www.wikidata.org/prop/direct-normalized/",
"wikibase": "http://wikiba.se/ontology#"
]

Expand Down
160 changes: 133 additions & 27 deletions whelk-core/src/main/groovy/whelk/external/Wikidata.groovy
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package whelk.external

import groovy.transform.Memoized
import org.apache.jena.query.QuerySolution
import org.apache.jena.query.ResultSet
import org.apache.jena.rdf.model.Model
import org.apache.jena.rdf.model.ModelFactory
Expand Down Expand Up @@ -28,7 +29,7 @@ class Wikidata implements Mapper {

return Optional.ofNullable(wdEntity.convert())
}

@Override
boolean mightHandle(String iri) {
return isWikidata(iri)
Expand All @@ -42,7 +43,7 @@ class Wikidata implements Mapper {
static boolean isWikidata(String iri) {
iri.startsWith("https://www.wikidata.org") || iri.startsWith("http://www.wikidata.org")
}

static List<String> query(String query, String langTag, int limit) {
try {
performQuery(query, langTag, limit)
Expand All @@ -55,10 +56,10 @@ class Wikidata implements Mapper {
/**
* Search Wikidata using the wbsearchentities API
* Documented here: https://www.wikidata.org/w/api.php?action=help&modules=wbsearchentities
*
*
* Language parameter: "Search in this language. This only affects how entities are selected, not
* the language in which the results are returned: this is controlled by the "uselang" parameter."
*
*
* @param query the query string
* @param langTag language code for language to search in
* @param limit max number of hits
Expand All @@ -69,7 +70,7 @@ class Wikidata implements Mapper {
def base = 'https://www.wikidata.org/w/api.php?action=wbsearchentities&format=json'
def q = URLEncoder.encode(query, StandardCharsets.UTF_8)
String uri = "$base&limit=$limit&language=$langTag&uselang=$langTag&search=$q"

HttpRequest request = HttpRequest.newBuilder()
.uri(URI.create(uri))
.timeout(Duration.ofSeconds(30))
Expand All @@ -80,7 +81,7 @@ class Wikidata implements Mapper {
def result = mapper.readValue(httpResponse.body(), Map.class)
.get('search')
.collect { (String) it['concepturi'] }

return result
}
}
Expand All @@ -91,20 +92,34 @@ class WikidataEntity {

// Wikidata property short ids
static final String COUNTRY = "P17"
static final String DDC = "P1036"
static final String EDITION = "P747"
static final String END_TIME = "P582"
static final String FAST = "P2163"
static final String FREEBASE = "P646"
static final String GEONAMES = "P1566"
static final String INSTANCE_OF = "P31"
static final String PART_OF_PLACE = "P131" // located in the administrative territorial entity
static final String LC_AUTH = "P244"
static final String LOCATED_IN = "P131" // located in the administrative territorial entity
static final String SUBCLASS_OF = "P279"

enum Type {
PLACE('Q618123'), // Geographical feature
PERSON('Q5'), // Human
static final String TORA = "P4820"
static final String YSO = "P2347"

// Wikidata class short ids
static final String GEO_FEATURE = "Q618123"
static final String HUMAN = "Q5"
static final String SWEDISH_MUNI = "Q127448"
static final String SWEDISH_COUNTY = "Q200547"

enum KbvType {
PLACE(GEO_FEATURE),
PERSON(HUMAN),
OTHER('')

String baseClass
String wikidataType

private Type(String baseClass) {
this.baseClass = baseClass
private KbvType(String wikidataType) {
this.wikidataType = wikidataType
}
}

Expand Down Expand Up @@ -137,8 +152,8 @@ class WikidataEntity {

Map convert() {
switch (type()) {
case Type.PLACE: return convertPlace()
case Type.PERSON: return convertPerson()
case KbvType.PLACE: return convertPlace()
case KbvType.PERSON: return convertPerson()
default: return null
}
}
Expand All @@ -155,17 +170,47 @@ class WikidataEntity {
place['prefLabelByLang'] = prefLabel.collectEntries { [it.getLanguage(), it.getLexicalForm()] }

List description = getDescription().findAll { it.getLanguage() in ElasticSearch.LANGUAGES_TO_INDEX }
if (!prefLabel.isEmpty())
if (!description.isEmpty())
place['descriptionByLang'] = description.collectEntries { [it.getLanguage(), it.getLexicalForm()] }

List country = getCountry().findAll { it.toString() != entityIri }
if (!country.isEmpty())
place['country'] = country.collect { ['@id': it.toString()] }

List partOf = getPartOfPlace() - country
List partOf = getLocatedIn() - country
if (!partOf.isEmpty())
place['isPartOf'] = partOf.collect { ['@id': it.toString()] }

List ddc = getDdc().collect { code, edition ->
Map bNode =
[
'@type': "ClassificationDdc",
'code' : code.toString()
]
if (edition)
bNode['edition'] = ['@id': edition.toString()]

return bNode
}

List lcsh = getLcsh().collect {
['@id': it.toString()]
}

List fast = getFast().collect {
['@id': it.toString()]
}

List closeMatches = ddc + lcsh + fast

if (closeMatches) {
place['closeMatch'] = closeMatches
}

List identifiers = getPlaceIdentifiers()
if (!identifiers.isEmpty())
place['exactMatch'] = identifiers.collect { ['@id': it.toString()] }

return place
}

Expand Down Expand Up @@ -207,11 +252,11 @@ class WikidataEntity {
return rs.collect { it.get("country") }
}

List<RDFNode> getPartOfPlace() {
List<RDFNode> getLocatedIn() {
String queryString = """
SELECT ?place {
wd:${shortId} p:${PART_OF_PLACE} ?stmt .
?stmt ps:${PART_OF_PLACE} ?place .
SELECT DISTINCT ?place {
wd:${shortId} p:${LOCATED_IN} ?stmt .
?stmt ps:${LOCATED_IN} ?place .
FILTER NOT EXISTS { ?stmt pq:${END_TIME} ?endTime }
}
"""
Expand All @@ -221,22 +266,83 @@ class WikidataEntity {
return rs.collect { it.get("place") }
}

Type type() {
List<List<RDFNode>> getDdc() {
String queryString = """
SELECT ?code ?edition {
wd:${shortId} wdt:${DDC} ?code ;
wdt:${DDC} ?stmt .
OPTIONAL { ?stmt pq:${EDITION} ?edition }
}
"""

ResultSet rs = QueryRunner.localSelectResult(queryString, graph)

return rs.collect { [it.get("code"), it.get("edition")] }
}

List<RDFNode> getLcsh() {
String queryString = """
SELECT ?id {
wd:${shortId} wdt:${LC_AUTH} ?shortId .
bind(iri(concat("http://id.loc.gov/authorities/subjects/", ?shortId)) as ?id)
FILTER(strstarts(?shortId, "sh"))
}
"""

ResultSet rs = QueryRunner.localSelectResult(queryString, graph)

return rs.collect { it.get("id") }
}

List<RDFNode> getFast() {
String queryString = """
SELECT ?fastId {
wd:${shortId} wdtn:${FAST} ?fastId ;
}
"""

ResultSet rs = QueryRunner.localSelectResult(queryString, graph)

return rs.collect { it.get("fastId") }
}

List<RDFNode> getPlaceIdentifiers() {
String queryString = """
SELECT ?freebaseId ?geonamesId ?toraId ?ysoId {
VALUES ?place { wd:${shortId} }
OPTIONAL { ?place wdtn:${FREEBASE} ?freebaseId }
OPTIONAL { ?place wdtn:${GEONAMES} ?geonamesId }
OPTIONAL { ?place wdt:${TORA} ?toraShortId }
OPTIONAL { ?place wdtn:${YSO} ?ysoId }
bind(iri(concat("https://data.riksarkivet.se/tora/", ?toraShortId)) as ?toraId)
}
"""

ResultSet rs = QueryRunner.localSelectResult(queryString, graph)

QuerySolution singleRowResult = rs.next()

return rs.getResultVars().findResults { singleRowResult?.get(it) }
}

KbvType type() {
String queryString = "SELECT ?type { wd:${shortId} wdt:${INSTANCE_OF} ?type }"

ResultSet rs = QueryRunner.localSelectResult(queryString, graph)
Set wdTypes = rs.collect { it.get("type").toString() } as Set

return Type.values().find { getSubclasses(it).intersect(wdTypes) } ?: Type.OTHER
return KbvType.values().find { getSubclasses(it).intersect(wdTypes) } ?: KbvType.OTHER
}

@Memoized
static Set<String> getSubclasses(Type type) {
if (type == Type.OTHER) {
static Set<String> getSubclasses(KbvType type) {
if (type == KbvType.OTHER) {
return Collections.EMPTY_SET
}

String queryString = "SELECT ?class { ?class wdt:${SUBCLASS_OF}* wd:${type.baseClass} }"
String queryString = "SELECT ?class { ?class wdt:${SUBCLASS_OF}* wd:${type.wikidataType} }"

ResultSet rs = QueryRunner.remoteSelectResult(queryString, WIKIDATA_ENDPOINT)

Expand Down

0 comments on commit 86bacf8

Please sign in to comment.