package org.mycore.jspdocportal.diskcache.generator;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Instant;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Stream;

import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.output.DOMOutputter;
import org.mycore.common.content.MCRPathContent;
import org.mycore.datamodel.classifications2.MCRCategory;
import org.mycore.datamodel.classifications2.MCRCategoryDAO;
import org.mycore.datamodel.classifications2.MCRCategoryDAOFactory;
import org.mycore.datamodel.classifications2.MCRCategoryID;
import org.mycore.datamodel.metadata.MCRMetaEnrichedLinkID;
import org.mycore.datamodel.metadata.MCRMetaXML;
import org.mycore.datamodel.metadata.MCRMetadataManager;
import org.mycore.datamodel.metadata.MCRObject;
import org.mycore.datamodel.metadata.MCRObjectID;
import org.mycore.datamodel.niofs.MCRPath;
import org.mycore.frontend.MCRFrontendUtil;
import org.mycore.libmeta.common.LibmetaProcessorException;
import org.mycore.libmeta.mets.METSQuery;
import org.mycore.libmeta.mets.METSXMLProcessor;
import org.mycore.libmeta.mets.model.Mets;
import org.mycore.libmeta.mets.model._enums.AgentROLE;
import org.mycore.libmeta.mets.model._enums.LOCTYPE;
import org.mycore.libmeta.mets.model._enums.MDTYPE;
import org.mycore.libmeta.mets.model.div.Fptr;
import org.mycore.libmeta.mets.model.div.Mptr;
import org.mycore.libmeta.mets.model.filesec.File;
import org.mycore.libmeta.mets.model.filesec.FileGrp;
import org.mycore.libmeta.mets.model.filesec.file.FLocat;
import org.mycore.libmeta.mets.model.mdsec.AmdSec;
import org.mycore.libmeta.mets.model.mdsec.MdSec;
import org.mycore.libmeta.mets.model.mdsec.MdWrap;
import org.mycore.libmeta.mets.model.mdsec.XMLData;
import org.mycore.libmeta.mets.model.metshdr.Agent;
import org.mycore.libmeta.mets.model.metshdr.MetsHdr;
import org.mycore.libmeta.mets.model.structmap.Div;
import org.mycore.libmeta.mets.model.structmap.StructMap;
import org.mycore.libmeta.mods.MODSXMLProcessor;
import org.mycore.libmeta.mods.model.Mods;
import org.mycore.libmeta.mods.model._toplevel.Classification;
import org.mycore.libmeta.mods.model._toplevel.ITopLevelElement;
import org.mycore.libmeta.mods.model._toplevel.Identifier;
import org.mycore.libmeta.mods.model._toplevel.RecordInfo;
import org.mycore.libmeta.mods.model.recordinfo.RecordInfoNote;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;

import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonParser;

/**
 * Disk cache generator that derives a presentation METS file ("DV-METS", record status
 * {@code PRESENTATION_DV}) from the repository METS file of an object and its imported MODS record.
 *
 * <p>Integration notes taken from the change set that introduced this class:</p>
 * <ul>
 *   <li>File: {@code jspdocportal-diskcache/src/main/java/org/mycore/jspdocportal/diskcache/generator/
 *       MCRDiskcacheDVMETSGenerator.java}</li>
 *   <li>The module {@code jspdocportal-diskcache} declares the dependencies
 *       {@code org.mycore.libmeta:libmeta-mets:0.8.1} and {@code org.mycore.libmeta:libmeta-mods:0.8.1}.</li>
 *   <li>The generator is registered in {@code mycore.properties} via
 *       {@code MCR.Diskcache.Cache.dv-mets.Generator.Class=
 *       org.mycore.jspdocportal.diskcache.generator.MCRDiskcacheDVMETSGenerator}
 *       (previously {@code SimpleGenerator}).</li>
 * </ul>
 */
public class MCRDiskcacheDVMETSGenerator extends SimpleGenerator {

    private static final Logger LOGGER = LogManager.getLogger();

    private static final String NS_DV = "http://dfg-viewer.de/";

    private static final String NS_MODS = "http://www.loc.gov/mods/v3";

    private static final String NS_ZVDD = "http://zvdd.de/namespace";

    private static final String IIIF_IMAGE_API_PATH = "api/iiif/image/v2";

    private static final DocumentBuilderFactory DBF;

    static {
        DBF = DocumentBuilderFactory.newInstance();
        DBF.setNamespaceAware(true);
    }

    /** Known Creative Commons terms; longer terms come first so that the most specific one matches. */
    private static final List<String> CC_TERMS = Arrays.asList("cc-by-nc-nd", "cc-by-nc-sa", "cc-by-nc", "cc-by-nd",
        "cc-by-sa", "cc-by", "cc0");

    private static final DateTimeFormatter ISO_FORMATTER = new DateTimeFormatterBuilder().appendInstant(0).toFormatter();

    /**
     * Generates the DV-METS file for the given object.
     *
     * <p>Nothing is written when the id is not a valid MCRObjectID, when the object has no derivate
     * classified as {@code derivate_types:REPOS_METS}, when it has no {@code def.modsContainer} entry of
     * type {@code imported}, or when the METS main document of the derivate does not exist.</p>
     *
     * @param id the MyCoRe object id
     * @param p  the target path of the generated file; an existing file is deleted first
     */
    @Override
    public void accept(String id, Path p) {
        if (!MCRObjectID.isValid(id)) {
            return;
        }
        MCRObject mcrObj = MCRMetadataManager.retrieveMCRObject(MCRObjectID.getInstance(id));

        MCRCategoryID metsDerivateType = MCRCategoryID.fromString("derivate_types:REPOS_METS");
        Optional<MCRMetaEnrichedLinkID> optDerLink = mcrObj.getStructure().getDerivates().stream()
            .filter(x -> x.getClassifications().contains(metsDerivateType))
            .findFirst();

        // the metadata element may be absent altogether, so guard against null before streaming it
        Optional<MCRMetaXML> optMetaMODS = Optional
            .ofNullable(mcrObj.getMetadata().getMetadataElement("def.modsContainer"))
            .flatMap(container -> container.stream()
                .filter(x -> "imported".equals(x.getType()))
                .findFirst())
            .map(x -> (MCRMetaXML) x);

        if (optMetaMODS.isEmpty() || optDerLink.isEmpty()) {
            LOGGER.debug("No imported MODS or no REPOS_METS derivate found for {} - skipping DV-METS", id);
            return;
        }

        Element eMODS = getFirstContentElement(optMetaMODS.get());
        if (eMODS == null) {
            LOGGER.warn("The MODS container of {} has no element content - skipping DV-METS", id);
            return;
        }

        try {
            Mods mods = MODSXMLProcessor.getInstance().unmarshal(new DOMOutputter().output(eMODS));

            MCRMetaEnrichedLinkID derLink = optDerLink.get();
            MCRPath metsFile = MCRPath.getPath(derLink.getXLinkHref(), derLink.getMainDoc());
            if (Files.exists(metsFile)) {
                //TODO use XML source: unmarshal(MCRPathContent.getSource())
                byte[] metsXML = new MCRPathContent(metsFile).asByteArray();
                Mets mets = METSXMLProcessor.getInstance().unmarshal(new String(metsXML, StandardCharsets.UTF_8));
                Files.deleteIfExists(p);
                process(mets, mods, p);
            }
        } catch (IOException e) {
            LOGGER.error("Could not read METS file or replace the cached file for {}", id, e);
        } catch (LibmetaProcessorException | JDOMException e) {
            LOGGER.error("Could not parse METS/MODS data of {}", id, e);
        }
    }

    /**
     * @return a pretty-printing {@link Gson} instance
     */
    protected Gson getGson() {
        return new GsonBuilder()
            .setPrettyPrinting()
            .create();
    }

    /**
     * Applies all transformation steps to the METS object and marshals the result to the given path.
     */
    private void process(Mets mets, Mods mods, Path pMetsOut) {
        try {
            updateHeader(mets);
            updateAMDSectionForDFGViewer(mets, mods);
            createPresentationFileGroups(mets);
            updateAltoFileGrp(mets);
            addPDFDownloadForCompleteDocument(mets);
            addTeaserFileGroup(mets);
            updateDMDSec(mets, mods);
            removePicaDMDSec(mets);
            updateMetsPtr(mets);

            METSXMLProcessor.getInstance().marshal(mets, pMetsOut);
        } catch (LibmetaProcessorException e) {
            LOGGER.error("Could not write DV-METS file {}", pMetsOut, e);
        }
    }

    /**
     * Replaces all file groups except {@code ALTO} by the IIIF based groups {@code DEFAULT} and
     * {@code THUMBS} and rewrites the file pointers of the physical structMap accordingly.
     */
    private void createPresentationFileGroups(Mets mets) {
        // delete all fileGroups - keep ALTO
        mets.getFileSec().getFileGrp().removeIf(fg -> !"ALTO".equals(fg.getID()));
        StructMap smPhys = METSQuery.findStructMap(mets, "PHYSICAL");
        if (smPhys == null) {
            return;
        }

        // delete all existing filePointers - keep ALTO
        smPhys.getDiv().getFptr().clear();
        for (Div d : smPhys.getDiv().getDiv()) {
            d.getFptr().removeIf(fptr -> fptr.getFILEID() == null || !fptr.getFILEID().startsWith("ALTO_"));
        }

        String iiifBaseURL = MCRFrontendUtil.getBaseURL() + IIIF_IMAGE_API_PATH;
        Map<String, String> iiifSizes = Map.of("DEFAULT", "max", "THUMBS", "!256,256");
        // explicit list because Map.of() does not guarantee an iteration order
        for (String fgName : List.of("DEFAULT", "THUMBS")) {
            FileGrp fg = METSQuery.findOrCreateFileGrpOfUse(mets, fgName);

            for (Div divPhys : smPhys.getDiv().getDiv()) {
                String fileID = divPhys.getID().replace("phys_", fgName + "_file_");
                Fptr fptr = new Fptr();
                fptr.setFILEID(fileID);
                divPhys.getFptr().add(fptr);

                File f = new File();
                f.setID(fileID);
                f.setMIMETYPE("image/jpeg");
                fg.getFile().add(f);

                FLocat flocat = new FLocat();
                flocat.setLOCTYPE(LOCTYPE.URL);
                // https://rosdok.uni-rostock.de/iiif/image-api/rosdok_ppn642329060__phys_0008/full/max/0/native.pdf
                flocat.setXlinkHref(iiifBaseURL + "/" + retrieveIIIFID(divPhys)
                    + "/full/" + iiifSizes.get(fgName) + "/0/default.jpg");
                f.getFLocat().add(flocat);
            }
        }
    }

    /**
     * Derives the IIIF image identifier from the first CONTENTIDS entry of a physical div:
     * the scheme and host part are cut off, slashes become underscores and {@code _phys}
     * becomes {@code __phys}.
     *
     * <p>NOTE(review): expects at least one CONTENTIDS entry; an empty list raises an
     * {@link IndexOutOfBoundsException} - confirm that every page div carries one.</p>
     */
    private String retrieveIIIFID(Div divPhys) {
        String purl = divPhys.getCONTENTIDS().get(0).toString();
        String iiifID = purl.replace("//", "");
        iiifID = iiifID.substring(iiifID.indexOf("/") + 1);
        iiifID = iiifID.replace("/", "_").replace("_phys", "__phys");
        return iiifID;
    }

    /**
     * Replaces the amdSec {@code AMD_DFGVIEWER} by a freshly built one that contains the
     * {@code dv:rights} and {@code dv:links} sections and links it to the logical root div.
     *
     * <p>As before, the rights and links sections are only attached if a provider agent is present.
     * Missing optional data (PPN, license, category, label) leads to omitted elements, not to an exception.</p>
     */
    private void updateAMDSectionForDFGViewer(Mets mets, Mods mods) {
        QName qnameURI = new QName("http://ub.uni-rostock.de", "uri");
        String provider = findAgentTerm(mets, "PROVIDER", qnameURI);
        String sponsor = findAgentTerm(mets, "SPONSOR", qnameURI);

        // TODO RecordInfo ri = mods.filterContent(RecordInfo.class).get(0);
        //      and continue with: ri.filterContent(RecordInfoNote.class).stream()...
        String ppn = getMODSChildren(mods, RecordInfo.class).stream()
            .flatMap(ri -> ri.getContent().stream())
            .filter(RecordInfoNote.class::isInstance)
            .map(RecordInfoNote.class::cast)
            .filter(x -> "k10plus_ppn".equals(x.getType()))
            .map(x -> x.getContent())
            .findFirst()
            .orElse(null);

        String license = retrieveLicense(mods);
        MCRCategoryDAO categDAO = MCRCategoryDAOFactory.getInstance();

        final Document document;
        try {
            DocumentBuilder db = DBF.newDocumentBuilder();
            document = db.newDocument();
        } catch (ParserConfigurationException e) {
            // should not happen with the default configuration - but never hide it
            LOGGER.error("Could not create DOM document for the DFG viewer amdSec", e);
            return;
        }

        mets.getAmdSec().removeIf(x -> "AMD_DFGVIEWER".equals(x.getID()));
        AmdSec amdSec = new AmdSec();
        amdSec.setID("AMD_DFGVIEWER");
        mets.getAmdSec().add(0, amdSec);

        org.w3c.dom.Element eDvRights = document.createElementNS(NS_DV, "dv:rights");

        MCRCategory catProvider = null;
        if (provider != null) {
            catProvider = categDAO.getCategory(new MCRCategoryID("provider", provider), 0);
            if (catProvider == null) {
                LOGGER.warn("Category provider:{} does not exist", provider);
            }
        }
        if (catProvider != null) {
            catProvider.getLabel("de")
                .ifPresent(l -> appendDvElement(document, eDvRights, "dv:owner", l.getText()));
            catProvider.getLabel("x-dfg-viewer")
                .map(l -> readJsonString(l.getText(), "logo_url"))
                .ifPresent(url -> appendDvElement(document, eDvRights, "dv:ownerLogo", url));
            catProvider.getLabel("x-homepage")
                .ifPresent(l -> appendDvElement(document, eDvRights, "dv:ownerSiteURL", l.getText()));
            catProvider.getLabel("x-contact")
                .ifPresent(l -> appendDvElement(document, eDvRights, "dv:ownerContact", l.getText()));
        }

        if (sponsor != null) {
            MCRCategory catSponsor = categDAO.getCategory(new MCRCategoryID("sponsor", sponsor), 0);
            if (catSponsor == null) {
                LOGGER.warn("Category sponsor:{} does not exist", sponsor);
            } else {
                catSponsor.getLabel("de")
                    .ifPresent(l -> appendDvElement(document, eDvRights, "dv:sponsor", l.getText()));
                catSponsor.getLabel("x-dfg-viewer")
                    .map(l -> readJsonString(l.getText(), "logo_url"))
                    .ifPresent(url -> appendDvElement(document, eDvRights, "dv:sponsorLogo", url));
                catSponsor.getLabel("x-homepage")
                    .ifPresent(l -> appendDvElement(document, eDvRights, "dv:sponsorSiteURL", l.getText()));
            }
        }

        if (license != null) {
            appendDvElement(document, eDvRights, "dv:license", license);
        }

        if (provider != null) {
            org.w3c.dom.Element eDvLinks = document.createElementNS(NS_DV, "dv:links");

            amdSec.getRightsMD().add(createMdSec("RIGHTS", "DVRIGHTS", eDvRights));

            if (catProvider != null && ppn != null) {
                catProvider.getLabel("x-catalog")
                    .map(l -> readJsonString(l.getText(), "opac"))
                    .ifPresent(url -> appendDvElement(document, eDvLinks, "dv:reference",
                        url.replace("{0}", ppn)));
            }

            // set PURL
            getMODSChildren(mods, Identifier.class).stream()
                .filter(i -> "purl".equals(i.getType()))
                .findFirst()
                .ifPresent(purl -> appendDvElement(document, eDvLinks, "dv:presentation", purl.getContent()));

            amdSec.getDigiprovMD().add(createMdSec("DIGIPROV", "DVLINKS", eDvLinks));

            mets.getAmdSec().removeIf(x -> "AMD_UBROSTOCK".equals(x.getID()));
        }

        StructMap smLogical = METSQuery.findStructMap(mets, "LOGICAL");
        if (smLogical != null) {
            treeStreamOfDivs(smLogical.getDiv())
                .filter(d -> d.getDMDID() != null && d.getDMDID().contains("DMDLOG_0000"))
                .findFirst()
                .ifPresent(d -> d.setADMID("AMD_DFGVIEWER"));
        }
    }

    /**
     * Returns the term (the part behind the last '#') of the URI attribute on the first note of the
     * first agent with the given OTHERROLE, or {@code null} if there is no such agent, note or attribute.
     */
    private static String findAgentTerm(Mets mets, String otherRole, QName uriAttribute) {
        return mets.getMetsHdr().getAgent().stream()
            .filter(a -> otherRole.equals(a.getOTHERROLE()))
            .filter(a -> !a.getNote().isEmpty())
            .findFirst()
            .map(a -> a.getNote().get(0).getOtherAttributes().get(uriAttribute))
            .map(uri -> uri.substring(uri.lastIndexOf("#") + 1))
            .orElse(null);
    }

    /** Creates a {@code dv:*} element with the given text and appends it to the parent. */
    private static void appendDvElement(Document document, org.w3c.dom.Element parent, String qualifiedName,
        String text) {
        org.w3c.dom.Element child = document.createElementNS(NS_DV, qualifiedName);
        child.setTextContent(text);
        parent.appendChild(child);
    }

    /** Wraps the payload element into an mdSec with MDTYPE OTHER and the given OTHERMDTYPE. */
    private static MdSec createMdSec(String id, String otherMdType, org.w3c.dom.Element payload) {
        XMLData xmlData = new XMLData();
        xmlData.getNodes().add(payload);
        MdWrap.Builder mdWrapBuilder = MdWrap.builder()
            .xmlData(xmlData)
            .MDTYPE(MDTYPE.OTHER)
            .MIMETYPE("text/xml")
            .OTHERMDTYPE(otherMdType);

        MdSec mdSec = new MdSec();
        mdSec.setID(id);
        mdSec.setMdWrap(mdWrapBuilder.build());
        return mdSec;
    }

    /**
     * Reads a string member from a JSON object literal.
     *
     * @return the value, or {@code null} if the member is missing or JSON null.
     *         Malformed JSON still raises the parser's runtime exception.
     */
    private static String readJsonString(String json, String field) {
        var jsonObject = JsonParser.parseString(json).getAsJsonObject();
        if (!jsonObject.has(field) || jsonObject.get(field).isJsonNull()) {
            LOGGER.warn("JSON label '{}' has no member '{}'", json, field);
            return null;
        }
        return jsonObject.get(field).getAsString();
    }

    /**
     * Determines the license code from the {@code licenseinfo} classifications of the MODS record.
     * For public domain works the license of the digitised images decides.
     *
     * @return {@code pdm}, one of {@link #CC_TERMS}, {@code reserved}, or {@code null} if the required
     *         classification is missing
     */
    private String retrieveLicense(Mods mods) {
        String licenseWork = findLicenseTerm(mods, "/licenseinfo#work", "#work.");
        if (!"publicdomain".equals(licenseWork)) {
            return retrieveLicenseFromTerm(licenseWork);
        }
        String licenseDigi = findLicenseTerm(mods, "/licenseinfo#digitisedimages", "#digitisedimages.");
        if ("norestrictions".equals(licenseDigi)) {
            return "pdm";
        }
        return retrieveLicenseFromTerm(licenseDigi);
    }

    /**
     * Returns the fragment of the first classification valueURI containing {@code marker},
     * with {@code prefix} removed, or {@code null} if no classification matches.
     */
    private static String findLicenseTerm(Mods mods, String marker, String prefix) {
        return getMODSChildren(mods, Classification.class).stream()
            .map(c -> c.getValueURI())
            .filter(uri -> uri != null && uri.contains(marker))
            .findFirst()
            .map(uri -> uri.substring(uri.lastIndexOf("#")).replace(prefix, ""))
            .orElse(null);
    }

    /**
     * Maps a license term to the first Creative Commons code it contains.
     *
     * @return the CC code, {@code reserved} for any other term, {@code null} for a {@code null} term
     */
    private String retrieveLicenseFromTerm(String term) {
        if (term == null) {
            return null;
        }
        for (String c : CC_TERMS) {
            if (term.contains(c)) {
                return c;
            }
        }
        return "reserved";
    }

    /**
     * Sets modification date and record status, replaces slashes in the OBJID by underscores
     * and removes the software agents with role EDITOR.
     */
    private void updateHeader(Mets mets) {
        MetsHdr metsHdr = mets.getMetsHdr();
        metsHdr.setLASTMODDATE(ISO_FORMATTER.format(Instant.now()));
        metsHdr.setRECORDSTATUS("PRESENTATION_DV");
        mets.setOBJID(mets.getOBJID().replace("/", "_"));

        // remove all editing agents
        List<Agent> agents = metsHdr.getAgent();
        agents.removeIf(a -> "SOFTWARE".equals(a.getOTHERTYPE()) && AgentROLE.EDITOR == a.getROLE());
    }

    /**
     * Turns the {@code ALTO} file group into a {@code FULLTEXT} group whose files point to the
     * {@code /depot/} URL of the application; size and checksum information is dropped.
     */
    private void updateAltoFileGrp(Mets mets) {
        FileGrp fgALTO = METSQuery.findFileGrpOfID(mets, "ALTO");
        if (fgALTO == null) {
            return;
        }
        fgALTO.setUSE("FULLTEXT");
        String depotURL = StringUtils.stripEnd(MCRFrontendUtil.getBaseURL(), "/")
            + "/depot/" + mets.getOBJID().replace("/", "_") + "/";
        for (File f : fgALTO.getFile()) {
            f.setSIZE(null);
            f.setCHECKSUM(null);
            f.setCHECKSUMTYPE(null);
            if (f.getFLocat().isEmpty()) {
                // nothing to rewrite for a file without location
                continue;
            }
            FLocat fl = f.getFLocat().get(0);
            fl.setLOCTYPE(LOCTYPE.URL);
            fl.setOTHERLOCTYPE(null);
            fl.setXlinkHref(depotURL + fl.getXlinkHref());
        }
    }

    /**
     * Adds the file group {@code DOWNLOAD} with a link to the PDF download of the whole document
     * and references it from the root div of the physical structMap (if there is one).
     */
    private void addPDFDownloadForCompleteDocument(Mets mets) {
        String id = "DOWNLOAD_file_0000";
        FileGrp fgDownload = METSQuery.findOrCreateFileGrpOfUse(mets, "DOWNLOAD");
        File fDown = new File();
        fDown.setID(id);
        fDown.setMIMETYPE("text/html");
        fgDownload.getFile().add(0, fDown);

        FLocat fLocat = new FLocat();
        fLocat.setLOCTYPE(LOCTYPE.URL);

        // https://rosdok.uni-rostock.de/do/pdfdownload/recordIdentifier/rosdok_ppn1853780359
        //TODO use 'recordIdentifier:'
        fLocat.setXlinkHref(StringUtils.stripEnd(MCRFrontendUtil.getBaseURL(), "/")
            + "/do/pdfdownload/recordIdentifier/" + mets.getOBJID() + "/" + mets.getOBJID() + ".pdf");
        fDown.getFLocat().add(fLocat);

        addFptrToPhysicalRoot(mets, id);
    }

    /**
     * Adds the file group {@code TEASER} with an IIIF link to the page labelled {@code START_PAGE}
     * and references it from the root div of the physical structMap.
     */
    private void addTeaserFileGroup(Mets mets) {
        StructMap smPhys = METSQuery.findStructMap(mets, "PHYSICAL");
        if (smPhys == null) {
            return;
        }

        // use IIIF API
        Optional<Div> teaserDiv = smPhys.getDiv().getDiv().stream()
            .filter(d -> "START_PAGE".equals(d.getXlinkLabel()))
            .findFirst();
        if (teaserDiv.isEmpty()) {
            return;
        }

        String teaserURL = MCRFrontendUtil.getBaseURL() + IIIF_IMAGE_API_PATH
            + "/" + retrieveIIIFID(teaserDiv.get()) + "/full/!400,400/0/default.jpg";

        // create TEASER filegroup
        String id = "TEASER_file_0000";
        FileGrp fgTeaser = METSQuery.findOrCreateFileGrpOfUse(mets, "TEASER");
        File fTeaser = new File();
        fTeaser.setID(id);
        // NOTE(review): the URL delivers a JPEG, "image/jpeg" looks more appropriate - confirm with consumers
        fTeaser.setMIMETYPE("text/html");
        fgTeaser.getFile().add(0, fTeaser);

        FLocat fLocat = new FLocat();
        fLocat.setLOCTYPE(LOCTYPE.URL);
        fLocat.setXlinkHref(teaserURL);
        fTeaser.getFLocat().add(fLocat);

        addFptrToPhysicalRoot(mets, id);
    }

    /** Appends a file pointer for the given file id to the root div of the physical structMap. */
    private void addFptrToPhysicalRoot(Mets mets, String fileID) {
        StructMap smPhys = METSQuery.findStructMap(mets, "PHYSICAL");
        if (smPhys == null) {
            LOGGER.warn("No physical structMap found - file {} is not referenced", fileID);
            return;
        }
        Fptr fptr = new Fptr();
        fptr.setFILEID(fileID);
        smPhys.getDiv().getFptr().add(fptr);
    }

    /**
     * Replaces the content of the dmdSec {@code DMDLOG_0000} by the given MODS record, in which
     * {@code mods:note[@type='titlewordindex']} is moved into a zvdd extension.
     */
    //TODO split into separate steps
    private void updateDMDSec(Mets mets, Mods mods) {
        mets.getDmdSec().stream()
            .filter(dmdSec -> "DMDLOG_0000".equals(dmdSec.getID()))
            .forEach(dmdSec -> {
                dmdSec.setCREATED(null);
                dmdSec.getMdWrap().setMDTYPEVERSION(null);

                Document docMods;
                try {
                    docMods = MODSXMLProcessor.getInstance().marshalToDOM(mods);
                } catch (Exception e) {
                    // keep the original dmdSec content, but report the problem
                    LOGGER.error("Could not convert MODS to DOM - dmdSec {} is left unchanged", dmdSec.getID(), e);
                    return;
                }
                if (docMods == null) {
                    return;
                }

                org.w3c.dom.Element eMods = docMods.getDocumentElement();
                moveTitleWordIndexToZvddExtension(docMods, eMods);

                dmdSec.getMdWrap().getXmlData().getNodes().clear();
                dmdSec.getMdWrap().getXmlData().getNodes().add(eMods.cloneNode(true));
            });
    }

    /**
     * Moves every direct child {@code mods:note[@type='titlewordindex']} of the MODS root into
     * {@code mods:extension[@displayLabel='zvdd']/zvdd:zvddWrap/zvdd:titleWord}; the extension
     * is created on demand.
     */
    private static void moveTitleWordIndexToZvddExtension(Document docMods, org.w3c.dom.Element eMods) {
        // childElements() returns a snapshot, so removing nodes while looping is safe
        for (org.w3c.dom.Element eNote : childElements(eMods, "note")) {
            if (!"titlewordindex".equals(eNote.getAttribute("type"))) {
                continue;
            }
            String titlewordindex = eNote.getTextContent();
            eMods.removeChild(eNote);

            org.w3c.dom.Element eExtension = null;
            for (org.w3c.dom.Element eCandidate : childElements(eMods, "extension")) {
                if ("zvdd".equals(eCandidate.getAttribute("displayLabel"))) {
                    eExtension = eCandidate;
                }
            }
            if (eExtension == null) {
                eExtension = docMods.createElementNS(NS_MODS, "mods:extension");
                eExtension.setAttribute("displayLabel", "zvdd");
                eMods.appendChild(eExtension);
            }
            eExtension.appendChild(docMods.createComment(
                "We have not found a valid and documented ZVDD Namespace URI !!!"));
            org.w3c.dom.Element eZVDDWrap = docMods.createElementNS(NS_ZVDD, "zvdd:zvddWrap");
            eExtension.appendChild(eZVDDWrap);
            org.w3c.dom.Element eZVDDTitleWord = docMods.createElementNS(NS_ZVDD, "zvdd:titleWord");
            eZVDDWrap.appendChild(eZVDDTitleWord);
            eZVDDTitleWord.setTextContent(titlewordindex);
        }
    }

    /** Returns a snapshot of the direct child elements in the MODS namespace with the given local name. */
    private static List<org.w3c.dom.Element> childElements(org.w3c.dom.Element parent, String localName) {
        List<org.w3c.dom.Element> result = new ArrayList<>();
        NodeList children = parent.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            if (children.item(i) instanceof org.w3c.dom.Element child
                && NS_MODS.equals(child.getNamespaceURI())
                && localName.equals(child.getLocalName())) {
                result.add(child);
            }
        }
        return result;
    }

    /**
     * Removes the dmdSec {@code DMDLOG_0000_PICA} and all references to it from the logical structMap.
     */
    private void removePicaDMDSec(Mets mets) {
        StructMap smLogical = METSQuery.findStructMap(mets, "LOGICAL");
        if (smLogical != null) {
            treeStreamOfDivs(smLogical.getDiv())
                .filter(d -> d.getDMDID() != null && d.getDMDID().contains("DMDLOG_0000_PICA"))
                .forEach(d -> d.setDMDID(
                    d.getDMDID().replace("DMDLOG_0000_PICA", "").replaceAll("\\s+", " ").trim()));
        }

        mets.getDmdSec().removeIf(dmdSec -> "DMDLOG_0000_PICA".equals(dmdSec.getID()));
    }

    /**
     * Rewrites the METS pointers of the logical structMap to the URLs of the generated DV-METS files.
     */
    //TODO which is the "static" URL for our DV-METS files
    private void updateMetsPtr(Mets mets) {
        StructMap smLogical = METSQuery.findStructMap(mets, "LOGICAL");
        if (smLogical == null) {
            return;
        }
        String generatedBaseURL = StringUtils.stripEnd(MCRFrontendUtil.getBaseURL(), "/")
            + "/generated/recordIdentifier:";
        treeStreamOfDivs(smLogical.getDiv())
            .flatMap(div -> div.getMptr().stream())
            .forEach(mptr -> rewriteMptr(mptr, generatedBaseURL));
    }

    /** Points a single mptr with a non-null href to the generated DV-METS file of the referenced record. */
    private static void rewriteMptr(Mptr mptr, String generatedBaseURL) {
        if (mptr.getXlinkHref() == null) {
            return;
        }
        String recordIdentifier = mptr.getXlinkHref().replace("/", "_");
        mptr.setXlinkHref(generatedBaseURL + recordIdentifier + "_dv.mets.xml");
        mptr.setOTHERLOCTYPE(null);
        mptr.setLOCTYPE(LOCTYPE.URL);
    }

    /**
     * @return a depth-first stream of the given div and all of its descendants
     */
    //TODO move to LibMeta
    private Stream<Div> treeStreamOfDivs(Div d) {
        return Stream.concat(Stream.of(d), d.getDiv().stream().flatMap(this::treeStreamOfDivs));
    }

    /**
     * @return the first JDOM element in the content of the given metadata, or {@code null} if there is none
     */
    //TODO in 2024.06 this should be replaced with MCRMetaXML.getFirstContentElement()
    private Element getFirstContentElement(MCRMetaXML mcrMetaXML) {
        return mcrMetaXML.getContent().stream().filter(Element.class::isInstance).map(Element.class::cast).findFirst()
            .orElse(null);
    }

    /**
     * Returns all top level elements of the MODS record that are instances of the given type.
     *
     * @param mods the MODS record
     * @param type the requested element class
     * @param <T>  the element type
     * @return an unmodifiable list, empty if nothing matches
     */
    //TODO move to LibMeta
    public static <T extends ITopLevelElement> List<T> getMODSChildren(Mods mods, Class<T> type) {
        return mods.getContent().stream().filter(type::isInstance).map(type::cast).toList();
    }
}