From 2f4d916eacb053df8cbb4110d42bc7496dba9faa Mon Sep 17 00:00:00 2001 From: srosset81 Date: Fri, 27 Sep 2024 20:36:49 +0200 Subject: [PATCH 1/5] BackupService: new option to delete backups after copy --- src/middleware/packages/backup/index.js | 59 ++++++++++++------- .../packages/backup/utils/rsyncCopy.js | 4 +- 2 files changed, 40 insertions(+), 23 deletions(-) diff --git a/src/middleware/packages/backup/index.js b/src/middleware/packages/backup/index.js index 9560d5581..c204bee00 100644 --- a/src/middleware/packages/backup/index.js +++ b/src/middleware/packages/backup/index.js @@ -1,5 +1,6 @@ const { CronJob } = require('cron'); const fs = require('fs'); +const { emptyDirSync } = require('fs-extra'); const pathJoin = require('path').join; const fsCopy = require('./utils/fsCopy'); const ftpCopy = require('./utils/ftpCopy'); @@ -18,6 +19,7 @@ const BackupService = { otherDirsPaths: {} }, copyMethod: 'rsync', // rsync, ftp, or fs + deleteBackupsAfterCopy: false, remoteServer: { path: null, // Required user: null, // Required by rsync and ftp @@ -35,16 +37,21 @@ const BackupService = { started() { const { cronJob, + copyMethod, localServer: { fusekiBase } } = this.settings; - if (cronJob.time) { - this.cronJob = new CronJob(cronJob.time, this.actions.backupAll, null, true, cronJob.timeZone); - } - if (!fusekiBase) { throw new Error('Backup service requires `localServer.fusekiBase` setting to be set to the FUSEKI_BASE path.'); } + + if (!['rsync', 'ftp', 'fs'].includes(copyMethod)) { + throw new Error(`The copyMethod setting must be either rysnc, ftp or fs. Provided: ${copyMethod}`); + } + + if (cronJob.time) { + this.cronJob = new CronJob(cronJob.time, this.actions.backupAll, null, true, cronJob.timeZone); + } }, actions: { async backupAll(ctx) { @@ -59,10 +66,17 @@ const BackupService = { await ctx.call('triplestore.dataset.backup', { dataset }); } - await this.actions.copyToRemoteServer( - { path: pathJoin(this.settings.localServer.fusekiBase, 'backups'), subDir: 'datasets' }, + const backupsDirPath = pathJoin(this.settings.localServer.fusekiBase, 'backups'); + + const copied = await this.actions.copyToRemoteServer( + { path: backupsDirPath, subDir: 'datasets' }, { parentCtx: ctx } ); + + // If there was an error on copy, don't delete the backups + if (copied && this.settings.deleteBackupsAfterCopy) { + emptyDirSync(backupsDirPath); + } }, async backupOtherDirs(ctx) { const { otherDirsPaths } = this.settings.localServer; @@ -84,24 +98,27 @@ const BackupService = { // Path is mandatory for all copy methods if (!remoteServer.path) { this.logger.info('No remote server config defined, skipping remote backup...'); - return; + return false; } - switch (copyMethod) { - case 'rsync': - await rsyncCopy(path, subDir, remoteServer); - break; - - case 'ftp': - await ftpCopy(path, subDir, remoteServer); - break; + try { + switch (copyMethod) { + case 'rsync': + await rsyncCopy(path, subDir, remoteServer, false); + break; - case 'fs': - await fsCopy(path, subDir, remoteServer); - break; + case 'ftp': + await ftpCopy(path, subDir, remoteServer); + break; - default: - throw new Error(`Unknown copy method: ${copyMethod}`); + case 'fs': + await fsCopy(path, subDir, remoteServer); + break; + } + return true; + } catch (e) { + this.logger.error(`Failed to copy ${path} to remote server with ${copyMethod}. Error: ${e.message}`); + return false; } }, deleteDataset: { @@ -147,7 +164,7 @@ const BackupService = { } } }, - /** Returns an array of file paths to the backups relative to `this.settings.localServer.fusekiBase`. */ + // Returns an array of file paths to the backups relative to `this.settings.localServer.fusekiBase`. async listBackupsForDataset(ctx) { const { dataset } = ctx.params; diff --git a/src/middleware/packages/backup/utils/rsyncCopy.js b/src/middleware/packages/backup/utils/rsyncCopy.js index 573075266..000645ec7 100644 --- a/src/middleware/packages/backup/utils/rsyncCopy.js +++ b/src/middleware/packages/backup/utils/rsyncCopy.js @@ -12,12 +12,12 @@ const rsyncCopy = (path, subDir, remoteServer, syncDelete = false) => { if (syncDelete) rsync.set('delete'); return new Promise((resolve, reject) => { - console.log(`Rsync started with command: ${rsync.command()}`); + this.logger.info(`Rsync started with command: ${rsync.command()}`); rsync.execute(error => { if (error) { reject(error); } else { - console.log('Rsync finished !'); + this.logger.info('Rsync finished !'); resolve(); } }); From 2359bf5c09d99bb1b158c09fd8bbc84cd4403deb Mon Sep 17 00:00:00 2001 From: srosset81 Date: Fri, 27 Sep 2024 20:40:36 +0200 Subject: [PATCH 2/5] Rename to deleteFusekiBackupsAfterCopy --- src/middleware/packages/backup/index.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/middleware/packages/backup/index.js b/src/middleware/packages/backup/index.js index c204bee00..a2f2b7c8b 100644 --- a/src/middleware/packages/backup/index.js +++ b/src/middleware/packages/backup/index.js @@ -19,7 +19,7 @@ const BackupService = { otherDirsPaths: {} }, copyMethod: 'rsync', // rsync, ftp, or fs - deleteBackupsAfterCopy: false, + deleteFusekiBackupsAfterCopy: false, remoteServer: { path: null, // Required user: null, // Required by rsync and ftp @@ -74,7 +74,7 @@ const BackupService = { ); // If there was an error on copy, don't delete the backups - if (copied && this.settings.deleteBackupsAfterCopy) { + if (copied && this.settings.deleteFusekiBackupsAfterCopy) { emptyDirSync(backupsDirPath); } }, From 0c679200fe6b491920194d2ad6ab664932746012 Mon Sep 17 00:00:00 2001 From: srosset81 Date: Sun, 29 Sep 2024 19:21:47 +0200 Subject: [PATCH 3/5] updateCollectionsOptions: allow to pass dataset in params --- .../activitypub/subservices/collections-registry.js | 6 +++--- .../activitypub/services/activitypub/subservices/follow.js | 7 +++++-- .../activitypub/services/activitypub/subservices/inbox.js | 4 +++- .../activitypub/services/activitypub/subservices/like.js | 7 +++++-- .../activitypub/services/activitypub/subservices/outbox.js | 4 +++- .../activitypub/services/activitypub/subservices/reply.js | 4 +++- 6 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/middleware/packages/activitypub/services/activitypub/subservices/collections-registry.js b/src/middleware/packages/activitypub/services/activitypub/subservices/collections-registry.js index 29e2ba559..5229cc0b6 100644 --- a/src/middleware/packages/activitypub/services/activitypub/subservices/collections-registry.js +++ b/src/middleware/packages/activitypub/services/activitypub/subservices/collections-registry.js @@ -133,7 +133,7 @@ const CollectionsRegistryService = { } }, async updateCollectionsOptions(ctx) { - const { collection } = ctx.params; + let { collection, dataset } = ctx.params; let { attachPredicate, ordered, summary, dereferenceItems, itemsPerPage, sortPredicate, sortOrder } = collection || {}; @@ -142,9 +142,9 @@ const CollectionsRegistryService = { sortOrder = sortOrder && (await ctx.call('jsonld.parser.expandPredicate', { predicate: sortOrder })); const accounts = await this.broker.call('auth.account.find'); - const datasets = this.settings.podProvider ? accounts.map(a => a.username) : [undefined]; + const datasets = dataset ? [dataset] : this.settings.podProvider ? accounts.map(a => a.username) : [undefined]; - for (let dataset of datasets) { + for (dataset of datasets) { this.logger.info(`Getting all collections in dataset ${dataset} attached with predicate ${attachPredicate}...`); const results = await ctx.call('triplestore.query', { diff --git a/src/middleware/packages/activitypub/services/activitypub/subservices/follow.js b/src/middleware/packages/activitypub/services/activitypub/subservices/follow.js index d95491793..291aebeb4 100644 --- a/src/middleware/packages/activitypub/services/activitypub/subservices/follow.js +++ b/src/middleware/packages/activitypub/services/activitypub/subservices/follow.js @@ -109,11 +109,14 @@ const FollowService = { }); }, async updateCollectionsOptions(ctx) { + const { dataset } = ctx.params; await ctx.call('activitypub.collections-registry.updateCollectionsOptions', { - collection: this.settings.followersCollectionOptions + collection: this.settings.followersCollectionOptions, + dataset }); await ctx.call('activitypub.collections-registry.updateCollectionsOptions', { - collection: this.settings.followingCollectionOptions + collection: this.settings.followingCollectionOptions, + dataset }); } }, diff --git a/src/middleware/packages/activitypub/services/activitypub/subservices/inbox.js b/src/middleware/packages/activitypub/services/activitypub/subservices/inbox.js index e1ead78d1..ed8486c69 100644 --- a/src/middleware/packages/activitypub/services/activitypub/subservices/inbox.js +++ b/src/middleware/packages/activitypub/services/activitypub/subservices/inbox.js @@ -169,8 +169,10 @@ const InboxService = { return activities; }, async updateCollectionsOptions(ctx) { + const { dataset } = ctx.params; await ctx.call('activitypub.collections-registry.updateCollectionsOptions', { - collection: this.settings.collectionOptions + collection: this.settings.collectionOptions, + dataset }); } } diff --git a/src/middleware/packages/activitypub/services/activitypub/subservices/like.js b/src/middleware/packages/activitypub/services/activitypub/subservices/like.js index 88c21d601..8cae157ca 100644 --- a/src/middleware/packages/activitypub/services/activitypub/subservices/like.js +++ b/src/middleware/packages/activitypub/services/activitypub/subservices/like.js @@ -82,11 +82,14 @@ const LikeService = { } }, async updateCollectionsOptions(ctx) { + const { dataset } = ctx.params; await ctx.call('activitypub.collections-registry.updateCollectionsOptions', { - collection: this.settings.likesCollectionOptions + collection: this.settings.likesCollectionOptions, + dataset }); await ctx.call('activitypub.collections-registry.updateCollectionsOptions', { - collection: this.settings.likedCollectionOptions + collection: this.settings.likedCollectionOptions, + dataset }); } }, diff --git a/src/middleware/packages/activitypub/services/activitypub/subservices/outbox.js b/src/middleware/packages/activitypub/services/activitypub/subservices/outbox.js index 6f6682fb6..d0527f380 100644 --- a/src/middleware/packages/activitypub/services/activitypub/subservices/outbox.js +++ b/src/middleware/packages/activitypub/services/activitypub/subservices/outbox.js @@ -133,8 +133,10 @@ const OutboxService = { return activity; }, async updateCollectionsOptions(ctx) { + const { dataset } = ctx.params; await ctx.call('activitypub.collections-registry.updateCollectionsOptions', { - collection: this.settings.collectionOptions + collection: this.settings.collectionOptions, + dataset }); } }, diff --git a/src/middleware/packages/activitypub/services/activitypub/subservices/reply.js b/src/middleware/packages/activitypub/services/activitypub/subservices/reply.js index 2069e82a8..6ab952148 100644 --- a/src/middleware/packages/activitypub/services/activitypub/subservices/reply.js +++ b/src/middleware/packages/activitypub/services/activitypub/subservices/reply.js @@ -60,8 +60,10 @@ const ReplyService = { }); }, async updateCollectionsOptions(ctx) { + const { dataset } = ctx.params; await ctx.call('activitypub.collections-registry.updateCollectionsOptions', { - collection: this.settings.collectionOptions + collection: this.settings.collectionOptions, + dataset }); } }, From 07d39fa4ac507a2b4406af3f34d91e1e0b82a945 Mon Sep 17 00:00:00 2001 From: srosset81 Date: Mon, 30 Sep 2024 16:21:03 +0200 Subject: [PATCH 4/5] updateCollectionsOptions: only update local collections --- .../subservices/collections-registry.js | 87 +++++++++++-------- 1 file changed, 53 insertions(+), 34 deletions(-) diff --git a/src/middleware/packages/activitypub/services/activitypub/subservices/collections-registry.js b/src/middleware/packages/activitypub/services/activitypub/subservices/collections-registry.js index 5229cc0b6..21159e212 100644 --- a/src/middleware/packages/activitypub/services/activitypub/subservices/collections-registry.js +++ b/src/middleware/packages/activitypub/services/activitypub/subservices/collections-registry.js @@ -160,40 +160,42 @@ const CollectionsRegistryService = { }); for (const collectionUri of results.map(r => r.collectionUri.value)) { - this.logger.info(`Updating options of ${collectionUri}...`); - await ctx.call('triplestore.update', { - query: ` - PREFIX as: - PREFIX semapps: - DELETE { - <${collectionUri}> - a ?type ; - as:summary ?summary ; - semapps:dereferenceItems ?dereferenceItems ; - semapps:itemsPerPage ?itemsPerPage ; - semapps:sortPredicate ?sortPredicate ; - semapps:sortOrder ?sortOrder . - } - INSERT { - <${collectionUri}> a ${ordered ? 'as:OrderedCollection, as:Collection' : 'as:Collection'} . - ${summary ? `<${collectionUri}> as:summary "${summary}" .` : ''} - <${collectionUri}> semapps:dereferenceItems ${dereferenceItems} . - ${itemsPerPage ? `<${collectionUri}> semapps:itemsPerPage ${itemsPerPage} .` : ''} - ${sortPredicate ? `<${collectionUri}> semapps:sortPredicate <${sortPredicate}> .` : ''} - ${sortOrder ? `<${collectionUri}> semapps:sortOrder <${sortOrder}> .` : ''} - } - WHERE { - <${collectionUri}> a ?type - OPTIONAL { <${collectionUri}> as:summary ?summary . } - OPTIONAL { <${collectionUri}> semapps:dereferenceItems ?dereferenceItems . } - OPTIONAL { <${collectionUri}> semapps:itemsPerPage ?itemsPerPage . } - OPTIONAL { <${collectionUri}> semapps:sortPredicate ?sortPredicate . } - OPTIONAL { <${collectionUri}> semapps:sortOrder ?sortOrder . } - } - `, - webId: 'system', - dataset - }); + if (this.isLocalObject(collectionUri, urlJoin(this.settings.baseUri, dataset))) { + this.logger.info(`Updating options of ${collectionUri}...`); + await ctx.call('triplestore.update', { + query: ` + PREFIX as: + PREFIX semapps: + DELETE { + <${collectionUri}> + a ?type ; + as:summary ?summary ; + semapps:dereferenceItems ?dereferenceItems ; + semapps:itemsPerPage ?itemsPerPage ; + semapps:sortPredicate ?sortPredicate ; + semapps:sortOrder ?sortOrder . + } + INSERT { + <${collectionUri}> a ${ordered ? 'as:OrderedCollection, as:Collection' : 'as:Collection'} . + ${summary ? `<${collectionUri}> as:summary "${summary}" .` : ''} + <${collectionUri}> semapps:dereferenceItems ${dereferenceItems} . + ${itemsPerPage ? `<${collectionUri}> semapps:itemsPerPage ${itemsPerPage} .` : ''} + ${sortPredicate ? `<${collectionUri}> semapps:sortPredicate <${sortPredicate}> .` : ''} + ${sortOrder ? `<${collectionUri}> semapps:sortOrder <${sortOrder}> .` : ''} + } + WHERE { + <${collectionUri}> a ?type + OPTIONAL { <${collectionUri}> as:summary ?summary . } + OPTIONAL { <${collectionUri}> semapps:dereferenceItems ?dereferenceItems . } + OPTIONAL { <${collectionUri}> semapps:itemsPerPage ?itemsPerPage . } + OPTIONAL { <${collectionUri}> semapps:sortPredicate ?sortPredicate . } + OPTIONAL { <${collectionUri}> semapps:sortOrder ?sortOrder . } + } + `, + webId: 'system', + dataset + }); + } } } } @@ -221,6 +223,23 @@ const CollectionsRegistryService = { }, hasTypeChanged(oldData, newData) { return JSON.stringify(newData.type || newData['@type']) !== JSON.stringify(oldData.type || oldData['@type']); + }, + isLocalObject(uri, actorUri) { + if (this.settings.podProvider) { + const { origin, pathname } = new URL(actorUri); + const aclBase = `${origin}/_acl${pathname}`; // URL of type http://localhost:3000/_acl/alice + const aclGroupBase = `${origin}/_groups${pathname}`; // URL of type http://localhost:3000/_groups/alice + return ( + uri === actorUri || + uri.startsWith(actorUri + '/') || + uri === aclBase || + uri.startsWith(aclBase + '/') || + uri === aclGroupBase || + uri.startsWith(aclGroupBase + '/') + ); + } else { + return uri.startsWith(this.settings.baseUri); + } } }, events: { From e86f874d5269c535058a970bca7bd55da213abdd Mon Sep 17 00:00:00 2001 From: srosset81 Date: Mon, 30 Sep 2024 19:13:13 +0200 Subject: [PATCH 5/5] Improve compact job fault tolerance --- src/jena/fuseki-docker/docker-compact-entrypoint.sh | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/jena/fuseki-docker/docker-compact-entrypoint.sh b/src/jena/fuseki-docker/docker-compact-entrypoint.sh index 38a01f0a1..064808102 100644 --- a/src/jena/fuseki-docker/docker-compact-entrypoint.sh +++ b/src/jena/fuseki-docker/docker-compact-entrypoint.sh @@ -18,8 +18,15 @@ for operation in "${operations[@]}"; do if [ -d "$dir" ]; then if [ "$operation" == "compact" ]; then echo "Compacting ${dir}..." - # TODO use --deleteOld command available in higher Fuseki versions - /jena-fuseki/bin/tdb2.tdbcompact --loc=${dir} + { + # TODO use --deleteOld command available in higher Fuseki versions + /jena-fuseki/bin/tdb2.tdbcompact --loc=${dir} + } || { + # We immediately delete any newly-created directory, to avoid potentially correct data to be removed during the deleteOld operation + echo "Compact job failed. Deleting new directories from ${dir}..." + cd "${dir}" + find . -iname 'Data*' ! -wholename $(find . -iname 'Data*' -type d | sort -n -r | tail -n 1) -type d -exec rm -rf {} + + } else echo "Deleting old directories from ${dir}..." cd "${dir}"