Skip to content

Commit

Permalink
add sanitizing of url-pathname to avoid invalid urls within EDS
Browse files Browse the repository at this point in the history
  • Loading branch information
berwa committed Sep 30, 2024
1 parent 642d1a5 commit a66b5f1
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 2 deletions.
8 changes: 7 additions & 1 deletion tools/importer/import-header.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@

/* global WebImporter */

import { determineEdsBaseUrl } from './import-util.js';
import {
determineEdsBaseUrl,
sanitizePathname,
} from './import-util.js';

/**
* Exclude generic content from migration
Expand Down Expand Up @@ -57,6 +60,9 @@ const handleMenuEntry = (menuEntry, baseUrl) => {

// check if the target of the link is relative: handle EDS-specific url-conversions
if (href.charAt(0) === '/') {
// sanitize url-pathname
href = sanitizePathname(href);

// add base-url
href = baseUrl + href;

Expand Down
20 changes: 20 additions & 0 deletions tools/importer/import-util.js
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,24 @@ export const formatTableData = (table, formats) => {
});
};

/**
 * Sanitize a url-pathname by removing leading and trailing dashes from each
 * slash-separated section, since those are not allowed (anymore) in EDS.
 *
 * Only the path portion is sanitized: anything from the first `?` or `#`
 * onwards (query string / fragment) is carried over unchanged, so a raw href
 * can be passed in without its parameters being altered.
 *
 * @param {string} pathname pathname or relative href, e.g. `/-foo-/bar--?a=1`
 * @returns {string} sanitized value, e.g. `/foo/bar?a=1`
 */
export const sanitizePathname = (pathname) => {
  // split off query string / fragment so that slashes and dashes in there stay untouched
  const suffixStart = pathname.search(/[?#]/);
  const path = suffixStart === -1 ? pathname : pathname.slice(0, suffixStart);
  const suffix = suffixStart === -1 ? '' : pathname.slice(suffixStart);

  // empty sections (leading / trailing slash) stay empty, so the slashes survive the join
  const sanitizedPath = path
    .split('/')
    .map((pathnameSection) => pathnameSection.replace(/^-+|-+$/g, ''))
    .join('/');

  return `${sanitizedPath}${suffix}`;
};

/**
* Preprocess method that extracts the hreflang=x-default value from the original HTML markup
* @param document
Expand Down Expand Up @@ -306,6 +324,8 @@ export const handleLinks = (main, document, baseUrl) => {

// replace relative urls
if (href.charAt(0) === '/') {
href = sanitizePathname(href);

href = baseUrl + href;

// remove possible parameters from internal links
Expand Down
3 changes: 2 additions & 1 deletion tools/importer/import.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ import {
handleSup,
handleReferenceRows,
formatTableData,
sanitizePathname

Check failure on line 43 in tools/importer/import.js

View workflow job for this annotation

GitHub Actions / build

Missing trailing comma
} from './import-util.js';

const removeGenericContent = (main) => {
Expand Down Expand Up @@ -867,7 +868,7 @@ export default {

results.push({
element: main,
path: pathname,
path: sanitizePathname(pathname),
});

return results;
Expand Down

0 comments on commit a66b5f1

Please sign in to comment.