Skip to content

Commit

Permalink
Allow iFrames from trusted sources in MarkdownView
Browse files Browse the repository at this point in the history
- Add a new Showdown extension to allow iFrames from trusted sources
- Add a property in the AppModel to store the list of trusted sources
- Allow iFrames through the xss filter with limited attributes so that they can be processed by the new Showdown extension
- Add the new Showdown extension to the MarkdownView

Issue #1383
  • Loading branch information
robyngit committed Sep 20, 2024
1 parent eb2e381 commit 27bf4c5
Show file tree
Hide file tree
Showing 5 changed files with 195 additions and 0 deletions.
159 changes: 159 additions & 0 deletions src/components/showdown/extensions/showdown-iframes.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
/**
* SHOWDOWN IFRAMES
*
* This extension replaces iframes whose src attribute is not from a trusted
* source with a link to that source, and adds a sandbox attribute to iframes
* that are from a trusted source
*/

/**
 * List of trusted URL patterns, taken from the "trustedContentSources"
 * property of MetacatUI.appModel. Falls back to an empty array when that
 * property is falsy. The property is read once, when this statement runs.
 */
const TRUSTED_SOURCES = MetacatUI.appModel.get("trustedContentSources") || [];

/**
 * The sandbox attribute, written out as `name="value"`, that is added to
 * iframes from trusted sources. It grants the framed document two
 * capabilities: running scripts and keeping its own origin. No other sandbox
 * tokens are granted.
 * NOTE(review): combining allow-scripts with allow-same-origin is only
 * meaningful protection when the framed document is served from a different
 * origin than the embedding page - confirm this holds for every entry in the
 * trusted sources list.
 * @type {string}
 */
const SANDBOX = 'sandbox="allow-scripts allow-same-origin"';

/**
 * Regular expression that finds all iframes in the markdown content.
 *
 * Capture groups:
 *   1. the value of the double-quoted src attribute
 *   2. the inner content between the opening tag and the closing tag
 *   3. the closing tag
 *
 * Groups 2 and 3 live in one optional group, so they are either both set
 * (a closing tag follows the opening tag) or both undefined (no closing tag).
 * They have to be optional together: a lazy quantifier placed directly in
 * front of an optional closing tag always settles for the empty string, which
 * leaves the inner content and the closing tag outside of the match.
 *
 * The regex carries the `g` flag, so its `lastIndex` is stateful when it is
 * used with `test` or `exec`.
 * @type {RegExp}
 */
const IFRAME_REGEX =
  /<iframe[^>]*?\bsrc="([^"]*)"[^>]*?>(?:([\s\S]*?)(<\/iframe>))?/g;

/**
 * Build a regular expression from a URL pattern that may contain `*`
 * wildcards. Every regex metacharacter other than `*` is matched literally,
 * each `*` matches any run of characters (including none), and the result is
 * anchored at both ends and case-insensitive.
 * @param {string} wildcardPattern - The URL pattern with wildcards
 * @returns {RegExp} - The regex pattern
 */
function patternToRegex(wildcardPattern) {
  // Backslash-escapes the regex metacharacters, leaving '*' untouched
  const escapeForRegex = (text) =>
    text.replace(/[-/\\^$+?.()|[\]{}]/g, "\\$&");

  // A leading "http://" or "https://" is split off and escaped on its own
  const schemeMatch = wildcardPattern.match(/^(https?:\/\/)/);
  const scheme = schemeMatch ? schemeMatch[1] : "";
  const remainder = wildcardPattern.slice(scheme.length);

  // Only after escaping is '*' turned into its regex equivalent
  const body = escapeForRegex(remainder).replace(/\*/g, ".*");

  return new RegExp(`^${escapeForRegex(scheme)}${body}$`, "i");
}

/**
 * Check if a URL is valid according to the trusted sources. Trusted sources may
 * use wildcards (*) to match multiple URLs. For example, the trusted source
 * "https://*dataone.org/*" will match any URL that starts with "https://",
 * contains "dataone.org", and ends with a path. The trusted source
 * "*arcticdata.io*" will match any URL that contains "arcticdata.io". It could
 * also include wildcards at any position, such as
 * "*arcticdata.io/*\/something".
 *
 * A URL is only ever trusted when it parses as an absolute URL whose scheme
 * is exactly "http:" or "https:". The patterns are matched against the URL
 * string as given, not against a normalized form of it.
 * @param {string} url - The URL to check
 * @returns {boolean} - True if the URL is trusted, false otherwise
 */
function isTrustedUrl(url) {
  // No trusted sources configured means nothing can be trusted
  if (!TRUSTED_SOURCES?.length) return false;

  let protocol;
  try {
    ({ protocol } = new URL(url));
  } catch (e) {
    // Relative or otherwise unparseable URLs are never trusted
    return false;
  }

  // Compare the whole scheme: a prefix check on "http" would also let through
  // any other scheme that merely starts with those letters
  if (protocol !== "http:" && protocol !== "https:") return false;

  // Check if the URL matches any of the trusted sources
  for (const pattern of TRUSTED_SOURCES) {
    if (patternToRegex(pattern).test(url)) return true;
  }

  return false;
}

/**
 * Replace iFrames that are NOT from trusted sources with a link to the source
 * URL. Make the iFrames from trusted sources secure by setting the 'sandbox'
 * attribute, which restricts the iframe's capabilities. Whatever follows the
 * opening tag inside the matched text is dropped; only the opening tag and
 * the closing tag (when one was matched) are kept.
 *
 * Any 'sandbox' attribute already present on the tag is removed, whatever its
 * quoting (double-quoted, single-quoted, unquoted or valueless), and the
 * standard one is appended. Text inside quoted attribute values is never
 * treated as an attribute, so a src such as "...?sandbox=1" cannot suppress
 * the sandbox.
 *
 * NOTE(review): the scheme of an untrusted src is not validated before it is
 * used as the link target; presumably the xss filter has already restricted
 * it - confirm against the order in which the extensions run.
 * @param {string} iframe - The full iframe tag
 * @param {string} src - The src attribute of the iframe
 * @param {string} _innerContent - The inner content of the iframe tag
 * @param {string} closingTag - The closing iframe tag
 * @param {number} _index - The index of the match
 * @param {string} _markdown - The full markdown content
 * @returns {string} - The secure iframe tag, or a link when the source is not
 * trusted
 */
const secureIFrame = (
  iframe,
  src,
  _innerContent,
  closingTag,
  _index,
  _markdown,
) => {
  // Return as a link instead of an iframe if the source is not trusted
  if (!isTrustedUrl(src)) {
    // The src can hold anything except a double quote. Neutralize angle
    // brackets so it cannot open a tag when shown as the link text. '&' is
    // left alone because the value is already HTML attribute text and may
    // contain entities that must not be encoded twice.
    const safeSrc = src.replace(/</g, "&lt;").replace(/>/g, "&gt;");
    return `<a href="${safeSrc}" target="_blank" rel="noopener noreferrer"><b>External Content</b>: ${safeSrc}</a>`;
  }

  // Find the position of the first '>' that ends the opening iframe tag
  const openingTagEndIndex = iframe.indexOf(">");

  // If the iframe tag is malformed and doesn't contain '>', return it as is
  if (openingTagEndIndex === -1) return iframe;

  // First alternative: a quoted attribute value, consumed whole and kept, so
  // that nothing inside it can be mistaken for an attribute.
  // Second alternative: a sandbox attribute, with or without a value.
  const quotedValueOrSandbox =
    /("[^"]*"|'[^']*')|\s+sandbox(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'>]*)|(?=[\s/]|$))/gi;

  const openingTag = iframe
    .slice(0, openingTagEndIndex)
    .replace(quotedValueOrSandbox, (match, quoted) =>
      quoted === undefined ? "" : match,
    );

  // Always end up with exactly one sandbox attribute with the correct value
  const sandboxedTag = `${openingTag} ${SANDBOX}`;

  // Keep the closing tag when there is one; otherwise self-close the iframe
  return closingTag ? `${sandboxedTag}>${closingTag}` : `${sandboxedTag} />`;
};

/**
 * The extension definition handed to Showdown. It is declared with the
 * "output" type, uses IFRAME_REGEX as its `regex`, and secureIFrame as the
 * `replace` callback for each match.
 */
const extension = {
type: "output",
regex: IFRAME_REGEX,
replace: secureIFrame,
};

// AMD module body: once "showdown" is available, register this extension
// under the name "showdown-iframes"
define(["showdown"], (showdown) => {
  // Showdown is given a function that returns the list of extension objects
  const listExtensions = () => [extension];
  showdown.extension("showdown-iframes", listExtensions);
});
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ define(['showdown', 'xss'], function (showdown, xss) {
var options = {
css: false,
allowList: {
iframe: ["src", "width", "height", "frameborder", "allowfullscreen"],
a: ["target", "href", "title", "class", "target"],
abbr: ["title"],
address: [],
Expand Down
2 changes: 2 additions & 0 deletions src/js/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ require.config({
"/components/showdown/extensions/showdown-xss-filter/xss.min",
showdownHtags:
MetacatUI.root + "/components/showdown/extensions/showdown-htags",
showdownIframes:
MetacatUI.root + "/components/showdown/extensions/showdown-iframes",
// woofmark - markdown editor
woofmark: MetacatUI.root + "/components/woofmark.min",
// drop zone creates drag and drop areas
Expand Down
21 changes: 21 additions & 0 deletions src/js/models/AppModel.js
Original file line number Diff line number Diff line change
Expand Up @@ -1731,6 +1731,27 @@ define(["jquery", "underscore", "backbone"], function ($, _, Backbone) {
*/
feverUrl: "",

/**
* A list of trusted content sources from which MetacatUI can safely
* embed external content. This property is used to define URLs or URL
* patterns that are considered secure for embedding content in
* iframes, especially when rendering user-generated Markdown content.
*
* Each source in the list can include wildcards (`*`) to match any
* subdomain or path. For example, `"https://*.dataone.org/*"` matches
* any subdomain of `dataone.org` over HTTPS, and `"*arcticdata.io*"`
* matches any URL containing `arcticdata.io`.
*
* Set to an empty array or a falsy value to disable all embedded content.
*
* @type {string[]}
* @since 0.0.0
*/
trustedContentSources: [
"https://www.youtube.com/embed/*",
"https://player.vimeo.com/video/*",
],

/** If true, then archived content is available in the search index.
* Set to false if this MetacatUI is using a Metacat version before 2.10.0
* @type {boolean}
Expand Down
12 changes: 12 additions & 0 deletions src/js/views/MarkdownView.js
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ define([
"footnotes",
"showdown-citation",
"showdown-images",
"showdown-iframes",
];

var numTestsTodo = SDextensions.length;
Expand Down Expand Up @@ -219,6 +220,8 @@ define([
regexCitation = /\[@.+\]/;
// test for any <h.> tags
(regexHtags = new RegExp("#\\s")), (regexImages = /!\[.*\]\(\S+\)/);
// test for anything that looks like an iframe. Keep it very general.
const regexIframes = /<iframe.*?src="(.*?)"(.*?)><\/iframe>/g;

// ================================================================
// Test for and load each as required each showdown extension
Expand Down Expand Up @@ -342,6 +345,15 @@ define([
} else {
updateExtensionList("showdown-images", (required = false));
}

// --- Test for iframes --- //
if (regexIframes.test(markdown)) {
require(["showdownIframes"], function (showdownIframes) {
updateExtensionList("showdown-iframes", (required = true));
});
} else {
updateExtensionList("showdown-iframes", (required = false));
}
},

/**
Expand Down

0 comments on commit 27bf4c5

Please sign in to comment.