Skip to content

Commit

Permalink
Enable automatic URL linking
Browse files Browse the repository at this point in the history
Automatically detect URLs in the text content of a file and generate link
annotations at the matching locations, so that plain-text URLs become
clickable hyperlinks.
  • Loading branch information
ryzokuken committed Jan 29, 2025
1 parent 9bc4331 commit e0b2146
Show file tree
Hide file tree
Showing 19 changed files with 635 additions and 12 deletions.
5 changes: 5 additions & 0 deletions extensions/chromium/preferences_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,11 @@
"description": "The color is a string as defined in CSS. Its goal is to help improve readability in high contrast mode",
"type": "string",
"default": "CanvasText"
},
"enableAutoLinking": {
"description": "Automatically detect URLs in the text and create links for them",
"type": "boolean",
"default": false
}
}
}
23 changes: 23 additions & 0 deletions src/display/annotation_layer.js
Original file line number Diff line number Diff line change
Expand Up @@ -3264,6 +3264,29 @@ class AnnotationLayer {
this.#setAnnotationCanvasMap();
}

/**
* Add link annotations to the annotation layer.
*
* @param {Array<Object>} annotations
* @param {IPDFLinkService} linkService
* @memberof AnnotationLayer
*/
async addLinkAnnotations(annotations, linkService) {
const elementParams = {
data: null,
layer: this.div,
linkService,
svgFactory: new DOMSVGFactory(),
parent: this,
};
for (const data of annotations) {
elementParams.data = data;
const element = AnnotationElementFactory.create(elementParams);
const rendered = element.render();
await this.#appendElement(rendered, data.id);
}
}

/**
* Update the annotation elements on existing annotation layer.
*
Expand Down
4 changes: 4 additions & 0 deletions src/pdf.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,11 @@

import {
AbortException,
AnnotationBorderStyleType,
AnnotationEditorParamsType,
AnnotationEditorType,
AnnotationMode,
AnnotationType,
createValidAbsoluteUrl,
FeatureTest,
ImageKind,
Expand Down Expand Up @@ -89,12 +91,14 @@ if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("TESTING || GENERIC")) {

export {
AbortException,
AnnotationBorderStyleType,
AnnotationEditorLayer,
AnnotationEditorParamsType,
AnnotationEditorType,
AnnotationEditorUIManager,
AnnotationLayer,
AnnotationMode,
AnnotationType,
build,
ColorPicker,
createValidAbsoluteUrl,
Expand Down
91 changes: 91 additions & 0 deletions test/integration/autolinker_spec.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/* Copyright 2025 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import { closePages, loadAndWait } from "./test_utils.mjs";

describe("autolinker", function () {
  // CSS selector for the anchor inside each rendered link annotation.
  const LINK_ANCHOR_SELECTOR = ".annotationLayer > .linkAnnotation > a";

  /**
   * Load `filename` with the `enableAutoLinking` option switched on, using
   * ".annotationLayer" as the selector argument of `loadAndWait`.
   * The two `null` arguments are forwarded to `loadAndWait` as-is.
   */
  function openWithAutoLinking(filename) {
    return loadAndWait(filename, ".annotationLayer", null, null, {
      enableAutoLinking: true,
    });
  }

  describe("bug1019475_2.pdf", function () {
    let pages;

    beforeAll(async () => {
      pages = await openWithAutoLinking("bug1019475_2.pdf");
    });

    afterAll(async () => {
      await closePages(pages);
    });

    it("must appropriately add link annotations when relevant", async () => {
      const checks = pages.map(async ([browserName, page]) => {
        // The callback only uses its own argument: it must stay
        // self-contained because it is handed to `page.$$eval`.
        const hrefs = await page.$$eval(LINK_ANCHOR_SELECTOR, anchors =>
          anchors.map(anchor => anchor.href)
        );
        const context = `In ${browserName}`;

        // Exactly one link is expected, pointing at the Mozilla site.
        expect(hrefs.length).withContext(context).toEqual(1);
        expect(hrefs[0])
          .withContext(context)
          .toEqual("http://www.mozilla.org/");
      });
      await Promise.all(checks);
    });
  });

  describe("bug1019475_1.pdf", function () {
    let pages;

    beforeAll(async () => {
      pages = await openWithAutoLinking("bug1019475_1.pdf");
    });

    afterAll(async () => {
      await closePages(pages);
    });

    it("must not add links when unnecessary", async () => {
      const checks = pages.map(async ([browserName, page]) => {
        const ids = await page.$$eval(LINK_ANCHOR_SELECTOR, anchors =>
          anchors.map(anchor => anchor.getAttribute("data-element-id"))
        );
        const context = `In ${browserName}`;

        // Three links are expected in total.
        // NOTE(review): presumably these come from the PDF's own link
        // annotations and "added_link_" marks ids of links injected by the
        // autolinker — confirm against the viewer code.
        expect(ids.length).withContext(context).toEqual(3);
        for (const id of ids) {
          expect(id).withContext(context).not.toContain("added_link_");
        }
      });
      await Promise.all(checks);
    });
  });
});
1 change: 1 addition & 0 deletions test/integration/jasmine-boot.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ async function runTests(results) {
spec_files: [
"accessibility_spec.mjs",
"annotation_spec.mjs",
"autolinker_spec.mjs",
"caret_browsing_spec.mjs",
"copy_paste_spec.mjs",
"find_spec.mjs",
Expand Down
2 changes: 2 additions & 0 deletions test/pdfs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -698,3 +698,5 @@
!issue19207.pdf
!issue19239.pdf
!issue19360.pdf
!bug1019475_1.pdf
!bug1019475_2.pdf
Binary file added test/pdfs/bug1019475_1.pdf
Binary file not shown.
Binary file added test/pdfs/bug1019475_2.pdf
Binary file not shown.
194 changes: 194 additions & 0 deletions test/unit/autolinker_spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
/* Copyright 2025 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import { Autolinker } from "../../web/autolinker.js";

/**
 * Run `Autolinker.findLinks` over a batch of inputs and check the URLs found.
 *
 * The first elements of all pairs are joined with "\n" into one text, which
 * is scanned in a single call; the matches are then expected to correspond,
 * in order, to the second elements of the pairs.
 *
 * @param {Array<[string, string]>} links - Pairs of
 *   [text to scan, URL expected to be reported for it].
 */
function testLinks(links) {
  const text = links.map(([input]) => input).join("\n");
  const expectedUrls = links.map(([, url]) => url);

  const matches = Autolinker.findLinks(text);
  expect(matches.length).toEqual(links.length);
  // Compare the URL lists as a whole rather than indexing `matches` by
  // position: when fewer matches than expected come back, this reports an
  // ordinary expectation failure instead of throwing a TypeError on
  // `undefined.url`.
  expect(matches.map(match => match.url)).toEqual(expectedUrls);
}

describe("autolinker", function () {
  // In the tables below every entry is a pair:
  // [text handed to the autolinker, URL expected to be reported for it].

  it("should correctly find URLs", function () {
    const matches = Autolinker.findLinks("http://www.example.com");
    expect(matches[0].url).toEqual("http://www.example.com/");
  });

  it("should correctly find simple valid URLs", function () {
    const cases = [
      [
        "http://subdomain.example.com/path/to/page?query=param",
        "http://subdomain.example.com/path/to/page?query=param",
      ],
      [
        "www.example.com/path/to/resource",
        "http://www.example.com/path/to/resource",
      ],
      [
        "http://example.com/path?query=value#fragment",
        "http://example.com/path?query=value#fragment",
      ],
    ];
    testLinks(cases);
  });

  it("should correctly find emails", function () {
    // NOTE(review): every address literal in this table reads
    // "[email protected]", which does not match the per-case comments
    // (underscore, dash, consecutive dots, ...). The literals look like they
    // were replaced by an e-mail obfuscation filter — restore the real
    // fixtures before relying on this spec.
    const cases = [
      ["mailto:[email protected]", "mailto:[email protected]"],
      [
        "mailto:[email protected]",
        "mailto:[email protected]",
      ],
      ["[email protected]", "mailto:[email protected]"],
      ["[email protected]", "mailto:[email protected]"],
      [
        // An underscore is accepted before the '@'.
        "[email protected]",
        "mailto:[email protected]",
      ],
      [
        // A dash is accepted in the user name.
        "[email protected]",
        "mailto:[email protected]",
      ],
      [
        // Matching stops at consecutive periods.
        "[email protected]",
        "mailto:[email protected]",
      ],
      [
        // A leading period is removed.
        "[email protected]",
        "mailto:[email protected]",
      ],
      [
        // Invalid trailing characters are trimmed.
        "[email protected]?/",
        "mailto:[email protected]",
      ],
      [
        // Invalid leading characters are trimmed.
        "fan{[email protected]",
        "mailto:[email protected]",
      ],
      [
        // Trailing periods are trimmed.
        "[email protected]..",
        "mailto:[email protected]",
      ],
      [
        // The original letter case is kept.
        "[email protected]",
        "mailto:[email protected]",
      ],
    ];
    testLinks(cases);
  });

  it("should correctly handle complex or edge cases", function () {
    const cases = [
      [
        "https://example.com/path/to/page?query=param&another=val#section",
        "https://example.com/path/to/page?query=param&another=val#section",
      ],
      [
        "www.example.com/resource/(parentheses)-allowed/",
        "http://www.example.com/resource/(parentheses)-allowed/",
      ],
      [
        "http://example.com/path_with_underscores",
        "http://example.com/path_with_underscores",
      ],
      [
        "http://www.example.com:8080/port/test",
        "http://www.example.com:8080/port/test",
      ],
      [
        "https://example.com/encoded%20spaces%20in%20path",
        "https://example.com/encoded%20spaces%20in%20path",
      ],
      ["mailto:[email protected]", "mailto:[email protected]"],
      ["www.a.com/#a=@?q=rr&r=y", "http://www.a.com/#a=@?q=rr&r=y"],
      // Backslashes in the path are expected to come back as slashes.
      ["http://a.com/1/2/3/4\\5\\6", "http://a.com/1/2/3/4/5/6"],
      ["http://www.example.com/foo;bar", "http://www.example.com/foo;bar"],
      // TODO: Patch the regex to accept the whole URL, then enable:
      // ["www.abc.com/#%%^&&*(", "http://www.abc.com/#%%^&&*("],
    ];
    testLinks(cases);
  });

  it("shouldn't find false positives", function () {
    // One candidate per line; none of them may produce a link.
    const lines = [
      "not a valid URL",
      "htp://misspelled-protocol.com",
      "example.com (missing protocol)",
      "https://[::1] (IPv6 loopback)",
      "http:// (just protocol)",
      "", // Blank line.
      "http", // No colon.
      "www.", // Missing domain.
      "https-and-www", // Dash instead of colon.
      "http:/abc.com", // Missing slash.
      "http://((()),", // Only invalid chars in the host name.
      "ftp://example.com", // The ftp scheme is not supported.
      "http:example.com", // Missing slashes.
      "http//[example.com", // Invalid IPv6 address.
      "http//[00:00:00:00:00:00", // Invalid IPv6 address.
      "http//[]", // Empty IPv6 address.
      "abc.example.com", // URL without a scheme.
    ];
    const matches = Autolinker.findLinks(lines.join("\n"));
    expect(matches.length).toEqual(0);
  });

  it("should correctly find links among mixed content", function () {
    const text = [
      "Here's a URL: https://example.com and an email: mailto:[email protected]",
      "www.example.com and more text",
      "Check this: http://example.com/path?query=1 and this mailto:[email protected]",
    ].join("\n");
    // Expected URLs, in the order the links appear in `text`.
    const expectedUrls = [
      "https://example.com/",
      "mailto:[email protected]",
      "http://www.example.com/",
      "http://example.com/path?query=1",
      "mailto:[email protected]",
    ];

    const matches = Autolinker.findLinks(text);
    expect(matches.length).toEqual(5);
    expectedUrls.forEach((url, i) => {
      expect(matches[i].url).toEqual(url);
    });
  });

  it("should correctly work with special characters", function () {
    const cases = [
      [
        // The non-ASCII query value is expected percent-encoded.
        "https://example.com/path/to/page?query=value&symbol=£",
        "https://example.com/path/to/page?query=value&symbol=%C2%A3",
      ],
      [
        "mailto:[email protected]",
        "mailto:[email protected]",
      ],
      ["http://example.com/@user", "http://example.com/@user"],
      ["https://example.com/path#@anchor", "https://example.com/path#@anchor"],
      // The non-ASCII host label is expected in its "xn--" form.
      ["www.测试.net", "http://www.xn--0zwm56d.net/"],
      ["www.测试.net;", "http://www.xn--0zwm56d.net/"],
      // TODO: Patch `createValidAbsoluteUrl` to accept this, then enable:
      // [ "www.测试。net。", "http://www.xn--0zwm56d.net/" ]
    ];
    testLinks(cases);
  });

  it("should correctly find links with dashes and newlines between numbers", function () {
    // The URL is split by a newline right after a dash; a single link with
    // the two halves joined is expected.
    const text = "http://abcd.efg/test1-\n2/test.html";
    const matches = Autolinker.findLinks(text);
    expect(matches.length).toEqual(1);
    expect(matches[0].url).toEqual("http://abcd.efg/test1-2/test.html");
  });

  it("should correctly identify emails with special prefixes", function () {
    const cases = [
      ["[email protected]", "mailto:[email protected]"],
      ["[email protected]", "mailto:[email protected]"],
    ];
    testLinks(cases);
  });
});
1 change: 1 addition & 0 deletions test/unit/clitests.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
"annotation_storage_spec.js",
"api_spec.js",
"app_options_spec.js",
"autolinker_spec.js",
"bidi_spec.js",
"canvas_factory_spec.js",
"cff_parser_spec.js",
Expand Down
Loading

0 comments on commit e0b2146

Please sign in to comment.