Skip to content

Commit

Permalink
Use more complex logic for url decode.
Browse files Browse the repository at this point in the history
  • Loading branch information
Fabian-Schmidt committed Sep 5, 2024
1 parent a02704e commit 67da7cc
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 14 deletions.
22 changes: 22 additions & 0 deletions src/DocLinkChecker/DocLinkChecker.Test/HyperlinkTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,28 @@ public async void ValidateInvalidRootLinkShouldHaveErrors(string path)
linkError.Severity.Should().Be(MarkdownErrorSeverity.Error);
}

[Theory]
// Adopted behaviour from DocFx tests <https://github.com/dotnet/docfx/blob/cca05f505e30c5ede36973c4b989fce711f2e8ad/test/Docfx.Common.Tests/RelativePathTest.cs#L400-L412>
// Modified that expected result of Encoded var is upper case, instead of same case as original.
[InlineData("a/b/c", "a/b/c")]
[InlineData("../a/b/c", "../a/b/c")]
[InlineData("a/b/c%20d", "a/b/c d")]
[InlineData("../a%2Bb/c/d", "../a+b/c/d")]
[InlineData("a%253fb", "a%3fb")]
[InlineData("a%2fb", "a%2Fb")]
[InlineData("%2A%2F%3A%3F%5C", "%2A%2F%3A%3F%5C")] //*/:?\
[InlineData("%2a%2f%3a%3f%5c", "%2A%2F%3A%3F%5C")]
public void ValidateLocalUrlDecode(string path, string expected)
{
//Act
int line = 499;
int column = 75;
Hyperlink link = new Hyperlink(null, line, column, path);

Assert.Equal(path, link.OriginalUrl);
Assert.Equal(expected, link.Url);
}

[Fact]
public async void ValidateLocalLinkWithFullPathShouldHaveErrors()
{
Expand Down
75 changes: 71 additions & 4 deletions src/DocLinkChecker/DocLinkChecker/Models/Hyperlink.cs
Original file line number Diff line number Diff line change
@@ -1,13 +1,20 @@
namespace DocLinkChecker.Models
{
using System;
using System.IO;
using System.Linq;
using System.Text;
using DocLinkChecker.Enums;

/// <summary>
/// Model class for hyperlink.
/// </summary>
public class Hyperlink : MarkdownObjectBase
{
private static readonly char[] UriFragmentOrQueryString = new char[] { '#', '?' };
private static readonly char[] AdditionalInvalidChars = @"\/?:*".ToArray();
private static readonly char[] InvalidPathChars = Path.GetInvalidPathChars().Concat(AdditionalInvalidChars).ToArray();

/// <summary>
/// Initializes a new instance of the <see cref="Hyperlink"/> class.
/// </summary>
Expand All @@ -26,6 +33,7 @@ public Hyperlink(string filePath, int line, int col, string url)
: base(filePath, line, col)
{
Url = url;
OriginalUrl = Url;

LinkType = HyperlinkType.Empty;
if (!string.IsNullOrWhiteSpace(url))
Expand All @@ -48,10 +56,7 @@ public Hyperlink(string filePath, int line, int col, string url)
}
else
{
// Logic for DocFx RelativePath class is complex <https://github.com/dotnet/docfx/blob/cca05f505e30c5ede36973c4b989fce711f2e8ad/src/Docfx.Common/Path/RelativePath.cs>
// Cannot use `Uri.UnescapeDataString` as it would also decode `%2F` to `/'. DocFX does not do this.
// To avoid writing a complex logic. Just basic replacing of some chars.
Url = Url.Replace("%20", " ").Replace("%7E", "~");
Url = UrlDecode(Url);

if (Path.GetExtension(url).ToLower() == ".md" || Path.GetExtension(url) == string.Empty)
{
Expand All @@ -72,6 +77,11 @@ public Hyperlink(string filePath, int line, int col, string url)
/// </summary>
public string Url { get; set; }

/// <summary>
/// Gets or sets the original URL as found in the Markdown document. Used for reporting to user so they can find the correct location. Url will be modified.
/// </summary>
public string OriginalUrl { get; set; }

/// <summary>
/// Gets or sets a value indicating whether this is a web link.
/// </summary>
Expand Down Expand Up @@ -182,5 +192,62 @@ public string UrlFullPath
return Url;
}
}

/// <summary>
/// Decoding of local Urls. Similar to logic from DocFx RelativePath class.
/// https://github.com/dotnet/docfx/blob/cca05f505e30c5ede36973c4b989fce711f2e8ad/src/Docfx.Common/Path/RelativePath.cs .
/// </summary>
/// <param name="url">Url.</param>
/// <returns>Decoded Url.</returns>
private string UrlDecode(string url)
{
// This logic only applies to relative paths.
if (Path.IsPathRooted(url))
{
return url;
}

var anchor = string.Empty;
var index = url.IndexOfAny(UriFragmentOrQueryString);
if (index != -1)
{
anchor = url.Substring(index);
url = url.Remove(index);
}

var parts = url.Split('/', '\\');
var newUrl = new StringBuilder();
for (int i = 0; i < parts.Length; i++)
{
if (i > 0)
{
newUrl.Append('/');
}

var origin = parts[i];
var value = Uri.UnescapeDataString(origin);

var splittedOnInvalidChars = value.Split(InvalidPathChars);
var originIndex = 0;
var valueIndex = 0;
for (int j = 0; j < splittedOnInvalidChars.Length; j++)
{
if (j > 0)
{
var invalidChar = value[valueIndex];
valueIndex++;
newUrl.Append(Uri.EscapeDataString(invalidChar.ToString()));
}

var splitOnInvalidChars = splittedOnInvalidChars[j];
originIndex += splitOnInvalidChars.Length;
valueIndex += splitOnInvalidChars.Length;
newUrl.Append(splitOnInvalidChars);
}
}

newUrl.Append(anchor);
return newUrl.ToString();
}
}
}
20 changes: 10 additions & 10 deletions src/DocLinkChecker/DocLinkChecker/Services/LinkValidatorService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -189,12 +189,12 @@ private async Task VerifyWebHyperlink(Hyperlink hyperlink)

if (hyperlink.Url.Matches(whitelist))
{
_console.Verbose($"Skipping whitelisted url {hyperlink.Url}");
_console.Verbose($"Skipping whitelisted url {hyperlink.OriginalUrl}");
return;
}
}

_console.Verbose($"Validating {hyperlink.Url} in {_fileService.GetRelativePath(_config.DocumentationFiles.SourceFolder, hyperlink.FilePath)}");
_console.Verbose($"Validating {hyperlink.OriginalUrl} in {_fileService.GetRelativePath(_config.DocumentationFiles.SourceFolder, hyperlink.FilePath)}");
using var scope = _serviceProvider.CreateScope();
var client = scope.ServiceProvider.GetRequiredService<CheckerHttpClient>();

Expand All @@ -204,7 +204,7 @@ private async Task VerifyWebHyperlink(Hyperlink hyperlink)
sw.Stop();
if (sw.ElapsedMilliseconds > _config.DocLinkChecker.ExternalLinkDurationWarning)
{
_console.Warning($"*** WARNING: Checking {hyperlink.Url} took {sw.ElapsedMilliseconds}ms.");
_console.Warning($"*** WARNING: Checking {hyperlink.OriginalUrl} took {sw.ElapsedMilliseconds}ms.");
}

if (!result.success)
Expand All @@ -229,7 +229,7 @@ private async Task VerifyWebHyperlink(Hyperlink hyperlink)
hyperlink.Line,
hyperlink.Column,
severity,
$"{hyperlink.Url} => {result.statusCode}"));
$"{hyperlink.OriginalUrl} => {result.statusCode}"));
}
}
else
Expand All @@ -241,7 +241,7 @@ private async Task VerifyWebHyperlink(Hyperlink hyperlink)
hyperlink.Line,
hyperlink.Column,
MarkdownErrorSeverity.Error,
$"{hyperlink.Url} => {result.error}"));
$"{hyperlink.OriginalUrl} => {result.error}"));
}
}
}
Expand All @@ -268,7 +268,7 @@ private Task VerifyLocalHyperlink(Hyperlink hyperlink)
hyperlink.Line,
hyperlink.Column,
MarkdownErrorSeverity.Error,
$"Full path not allowed as link: {hyperlink.Url}"));
$"Full path not allowed as link: {hyperlink.OriginalUrl}"));
return Task.CompletedTask;
}

Expand All @@ -285,7 +285,7 @@ private Task VerifyLocalHyperlink(Hyperlink hyperlink)
hyperlink.Line,
hyperlink.Column,
MarkdownErrorSeverity.Error,
$"Not found: {hyperlink.Url}"));
$"Not found: {hyperlink.OriginalUrl}"));
return Task.CompletedTask;
}
}
Expand All @@ -301,7 +301,7 @@ private Task VerifyLocalHyperlink(Hyperlink hyperlink)
hyperlink.Line,
hyperlink.Column,
MarkdownErrorSeverity.Error,
$"Not found: {hyperlink.Url}"));
$"Not found: {hyperlink.OriginalUrl}"));
return Task.CompletedTask;
}
}
Expand All @@ -317,7 +317,7 @@ private Task VerifyLocalHyperlink(Hyperlink hyperlink)
hyperlink.Line,
hyperlink.Column,
MarkdownErrorSeverity.Error,
$"File referenced outside of the same /docs hierarchy not allowed: {hyperlink.Url}"));
$"File referenced outside of the same /docs hierarchy not allowed: {hyperlink.OriginalUrl}"));
return Task.CompletedTask;
}

Expand All @@ -332,7 +332,7 @@ private Task VerifyLocalHyperlink(Hyperlink hyperlink)
hyperlink.Line,
hyperlink.Column,
MarkdownErrorSeverity.Error,
$"File referenced outside of anything else then a /docs hierarchy not allowed: {hyperlink.Url}"));
$"File referenced outside of anything else then a /docs hierarchy not allowed: {hyperlink.OriginalUrl}"));
return Task.CompletedTask;
}

Expand Down

0 comments on commit 67da7cc

Please sign in to comment.