From 9153d420aa992f97d598785693952b7f7ad5b9a4 Mon Sep 17 00:00:00 2001 From: Fabian-Schmidt Date: Wed, 4 Sep 2024 12:05:46 -0700 Subject: [PATCH 1/3] Cheap support for %20. --- src/DocLinkChecker/DocLinkChecker.Test/HyperlinkTests.cs | 6 ++++++ src/DocLinkChecker/DocLinkChecker.Test/MockFileService.cs | 1 + src/DocLinkChecker/DocLinkChecker/Models/Hyperlink.cs | 5 +++++ 3 files changed, 12 insertions(+) diff --git a/src/DocLinkChecker/DocLinkChecker.Test/HyperlinkTests.cs b/src/DocLinkChecker/DocLinkChecker.Test/HyperlinkTests.cs index 5a76892..78ae566 100644 --- a/src/DocLinkChecker/DocLinkChecker.Test/HyperlinkTests.cs +++ b/src/DocLinkChecker/DocLinkChecker.Test/HyperlinkTests.cs @@ -306,6 +306,10 @@ public async void ValidateLocalLinkNonExistingHeadingShouldHaveErrors() [Theory] [InlineData("~/general/images/nature.jpeg")] [InlineData("~\\general\\images\\nature.jpeg")] + [InlineData("~/general/images/space%20image.jpeg")] + [InlineData("~\\general\\images\\space%20image.jpeg")] + [InlineData("%7E/general/images/space%20image.jpeg")] + [InlineData("%7E\\general\\images\\space%20image.jpeg")] public async void ValidateRootLinkShouldHaveNoErrors(string path) { // Arrange @@ -327,6 +331,8 @@ public async void ValidateRootLinkShouldHaveNoErrors(string path) [Theory] [InlineData("~/general/images/NON_EXISTING.jpeg")] [InlineData("~\\NON_EXISTING\\images\\nature.jpeg")] + [InlineData("~/general%2Fimages/nature.jpeg")] + [InlineData("~/general/images/space image.jpeg")] public async void ValidateInvalidRootLinkShouldHaveErrors(string path) { // Arrange diff --git a/src/DocLinkChecker/DocLinkChecker.Test/MockFileService.cs b/src/DocLinkChecker/DocLinkChecker.Test/MockFileService.cs index e570847..845297d 100644 --- a/src/DocLinkChecker/DocLinkChecker.Test/MockFileService.cs +++ b/src/DocLinkChecker/DocLinkChecker.Test/MockFileService.cs @@ -63,6 +63,7 @@ public void FillDemoSet() Files.Add($"{Root}\\general\\images\\nature.jpeg", ""); Files.Add($"{Root}\\general\\images\\another-image.png", ""); + Files.Add($"{Root}\\general\\images\\space image.jpeg", ""); Files.Add($"{Root}\\src", null); Files.Add($"{Root}\\src\\sample.cs", @"namespace MySampleApp; diff --git a/src/DocLinkChecker/DocLinkChecker/Models/Hyperlink.cs b/src/DocLinkChecker/DocLinkChecker/Models/Hyperlink.cs index 53d2381..b8fa21c 100644 --- a/src/DocLinkChecker/DocLinkChecker/Models/Hyperlink.cs +++ b/src/DocLinkChecker/DocLinkChecker/Models/Hyperlink.cs @@ -48,6 +48,11 @@ public Hyperlink(string filePath, int line, int col, string url) } else { + // Logic for DocFx RelativePath class is complex + // Cannot use `Uri.UnescapeDataString` as it would also decode `%2F` to `/'. DocFX does not do this. + // To avoid writing a complex logic. Just basic replacing of some chars. + Url = Url.Replace("%20", " ").Replace("%7E", "~"); + if (Path.GetExtension(url).ToLower() == ".md" || Path.GetExtension(url) == string.Empty) { // link to an MD file or a folder From a02704e7d6e9305803de30c9c65e1577daff2dc9 Mon Sep 17 00:00:00 2001 From: Fabian-Schmidt Date: Wed, 4 Sep 2024 12:06:00 -0700 Subject: [PATCH 2/3] Missed file change --- src/DocLinkChecker/DocLinkChecker.Test/HyperlinkTests.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/DocLinkChecker/DocLinkChecker.Test/HyperlinkTests.cs b/src/DocLinkChecker/DocLinkChecker.Test/HyperlinkTests.cs index 78ae566..d8ca146 100644 --- a/src/DocLinkChecker/DocLinkChecker.Test/HyperlinkTests.cs +++ b/src/DocLinkChecker/DocLinkChecker.Test/HyperlinkTests.cs @@ -332,7 +332,6 @@ public async void ValidateRootLinkShouldHaveNoErrors(string path) [InlineData("~/general/images/NON_EXISTING.jpeg")] [InlineData("~\\NON_EXISTING\\images\\nature.jpeg")] [InlineData("~/general%2Fimages/nature.jpeg")] - [InlineData("~/general/images/space image.jpeg")] public async void ValidateInvalidRootLinkShouldHaveErrors(string path) { // Arrange From 67da7cc6d0f7dedf7762b105df036de8c274af9e Mon Sep 17 00:00:00 2001 From: Fabian-Schmidt Date: Thu, 5 Sep 2024 07:01:30 -0700 Subject: [PATCH 3/3] Use more complex logic for url decode. --- .../DocLinkChecker.Test/HyperlinkTests.cs | 22 ++++++ .../DocLinkChecker/Models/Hyperlink.cs | 75 ++++++++++++++++++- .../Services/LinkValidatorService.cs | 20 ++--- 3 files changed, 103 insertions(+), 14 deletions(-) diff --git a/src/DocLinkChecker/DocLinkChecker.Test/HyperlinkTests.cs b/src/DocLinkChecker/DocLinkChecker.Test/HyperlinkTests.cs index d8ca146..40fa5fe 100644 --- a/src/DocLinkChecker/DocLinkChecker.Test/HyperlinkTests.cs +++ b/src/DocLinkChecker/DocLinkChecker.Test/HyperlinkTests.cs @@ -354,6 +354,28 @@ public async void ValidateInvalidRootLinkShouldHaveErrors(string path) linkError.Severity.Should().Be(MarkdownErrorSeverity.Error); } + [Theory] + // Adopted behaviour from DocFx tests + // Modified that expected result of Encoded var is upper case, instead of same case as original. + [InlineData("a/b/c", "a/b/c")] + [InlineData("../a/b/c", "../a/b/c")] + [InlineData("a/b/c%20d", "a/b/c d")] + [InlineData("../a%2Bb/c/d", "../a+b/c/d")] + [InlineData("a%253fb", "a%3fb")] + [InlineData("a%2fb", "a%2Fb")] + [InlineData("%2A%2F%3A%3F%5C", "%2A%2F%3A%3F%5C")] //*/:?\ + [InlineData("%2a%2f%3a%3f%5c", "%2A%2F%3A%3F%5C")] + public void ValidateLocalUrlDecode(string path, string expected) + { + //Act + int line = 499; + int column = 75; + Hyperlink link = new Hyperlink(null, line, column, path); + + Assert.Equal(path, link.OriginalUrl); + Assert.Equal(expected, link.Url); + } + [Fact] public async void ValidateLocalLinkWithFullPathShouldHaveErrors() { diff --git a/src/DocLinkChecker/DocLinkChecker/Models/Hyperlink.cs b/src/DocLinkChecker/DocLinkChecker/Models/Hyperlink.cs index b8fa21c..3f152ef 100644 --- a/src/DocLinkChecker/DocLinkChecker/Models/Hyperlink.cs +++ b/src/DocLinkChecker/DocLinkChecker/Models/Hyperlink.cs @@ -1,6 +1,9 @@ namespace DocLinkChecker.Models { + using System; using System.IO; + using System.Linq; + using System.Text; using DocLinkChecker.Enums; /// @@ -8,6 +11,10 @@ /// public class Hyperlink : MarkdownObjectBase { + private static readonly char[] UriFragmentOrQueryString = new char[] { '#', '?' }; + private static readonly char[] AdditionalInvalidChars = @"\/?:*".ToArray(); + private static readonly char[] InvalidPathChars = Path.GetInvalidPathChars().Concat(AdditionalInvalidChars).ToArray(); + /// /// Initializes a new instance of the class. /// @@ -26,6 +33,7 @@ public Hyperlink(string filePath, int line, int col, string url) : base(filePath, line, col) { Url = url; + OriginalUrl = Url; LinkType = HyperlinkType.Empty; if (!string.IsNullOrWhiteSpace(url)) @@ -48,10 +56,7 @@ public Hyperlink(string filePath, int line, int col, string url) } else { - // Logic for DocFx RelativePath class is complex - // Cannot use `Uri.UnescapeDataString` as it would also decode `%2F` to `/'. DocFX does not do this. - // To avoid writing a complex logic. Just basic replacing of some chars. - Url = Url.Replace("%20", " ").Replace("%7E", "~"); + Url = UrlDecode(Url); if (Path.GetExtension(url).ToLower() == ".md" || Path.GetExtension(url) == string.Empty) { @@ -72,6 +77,11 @@ public Hyperlink(string filePath, int line, int col, string url) /// public string Url { get; set; } + /// + /// Gets or sets the original URL as found in the Markdown document. Used for reporting to user so they can find the correct location. Url will be modified. + /// + public string OriginalUrl { get; set; } + /// /// Gets or sets a value indicating whether this is a web link. /// @@ -182,5 +192,62 @@ public string UrlFullPath return Url; } } + + /// + /// Decoding of local Urls. Similar to logic from DocFx RelativePath class. + /// https://github.com/dotnet/docfx/blob/cca05f505e30c5ede36973c4b989fce711f2e8ad/src/Docfx.Common/Path/RelativePath.cs . + /// + /// Url. + /// Decoded Url. + private string UrlDecode(string url) + { + // This logic only applies to relative paths. + if (Path.IsPathRooted(url)) + { + return url; + } + + var anchor = string.Empty; + var index = url.IndexOfAny(UriFragmentOrQueryString); + if (index != -1) + { + anchor = url.Substring(index); + url = url.Remove(index); + } + + var parts = url.Split('/', '\\'); + var newUrl = new StringBuilder(); + for (int i = 0; i < parts.Length; i++) + { + if (i > 0) + { + newUrl.Append('/'); + } + + var origin = parts[i]; + var value = Uri.UnescapeDataString(origin); + + var splittedOnInvalidChars = value.Split(InvalidPathChars); + var originIndex = 0; + var valueIndex = 0; + for (int j = 0; j < splittedOnInvalidChars.Length; j++) + { + if (j > 0) + { + var invalidChar = value[valueIndex]; + valueIndex++; + newUrl.Append(Uri.EscapeDataString(invalidChar.ToString())); + } + + var splitOnInvalidChars = splittedOnInvalidChars[j]; + originIndex += splitOnInvalidChars.Length; + valueIndex += splitOnInvalidChars.Length; + newUrl.Append(splitOnInvalidChars); + } + } + + newUrl.Append(anchor); + return newUrl.ToString(); + } } } diff --git a/src/DocLinkChecker/DocLinkChecker/Services/LinkValidatorService.cs b/src/DocLinkChecker/DocLinkChecker/Services/LinkValidatorService.cs index f60ae1a..21a7d5b 100644 --- a/src/DocLinkChecker/DocLinkChecker/Services/LinkValidatorService.cs +++ b/src/DocLinkChecker/DocLinkChecker/Services/LinkValidatorService.cs @@ -189,12 +189,12 @@ private async Task VerifyWebHyperlink(Hyperlink hyperlink) if (hyperlink.Url.Matches(whitelist)) { - _console.Verbose($"Skipping whitelisted url {hyperlink.Url}"); + _console.Verbose($"Skipping whitelisted url {hyperlink.OriginalUrl}"); return; } } - _console.Verbose($"Validating {hyperlink.Url} in {_fileService.GetRelativePath(_config.DocumentationFiles.SourceFolder, hyperlink.FilePath)}"); + _console.Verbose($"Validating {hyperlink.OriginalUrl} in {_fileService.GetRelativePath(_config.DocumentationFiles.SourceFolder, hyperlink.FilePath)}"); using var scope = _serviceProvider.CreateScope(); var client = scope.ServiceProvider.GetRequiredService(); @@ -204,7 +204,7 @@ private async Task VerifyWebHyperlink(Hyperlink hyperlink) sw.Stop(); if (sw.ElapsedMilliseconds > _config.DocLinkChecker.ExternalLinkDurationWarning) { - _console.Warning($"*** WARNING: Checking {hyperlink.Url} took {sw.ElapsedMilliseconds}ms."); + _console.Warning($"*** WARNING: Checking {hyperlink.OriginalUrl} took {sw.ElapsedMilliseconds}ms."); } if (!result.success) @@ -229,7 +229,7 @@ private async Task VerifyWebHyperlink(Hyperlink hyperlink) hyperlink.Line, hyperlink.Column, severity, - $"{hyperlink.Url} => {result.statusCode}")); + $"{hyperlink.OriginalUrl} => {result.statusCode}")); } } else @@ -241,7 +241,7 @@ private async Task VerifyWebHyperlink(Hyperlink hyperlink) hyperlink.Line, hyperlink.Column, MarkdownErrorSeverity.Error, - $"{hyperlink.Url} => {result.error}")); + $"{hyperlink.OriginalUrl} => {result.error}")); } } } @@ -268,7 +268,7 @@ private Task VerifyLocalHyperlink(Hyperlink hyperlink) hyperlink.Line, hyperlink.Column, MarkdownErrorSeverity.Error, - $"Full path not allowed as link: {hyperlink.Url}")); + $"Full path not allowed as link: {hyperlink.OriginalUrl}")); return Task.CompletedTask; } @@ -285,7 +285,7 @@ private Task VerifyLocalHyperlink(Hyperlink hyperlink) hyperlink.Line, hyperlink.Column, MarkdownErrorSeverity.Error, - $"Not found: {hyperlink.Url}")); + $"Not found: {hyperlink.OriginalUrl}")); return Task.CompletedTask; } } @@ -301,7 +301,7 @@ private Task VerifyLocalHyperlink(Hyperlink hyperlink) hyperlink.Line, hyperlink.Column, MarkdownErrorSeverity.Error, - $"Not found: {hyperlink.Url}")); + $"Not found: {hyperlink.OriginalUrl}")); return Task.CompletedTask; } } @@ -317,7 +317,7 @@ private Task VerifyLocalHyperlink(Hyperlink hyperlink) hyperlink.Line, hyperlink.Column, MarkdownErrorSeverity.Error, - $"File referenced outside of the same /docs hierarchy not allowed: {hyperlink.Url}")); + $"File referenced outside of the same /docs hierarchy not allowed: {hyperlink.OriginalUrl}")); return Task.CompletedTask; } @@ -332,7 +332,7 @@ private Task VerifyLocalHyperlink(Hyperlink hyperlink) hyperlink.Line, hyperlink.Column, MarkdownErrorSeverity.Error, - $"File referenced outside of anything else then a /docs hierarchy not allowed: {hyperlink.Url}")); + $"File referenced outside of anything else then a /docs hierarchy not allowed: {hyperlink.OriginalUrl}")); return Task.CompletedTask; }