Skip to content

Commit

Permalink
Merge pull request #49 from Ellerbach/doclinkchecker-unicode-fix
Browse files Browse the repository at this point in the history
Doclinkchecker unicode fix
  • Loading branch information
mtirionMSFT authored Jan 18, 2024
2 parents 22916c8 + 93c8512 commit 60662da
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,17 @@ internal static string AddLink(this string s, string url)
return s + Environment.NewLine + content;
}

/// <summary>
/// Appends a C# code-include line of the form <c>[!code-csharp[name](url)]</c>
/// to the markdown built so far.
/// </summary>
/// <param name="s">Markdown built so far; may be null or empty.</param>
/// <param name="name">Text placed between the inner square brackets of the include.</param>
/// <param name="url">Target placed between the parentheses of the include.</param>
/// <returns>
/// The include line (terminated by a newline) on its own when <paramref name="s"/> is
/// null or empty; otherwise <paramref name="s"/>, a newline, and then the include line.
/// </returns>
internal static string AddCodeLink(this string s, string name, string url)
{
    // No random data is needed here, so the Faker instance the original created
    // (and never used) has been dropped.
    string content = $" [!code-csharp[{name}]({url})]" + Environment.NewLine;
    if (string.IsNullOrEmpty(s))
    {
        return content;
    }

    return s + Environment.NewLine + content;
}

internal static string AddTableStart(this string s, int columns = 3)
{
Faker faker = new Faker();
Expand Down Expand Up @@ -102,5 +113,14 @@ internal static string AddRawMarkdown(this string s, string markdown)
}
return s + markdown;
}

/// <summary>
/// Appends <paramref name="content"/> verbatim to the text built so far,
/// without inserting any separator or newline.
/// </summary>
/// <param name="s">Text built so far; may be null or empty.</param>
/// <param name="content">Text to append exactly as given.</param>
/// <returns>
/// <paramref name="content"/> alone when <paramref name="s"/> is null or empty;
/// otherwise <paramref name="s"/> immediately followed by <paramref name="content"/>.
/// </returns>
internal static string AddRawContent(this string s, string content)
    => string.IsNullOrEmpty(s) ? content : s + content;
}
}
28 changes: 27 additions & 1 deletion src/DocLinkChecker/DocLinkChecker.Test/MarkdownTests.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
namespace DocLinkChecker.Test
namespace DocLinkChecker.Test
{
using System.Linq;
using Bogus;
using DocLinkChecker.Helpers;
using DocLinkChecker.Models;
using DocLinkChecker.Test.Helpers;
Expand Down Expand Up @@ -81,6 +82,31 @@ public void FindAllHeadings()
headings.Count.Should().Be(6);
}

/// <summary>
/// Checks the heading ids generated for titles that contain lower-case text,
/// upper-case text, and a mix of symbols and non-ASCII letters.
/// </summary>
[Fact]
public void FindAllHeadingsWithUnicodeCharacters()
{
    // Arrange: one top-level heading followed by three second-level headings,
    // each followed by a generated paragraph.
    string document = string.Empty.AddHeading("Test Unicode Characters", 1);
    document = document.AddParagraphs(1).AddLink("#");
    document = document.AddHeading("abcdefghijklmnopqrstuvwxyz 0123456789", 2);
    document = document.AddParagraphs(1);
    document = document.AddHeading("ABCDEFGHIJKLMNOPQRSTUVWXYZ 0123456789", 2);
    document = document.AddParagraphs(1);
    document = document.AddHeading("UNICODE-!@#$%^&*+=~`<>,.?/:;€|Æäßéóčúįǯ-CHARS", 2);
    document = document.AddParagraphs(1);

    // Act
    var parsed = MarkdownHelper.ParseMarkdownString(string.Empty, document, true);
    var foundHeadings = parsed.objects.OfType<Heading>().ToList();

    // Assert: the lower-case and upper-case titles are expected to yield the same id;
    // in the last title the symbols are expected to be dropped while the accented
    // letters are kept (lower-cased).
    foundHeadings.Count.Should().Be(4);
    foundHeadings[1].Id.Should().Be("abcdefghijklmnopqrstuvwxyz-0123456789");
    foundHeadings[2].Id.Should().Be("abcdefghijklmnopqrstuvwxyz-0123456789");
    foundHeadings[3].Id.Should().Be("unicode-æäßéóčúįǯ-chars");
}

[Fact]
public void FindAllTables()
{
Expand Down
4 changes: 3 additions & 1 deletion src/DocLinkChecker/DocLinkChecker/Helpers/MarkdownHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,9 @@ public static (List<MarkdownObjectBase> objects, List<MarkdownError> errors)
// custom generation of the id
string id = title.ToLower();
id = Regex.Replace(id, "[ _]", "-");
id = Regex.Replace(id, "[^a-zA-Z0-9-]*", string.Empty);

// Remove every character that is not a letter, a digit (0-9) or a hyphen. \p{L} matches any Unicode letter, so characters like ö and á are kept as well.
id = Regex.Replace(id, @"[^\p{L}0-9-]*", string.Empty);

return new Heading(markdownFilePath, x.Line + 1, x.Column + 1, title, id);
})
Expand Down

This file was deleted.

0 comments on commit 60662da

Please sign in to comment.