Skip to content

Commit

Permalink
add optional list support
Browse files Browse the repository at this point in the history
  • Loading branch information
Mario Hros committed May 10, 2023
1 parent a58537e commit b9e0d6e
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 2 deletions.
16 changes: 14 additions & 2 deletions html2text.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ var numericEntityRE = regexp.MustCompile(`(?i)^#(x?[a-f0-9]+)$`)
// options holds the conversion settings that Option functions modify.
type options struct {
// lbr is the line-break string written to the output wherever a newline is emitted.
lbr string
// linksInnerText is presumably enabled by WithLinksInnerText; its effect on
// link rendering is not visible in this part of the file (TODO confirm).
linksInnerText bool
// listSupport, when true, makes every <li> tag emit a line break followed
// by "- " instead of a bare line break. It is set by WithListSupport.
listSupport bool
}

func newOptions() *options {
Expand Down Expand Up @@ -51,6 +52,13 @@ func WithLinksInnerText() Option {
}
}

// WithListSupport returns an Option that turns on list formatting: every
// <li> item is then written on its own line prefixed with "- ". The prefix
// is applied per <li> tag, so items of both <ul> and <ol> lists are rendered
// with dashes.
func WithListSupport() Option {
	enableLists := func(cfg *options) {
		cfg.listSupport = true
	}
	return enableLists
}

func parseHTMLEntity(entName string) (string, bool) {
if r, ok := entity[entName]; ok {
return string(r), true
Expand Down Expand Up @@ -231,10 +239,14 @@ func HTML2TextWithOptions(html string, reqOpts ...Option) string {
tag := html[tagStart:i]
tagNameLowercase := strings.ToLower(tag)

if tagNameLowercase == "/ul" {
if tagNameLowercase == "/ul" || tagNameLowercase == "/ol" {
outBuf.WriteString(opts.lbr)
} else if tagNameLowercase == "li" || tagNameLowercase == "li/" {
outBuf.WriteString(opts.lbr)
if opts.listSupport {
outBuf.WriteString(opts.lbr + "- ")
} else {
outBuf.WriteString(opts.lbr)
}
} else if headersRE.MatchString(tagNameLowercase) {
if canPrintNewline {
outBuf.WriteString(opts.lbr + opts.lbr)
Expand Down
10 changes: 10 additions & 0 deletions html2text_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ func TestHTML2Text(t *testing.T) {
So(HTML2Text(``), ShouldEqual, "")
So(HTML2Text(`<html><head><title>Good</title></head><body>x</body>`), ShouldEqual, "x")
So(HTML2Text(`<html><head href="foo"><title>Good</title></head><body>x</body>`), ShouldEqual, "x")
So(HTML2Text(`<htMl><hEad><titLe>Good</Title></head><boDy>x</Body>`), ShouldEqual, "x")
So(HTML2Text(`we are not <script type="javascript"></script>interested in scripts`),
ShouldEqual, "we are not interested in scripts")
})
Expand All @@ -122,6 +123,15 @@ func TestHTML2Text(t *testing.T) {
So(HTML2TextWithOptions(`<p>two</p><p>paragraphs</p>`), ShouldEqual, "two\r\n\r\nparagraphs")
})

Convey("No list support by default (original behavior)", func() {
So(HTML2Text(`list of items<ul><li>One</li><li>Two</li><li>Three</li></ul>`), ShouldEqual, "list of items\r\nOne\r\nTwo\r\nThree\r\n")
})

Convey("Optional list support", func() {
So(HTML2TextWithOptions(`list of items<ul><li>One</li><li>Two</li><li>Three</li></ul>`, WithListSupport()), ShouldEqual, "list of items\r\n- One\r\n- Two\r\n- Three\r\n")
So(HTML2TextWithOptions(`list of items<ol><li>One</li><li>Two</li><li>Three</li></ol>`, WithListSupport()), ShouldEqual, "list of items\r\n- One\r\n- Two\r\n- Three\r\n")
})

Convey("Custom HTML Tags", func() {
So(HTML2Text(`<aa>hello</aa>`), ShouldEqual, "hello")
So(HTML2Text(`<aa >hello</aa>`), ShouldEqual, "hello")
Expand Down

0 comments on commit b9e0d6e

Please sign in to comment.