From fe3edcfee417c546d613ed566a895e24a6c0ce8e Mon Sep 17 00:00:00 2001
From: dewey
Date: Fri, 31 Aug 2018 22:18:12 +0200
Subject: [PATCH] Vendor go-readability and gofeed
---
.../mauidude/go-readability/README.md | 36 +
.../mauidude/go-readability/readability.go | 552 +
vendor/github.com/mmcdole/gofeed/LICENSE | 21 +
vendor/github.com/mmcdole/gofeed/README.md | 254 +
vendor/github.com/mmcdole/gofeed/atom/feed.go | 114 +
.../github.com/mmcdole/gofeed/atom/parser.go | 722 +
vendor/github.com/mmcdole/gofeed/detector.go | 47 +
.../mmcdole/gofeed/extensions/dublincore.go | 45 +
.../mmcdole/gofeed/extensions/extensions.go | 46 +
.../mmcdole/gofeed/extensions/itunes.go | 142 +
vendor/github.com/mmcdole/gofeed/feed.go | 84 +
.../gofeed/internal/shared/charsetconv.go | 19 +
.../gofeed/internal/shared/dateparser.go | 196 +
.../gofeed/internal/shared/extparser.go | 176 +
.../gofeed/internal/shared/parseutils.go | 196 +
.../gofeed/internal/shared/xmlsanitizer.go | 23 +
vendor/github.com/mmcdole/gofeed/parser.go | 150 +
vendor/github.com/mmcdole/gofeed/rss/feed.go | 120 +
.../github.com/mmcdole/gofeed/rss/parser.go | 767 +
.../github.com/mmcdole/gofeed/translator.go | 686 +
vendor/github.com/mmcdole/goxpp/LICENSE | 21 +
vendor/github.com/mmcdole/goxpp/README.md | 43 +
vendor/github.com/mmcdole/goxpp/xpp.go | 341 +
.../golang.org/x/net/html/charset/charset.go | 257 +
vendor/golang.org/x/text/LICENSE | 27 +
vendor/golang.org/x/text/PATENTS | 22 +
.../x/text/encoding/charmap/charmap.go | 249 +
.../x/text/encoding/charmap/maketables.go | 556 +
.../x/text/encoding/charmap/tables.go | 7410 +++
vendor/golang.org/x/text/encoding/encoding.go | 335 +
.../x/text/encoding/htmlindex/gen.go | 173 +
.../x/text/encoding/htmlindex/htmlindex.go | 86 +
.../x/text/encoding/htmlindex/map.go | 105 +
.../x/text/encoding/htmlindex/tables.go | 353 +
.../text/encoding/internal/identifier/gen.go | 137 +
.../internal/identifier/identifier.go | 81 +
.../text/encoding/internal/identifier/mib.go | 1621 +
.../x/text/encoding/internal/internal.go | 75 +
.../x/text/encoding/japanese/all.go | 12 +
.../x/text/encoding/japanese/eucjp.go | 225 +
.../x/text/encoding/japanese/iso2022jp.go | 299 +
.../x/text/encoding/japanese/maketables.go | 161 +
.../x/text/encoding/japanese/shiftjis.go | 189 +
.../x/text/encoding/japanese/tables.go | 26971 ++++++++++
.../x/text/encoding/korean/euckr.go | 177 +
.../x/text/encoding/korean/maketables.go | 143 +
.../x/text/encoding/korean/tables.go | 34152 ++++++++++++
.../x/text/encoding/simplifiedchinese/all.go | 12 +
.../x/text/encoding/simplifiedchinese/gbk.go | 269 +
.../encoding/simplifiedchinese/hzgb2312.go | 245 +
.../encoding/simplifiedchinese/maketables.go | 161 +
.../text/encoding/simplifiedchinese/tables.go | 43999 ++++++++++++++++
.../text/encoding/traditionalchinese/big5.go | 199 +
.../encoding/traditionalchinese/maketables.go | 140 +
.../encoding/traditionalchinese/tables.go | 37142 +++++++++++++
.../x/text/encoding/unicode/override.go | 82 +
.../x/text/encoding/unicode/unicode.go | 434 +
.../x/text/internal/language/common.go | 16 +
.../x/text/internal/language/compact.go | 29 +
.../text/internal/language/compact/compact.go | 61 +
.../x/text/internal/language/compact/gen.go | 64 +
.../internal/language/compact/gen_index.go | 113 +
.../internal/language/compact/gen_parents.go | 54 +
.../internal/language/compact/language.go | 260 +
.../text/internal/language/compact/parents.go | 120 +
.../text/internal/language/compact/tables.go | 1015 +
.../x/text/internal/language/compact/tags.go | 91 +
.../x/text/internal/language/compose.go | 167 +
.../x/text/internal/language/coverage.go | 28 +
.../x/text/internal/language/gen.go | 1520 +
.../x/text/internal/language/gen_common.go | 20 +
.../x/text/internal/language/language.go | 596 +
.../x/text/internal/language/lookup.go | 412 +
.../x/text/internal/language/match.go | 226 +
.../x/text/internal/language/parse.go | 594 +
.../x/text/internal/language/tables.go | 3431 ++
.../x/text/internal/language/tags.go | 48 +
vendor/golang.org/x/text/internal/tag/tag.go | 100 +
.../internal/utf8internal/utf8internal.go | 87 +
vendor/golang.org/x/text/language/coverage.go | 187 +
vendor/golang.org/x/text/language/doc.go | 102 +
vendor/golang.org/x/text/language/gen.go | 305 +
vendor/golang.org/x/text/language/go1_1.go | 38 +
vendor/golang.org/x/text/language/go1_2.go | 11 +
vendor/golang.org/x/text/language/language.go | 596 +
vendor/golang.org/x/text/language/match.go | 735 +
vendor/golang.org/x/text/language/parse.go | 228 +
vendor/golang.org/x/text/language/tables.go | 298 +
vendor/golang.org/x/text/language/tags.go | 145 +
vendor/golang.org/x/text/runes/cond.go | 187 +
vendor/golang.org/x/text/runes/runes.go | 355 +
.../golang.org/x/text/transform/transform.go | 705 +
vendor/vendor.json | 150 +
93 files changed, 174394 insertions(+)
create mode 100644 vendor/github.com/mauidude/go-readability/README.md
create mode 100644 vendor/github.com/mauidude/go-readability/readability.go
create mode 100644 vendor/github.com/mmcdole/gofeed/LICENSE
create mode 100644 vendor/github.com/mmcdole/gofeed/README.md
create mode 100644 vendor/github.com/mmcdole/gofeed/atom/feed.go
create mode 100644 vendor/github.com/mmcdole/gofeed/atom/parser.go
create mode 100644 vendor/github.com/mmcdole/gofeed/detector.go
create mode 100644 vendor/github.com/mmcdole/gofeed/extensions/dublincore.go
create mode 100644 vendor/github.com/mmcdole/gofeed/extensions/extensions.go
create mode 100644 vendor/github.com/mmcdole/gofeed/extensions/itunes.go
create mode 100644 vendor/github.com/mmcdole/gofeed/feed.go
create mode 100644 vendor/github.com/mmcdole/gofeed/internal/shared/charsetconv.go
create mode 100644 vendor/github.com/mmcdole/gofeed/internal/shared/dateparser.go
create mode 100644 vendor/github.com/mmcdole/gofeed/internal/shared/extparser.go
create mode 100644 vendor/github.com/mmcdole/gofeed/internal/shared/parseutils.go
create mode 100644 vendor/github.com/mmcdole/gofeed/internal/shared/xmlsanitizer.go
create mode 100644 vendor/github.com/mmcdole/gofeed/parser.go
create mode 100644 vendor/github.com/mmcdole/gofeed/rss/feed.go
create mode 100644 vendor/github.com/mmcdole/gofeed/rss/parser.go
create mode 100644 vendor/github.com/mmcdole/gofeed/translator.go
create mode 100644 vendor/github.com/mmcdole/goxpp/LICENSE
create mode 100644 vendor/github.com/mmcdole/goxpp/README.md
create mode 100644 vendor/github.com/mmcdole/goxpp/xpp.go
create mode 100644 vendor/golang.org/x/net/html/charset/charset.go
create mode 100644 vendor/golang.org/x/text/LICENSE
create mode 100644 vendor/golang.org/x/text/PATENTS
create mode 100644 vendor/golang.org/x/text/encoding/charmap/charmap.go
create mode 100644 vendor/golang.org/x/text/encoding/charmap/maketables.go
create mode 100644 vendor/golang.org/x/text/encoding/charmap/tables.go
create mode 100644 vendor/golang.org/x/text/encoding/encoding.go
create mode 100644 vendor/golang.org/x/text/encoding/htmlindex/gen.go
create mode 100644 vendor/golang.org/x/text/encoding/htmlindex/htmlindex.go
create mode 100644 vendor/golang.org/x/text/encoding/htmlindex/map.go
create mode 100644 vendor/golang.org/x/text/encoding/htmlindex/tables.go
create mode 100644 vendor/golang.org/x/text/encoding/internal/identifier/gen.go
create mode 100644 vendor/golang.org/x/text/encoding/internal/identifier/identifier.go
create mode 100644 vendor/golang.org/x/text/encoding/internal/identifier/mib.go
create mode 100644 vendor/golang.org/x/text/encoding/internal/internal.go
create mode 100644 vendor/golang.org/x/text/encoding/japanese/all.go
create mode 100644 vendor/golang.org/x/text/encoding/japanese/eucjp.go
create mode 100644 vendor/golang.org/x/text/encoding/japanese/iso2022jp.go
create mode 100644 vendor/golang.org/x/text/encoding/japanese/maketables.go
create mode 100644 vendor/golang.org/x/text/encoding/japanese/shiftjis.go
create mode 100644 vendor/golang.org/x/text/encoding/japanese/tables.go
create mode 100644 vendor/golang.org/x/text/encoding/korean/euckr.go
create mode 100644 vendor/golang.org/x/text/encoding/korean/maketables.go
create mode 100644 vendor/golang.org/x/text/encoding/korean/tables.go
create mode 100644 vendor/golang.org/x/text/encoding/simplifiedchinese/all.go
create mode 100644 vendor/golang.org/x/text/encoding/simplifiedchinese/gbk.go
create mode 100644 vendor/golang.org/x/text/encoding/simplifiedchinese/hzgb2312.go
create mode 100644 vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go
create mode 100644 vendor/golang.org/x/text/encoding/simplifiedchinese/tables.go
create mode 100644 vendor/golang.org/x/text/encoding/traditionalchinese/big5.go
create mode 100644 vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go
create mode 100644 vendor/golang.org/x/text/encoding/traditionalchinese/tables.go
create mode 100644 vendor/golang.org/x/text/encoding/unicode/override.go
create mode 100644 vendor/golang.org/x/text/encoding/unicode/unicode.go
create mode 100644 vendor/golang.org/x/text/internal/language/common.go
create mode 100644 vendor/golang.org/x/text/internal/language/compact.go
create mode 100644 vendor/golang.org/x/text/internal/language/compact/compact.go
create mode 100644 vendor/golang.org/x/text/internal/language/compact/gen.go
create mode 100644 vendor/golang.org/x/text/internal/language/compact/gen_index.go
create mode 100644 vendor/golang.org/x/text/internal/language/compact/gen_parents.go
create mode 100644 vendor/golang.org/x/text/internal/language/compact/language.go
create mode 100644 vendor/golang.org/x/text/internal/language/compact/parents.go
create mode 100644 vendor/golang.org/x/text/internal/language/compact/tables.go
create mode 100644 vendor/golang.org/x/text/internal/language/compact/tags.go
create mode 100644 vendor/golang.org/x/text/internal/language/compose.go
create mode 100644 vendor/golang.org/x/text/internal/language/coverage.go
create mode 100644 vendor/golang.org/x/text/internal/language/gen.go
create mode 100644 vendor/golang.org/x/text/internal/language/gen_common.go
create mode 100644 vendor/golang.org/x/text/internal/language/language.go
create mode 100644 vendor/golang.org/x/text/internal/language/lookup.go
create mode 100644 vendor/golang.org/x/text/internal/language/match.go
create mode 100644 vendor/golang.org/x/text/internal/language/parse.go
create mode 100644 vendor/golang.org/x/text/internal/language/tables.go
create mode 100644 vendor/golang.org/x/text/internal/language/tags.go
create mode 100644 vendor/golang.org/x/text/internal/tag/tag.go
create mode 100644 vendor/golang.org/x/text/internal/utf8internal/utf8internal.go
create mode 100644 vendor/golang.org/x/text/language/coverage.go
create mode 100644 vendor/golang.org/x/text/language/doc.go
create mode 100644 vendor/golang.org/x/text/language/gen.go
create mode 100644 vendor/golang.org/x/text/language/go1_1.go
create mode 100644 vendor/golang.org/x/text/language/go1_2.go
create mode 100644 vendor/golang.org/x/text/language/language.go
create mode 100644 vendor/golang.org/x/text/language/match.go
create mode 100644 vendor/golang.org/x/text/language/parse.go
create mode 100644 vendor/golang.org/x/text/language/tables.go
create mode 100644 vendor/golang.org/x/text/language/tags.go
create mode 100644 vendor/golang.org/x/text/runes/cond.go
create mode 100644 vendor/golang.org/x/text/runes/runes.go
create mode 100644 vendor/golang.org/x/text/transform/transform.go
diff --git a/vendor/github.com/mauidude/go-readability/README.md b/vendor/github.com/mauidude/go-readability/README.md
new file mode 100644
index 0000000..16a424b
--- /dev/null
+++ b/vendor/github.com/mauidude/go-readability/README.md
@@ -0,0 +1,36 @@
+go-readability
+==============
+
+go-readability is a library for extracting the main content from an HTML page. This library implements the readability algorithm created by arc90 labs and was heavily inspired by https://github.com/cantino/ruby-readability.
+
+Installation
+------------
+
+`go get github.com/mauidude/go-readability`
+
+Example
+-------
+
+```
+import(
+ "github.com/mauidude/go-readability"
+)
+
+...
+
+doc, err := readability.NewDocument(html)
+if err != nil {
+ // do something ...
+}
+
+content := doc.Content()
+// do something with my content
+
+```
+
+
+Tests
+-----
+
+To run tests
+`go test github.com/mauidude/go-readability`
diff --git a/vendor/github.com/mauidude/go-readability/readability.go b/vendor/github.com/mauidude/go-readability/readability.go
new file mode 100644
index 0000000..54b4e3c
--- /dev/null
+++ b/vendor/github.com/mauidude/go-readability/readability.go
@@ -0,0 +1,552 @@
+package readability
+
+import (
+ "bytes"
+ "fmt"
+ "io/ioutil"
+ "log"
+ "math"
+ "regexp"
+ "strings"
+
+ "github.com/PuerkitoBio/goquery"
+ "golang.org/x/net/html"
+)
+
+var (
+ Logger = log.New(ioutil.Discard, "[readability] ", log.LstdFlags) // output goes to ioutil.Discard, so nothing is logged by default
+
+ replaceBrsRegexp = regexp.MustCompile(`(?i)(<br[^>]*>[ \n\r\t]*){2,}`) // two or more consecutive <br> tags, whitespace allowed between them
+ replaceFontsRegexp = regexp.MustCompile(`(?i)<(\/?)\s*font[^>]*?>`) // opening or closing <font> tag; group 1 captures the optional "/"
+
+ blacklistCandidatesRegexp = regexp.MustCompile(`(?i)popupbody`)
+ okMaybeItsACandidateRegexp = regexp.MustCompile(`(?i)and|article|body|column|main|shadow`)
+ unlikelyCandidatesRegexp = regexp.MustCompile(`(?i)combx|comment|community|hidden|disqus|modal|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup`)
+ divToPElementsRegexp = regexp.MustCompile(`(?i)<(a|blockquote|dl|div|img|ol|p|pre|table|ul)`)
+
+ negativeRegexp = regexp.MustCompile(`(?i)combx|comment|com-|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget`)
+ positiveRegexp = regexp.MustCompile(`(?i)article|body|content|entry|hentry|main|page|pagination|post|text|blog|story`)
+
+ stripCommentRegexp = regexp.MustCompile(`(?s)\<\!\-{2}.+?-{2}\>`) // HTML comments; (?s) lets "." cross newlines
+
+ sentenceRegexp = regexp.MustCompile(`\.( |$)`) // a period followed by a space or the end of the text
+
+ normalizeWhitespaceRegexp = regexp.MustCompile(`[\r\n\f]+`) // runs of CR, LF, or form feed
+)
+
+type candidate struct { // candidate pairs a goquery selection with a float32 score
+ selection *goquery.Selection // Node() returns element 0 of this selection
+ score float32 // NOTE(review): presumably the ranking used to choose Document.bestCandidate; the scoring code is outside this view — confirm
+}
+
+func (c *candidate) Node() *html.Node { // Node returns element 0 of the candidate's goquery selection.
+ return c.selection.Get(0) // NOTE(review): behaviour for an empty selection depends on goquery's Get — confirm
+}
+
+type Document struct { // Document holds one HTML input plus the exported settings that NewDocument fills with defaults
+ input string // the string handed to NewDocument
+ document *goquery.Document // NOTE(review): presumably the parsed form of input; the assignment is outside this view — confirm
+ content string
+ candidates map[*html.Node]*candidate // keyed by *html.Node (presumably each candidate's Node() — confirm)
+ bestCandidate *candidate
+
+ RemoveUnlikelyCandidates bool // NewDocument sets true
+ WeightClasses bool // NewDocument sets true
+ CleanConditionally bool // NewDocument sets true
+ BestCandidateHasImage bool // not set by NewDocument, so false unless the caller changes it
+ RetryLength int // NewDocument sets 250
+ MinTextLength int // NewDocument sets 25
+ RemoveEmptyNodes bool // NewDocument sets true
+ WhitelistTags []string // NewDocument sets {"div", "p"}
+}
+
+func NewDocument(s string) (*Document, error) { // NewDocument wraps the HTML string s in a Document carrying the defaults below.
+ d := &Document{
+ input: s,
+ WhitelistTags: []string{"div", "p"},
+ RemoveUnlikelyCandidates: true,
+ WeightClasses: true,
+ CleanConditionally: true,
+ RetryLength: 250,
+ MinTextLength: 25,
+ RemoveEmptyNodes: true,
+ } // BestCandidateHasImage is not listed, so it keeps its zero value (false)
+ err := d.initializeHtml(s) // rewrites and parses s; its error is passed through unchanged
+ if err != nil {
+ return nil, err // no partially built Document is returned on failure
+ }
+
+ return d, nil
+}
+
+func (d *Document) initializeHtml(s string) error {
+ // replace consecutive <br>'s with p tags
+ s = replaceBrsRegexp.ReplaceAllString(s, "
")
+
+ // replace font tags
+ s = replaceFontsRegexp.ReplaceAllString(s, `<${1}span>`)
+
+ // manually strip regexps since html parser seems to miss some
+ s = stripCommentRegexp.ReplaceAllString(s, "")
+
+ doc, err := goquery.NewDocumentFromReader(strings.NewReader(s))
+ if err != nil {
+ return err
+ }
+
+ // if no body (like from a redirect or empty string)
+ if doc.Find("body").Length() == 0 {
+ s = "