htmlquery is an XPath query package for HTML. It lets you extract data from HTML documents, or evaluate expressions against them, using XPath expressions.
$ go get github.com/Aiicy/htmlquery
// Derive a context that is cancelled after one second and pass it to LoadURL.
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()

doc, err := htmlquery.LoadURL(ctx, "http://example.com/")
ctx := context.Background()
ctx, cancel := context.WithTimeout(ctx, time.Second)
defer cancel()
header := map[string]string {
"User-Agent": "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
}
doc,err := htmlquery.LoadURLWithHeader(ctx,"http://example.com/",header)
ctx := context.Background()
ctx, cancel := context.WithTimeout(ctx, time.Second)
defer cancel()
doc,err := htmlquery.LoadURLWithProxy(ctx,"http://example.com/","http://proxyip:proxyport")
s := `<html>....</html>`
doc, err := htmlquery.Parse(strings.NewReader(s))
list := htmlquery.Find(doc, "//a")
// Query the document with //a/@href, the XPath for the href attributes of
// <a> elements. (`range` is only valid inside a `for` clause, so the result is
// assigned directly.)
list := htmlquery.Find(doc, "//a/@href")
a := htmlquery.FindOne(doc, "//a[3]")
// Compile the XPath expression; count(//img) is the number of <img> elements.
expr, err := xpath.Compile("count(//img)")
if err != nil {
	// Surface a bad expression here instead of dereferencing a nil expr below.
	panic(err)
}

// Evaluate's result is type-asserted to float64, the Go type used here for
// the XPath number that count() produces.
v := expr.Evaluate(htmlquery.CreateXPathNavigator(doc)).(float64)
fmt.Printf("total count is %f", v)
package main
import (
	"context"
	"fmt"
	"time"

	"github.com/Aiicy/htmlquery"
)
// main loads a Bing search page for "golang" and, for each node matched by
// //ol/li, prints its index together with the text and href of a link found
// beneath it. It panics if the page cannot be loaded.
func main() {
	// The context is cancelled after one second.
	ctx := context.Background()
	ctx, cancel := context.WithTimeout(ctx, time.Second)
	defer cancel()

	doc, err := htmlquery.LoadURL(ctx, "https://www.bing.com/search?q=golang")
	if err != nil {
		panic(err)
	}

	// Find all news items.
	for i, n := range htmlquery.Find(doc, "//ol/li") {
		a := htmlquery.FindOne(n, "//a")
		if a == nil {
			// Skip list items without a link rather than passing a nil node
			// to InnerText and SelectAttr.
			continue
		}
		fmt.Printf("%d %s(%s)\n", i, htmlquery.InnerText(a), htmlquery.SelectAttr(a, "href"))
	}
}
Name | Description |
---|---|
htmlquery | XPath query package for the HTML document |
xmlquery | XPath query package for the XML document |
jsonquery | XPath query package for the JSON document |
If you have any questions, please create an issue. Contributions are welcome.