main.go (forked from dbalduini/smeago)
package main

import (
	"flag"
	"github.com/Demuzx/smeago/src"
	"log"
	"os"
	"path"
	"sort"
	"time"
)

// Command-line flags; see init for registration and defaults.
var (
	host      string
	port      string
	urlLoc    string
	outputDir string
	userAgent string
)

func main() {
	flag.Parse()
	start := time.Now()

	// Build the origin to crawl; omit the port when it is the default 80.
	origin := host
	if port != "80" {
		origin += ":" + port
	}
	// The loc prefix defaults to the crawled origin.
	if urlLoc == "" {
		urlLoc = origin
	}

	s := &smeago.Sitemap{}
	s.Filename = path.Join(outputDir, "sitemap.xml")
	s.Path = urlLoc

	log.Println("Crawling Host:", origin)
	log.Println("Urlset Loc:", s.Path)
	log.Println("Sitemap File:", s.Filename)

	// Start crawling on the home page.
	c := smeago.NewCrawler(origin, 30*time.Second, 3)
	c.AddHeader("User-Agent", userAgent)
	cs := smeago.NewCrawlerSupervisor(c)
	cs.AddJobToBuffer("/")

	// Block main until the crawler is done.
	done := make(chan bool, 1)
	cs.Start(done)
	<-done
	close(done)

	// Write the visited links to the sitemap in sorted order.
	s.Links = cs.GetVisitedLinks()
	sort.Strings(s.Links)
	if err := s.WriteToFile(); err != nil {
		log.Println(err)
	}

	log.Println("Finished in", time.Since(start))
}

func init() {
	// Default the output directory to the current working directory.
	workDir, err := os.Getwd()
	if err != nil {
		log.Println(err)
		workDir = ""
	}

	flag.StringVar(&host, "h", "http://localhost", "the host name")
	flag.StringVar(&port, "p", "80", "the host port")
	flag.StringVar(&urlLoc, "loc", "", "the prefix of sitemap loc tags")
	flag.StringVar(&outputDir, "o", workDir, "the sitemap output dir")
	flag.StringVar(&userAgent, "a", "", "user agent")
}