Skip to content

Commit

Permalink
Merge pull request #7 from gosom/add-ordered-headers
Browse files Browse the repository at this point in the history
feat: Add specific browser headers for StealthMode
  • Loading branch information
gosom authored Jan 1, 2025
2 parents 61d34b5 + c69421b commit 5154a23
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 5 deletions.
113 changes: 113 additions & 0 deletions adapters/fetchers/stealth/browsers.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
package stealth

import "github.com/Noooste/azuretls-client"

// settings pairs a browser name with the ordered request headers that
// newSettings selected for it.
type settings struct {
// browser is the browser name the headers were selected for.
browser string
// headers holds the ordered headers chosen for browser; newSettings leaves
// it unset for names it has no header set for.
headers azuretls.OrderedHeaders
}

// newSettings builds the stealth settings for the named browser.
//
// browser is compared against the azuretls browser constants. Chrome,
// Firefox, Opera, Safari and Edge each get their matching ordered header set.
// An empty name is treated as Firefox so that an unconfigured caller does not
// end up with a blank browser name and no headers. Any other name is kept as
// given and gets no ordered headers.
func newSettings(browser string) settings {
	// Without this fallback the empty string would be stored verbatim and
	// later copied onto the session as its browser.
	if browser == "" {
		browser = azuretls.Firefox
	}

	ans := settings{
		browser: browser,
	}

	switch browser {
	case azuretls.Chrome:
		ans.headers = chromeHeaders()
	case azuretls.Firefox:
		ans.headers = firefoxHeaders()
	case azuretls.Opera:
		ans.headers = operaHeaders()
	case azuretls.Safari:
		ans.headers = safariHeaders()
	case azuretls.Edge:
		ans.headers = edgeHeaders()
	}

	return ans
}

// edgeHeaders returns the ordered request headers used when presenting as
// Microsoft Edge 119 on Windows.
func edgeHeaders() azuretls.OrderedHeaders {
	// The long, browser-specific values are named up front so the header
	// list itself stays easy to scan.
	const (
		accept    = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"
		brands    = `"Microsoft Edge";v="119", "Chromium";v="119", "Not?A_Brand";v="24"`
		userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"
	)

	headers := azuretls.OrderedHeaders{
		{"Accept", accept},
		{"Accept-Encoding", "gzip, deflate, br"},
		{"Accept-Language", "en-US,en;q=0.9"},
		{"Cache-Control", "max-age=0"},
		{"Sec-Ch-Ua", brands},
		{"Sec-Ch-Ua-Mobile", "?0"},
		{"Sec-Ch-Ua-Platform", `"Windows"`},
		{"Sec-Fetch-Dest", "document"},
		{"Sec-Fetch-Mode", "navigate"},
		{"Sec-Fetch-Site", "none"},
		{"Sec-Fetch-User", "?1"},
		{"Upgrade-Insecure-Requests", "1"},
		{"User-Agent", userAgent},
	}

	return headers
}

// chromeHeaders returns the ordered request headers used when presenting as
// Google Chrome 120 on Windows.
func chromeHeaders() azuretls.OrderedHeaders {
	// The long, browser-specific values are named up front so the header
	// list itself stays easy to scan.
	const (
		accept    = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"
		brands    = `"Google Chrome";v="120", "Chromium";v="120", "Not?A_Brand";v="24"`
		userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
	)

	headers := azuretls.OrderedHeaders{
		{"Accept", accept},
		{"Accept-Encoding", "gzip, deflate, br"},
		{"Accept-Language", "en-US,en;q=0.9"},
		{"Cache-Control", "max-age=0"},
		{"Sec-Ch-Ua", brands},
		{"Sec-Ch-Ua-Mobile", "?0"},
		{"Sec-Ch-Ua-Platform", `"Windows"`},
		{"Sec-Fetch-Dest", "document"},
		{"Sec-Fetch-Mode", "navigate"},
		{"Sec-Fetch-Site", "none"},
		{"Sec-Fetch-User", "?1"},
		{"Upgrade-Insecure-Requests", "1"},
		{"User-Agent", userAgent},
	}

	return headers
}

// firefoxHeaders returns the ordered request headers used when presenting as
// Firefox 121 on Windows. Unlike the Chromium-based sets it carries a DNT
// header and no Sec-Ch-Ua client hints.
func firefoxHeaders() azuretls.OrderedHeaders {
	// The long, browser-specific values are named up front so the header
	// list itself stays easy to scan.
	const (
		accept    = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"
		userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0"
	)

	headers := azuretls.OrderedHeaders{
		{"Accept", accept},
		{"Accept-Encoding", "gzip, deflate, br"},
		{"Accept-Language", "en-US,en;q=0.5"},
		{"Cache-Control", "max-age=0"},
		{"DNT", "1"},
		{"Sec-Fetch-Dest", "document"},
		{"Sec-Fetch-Mode", "navigate"},
		{"Sec-Fetch-Site", "none"},
		{"Sec-Fetch-User", "?1"},
		{"Upgrade-Insecure-Requests", "1"},
		{"User-Agent", userAgent},
	}

	return headers
}

// operaHeaders returns the ordered request headers used when presenting as
// Opera 103 (Chromium 117) on Windows.
func operaHeaders() azuretls.OrderedHeaders {
	// The long, browser-specific values are named up front so the header
	// list itself stays easy to scan.
	const (
		accept    = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"
		brands    = `"Opera";v="103", "Chromium";v="117", "Not;A=Brand";v="8"`
		userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 OPR/103.0.0.0"
	)

	headers := azuretls.OrderedHeaders{
		{"Accept", accept},
		{"Accept-Encoding", "gzip, deflate, br"},
		{"Accept-Language", "en-US,en;q=0.9"},
		{"Cache-Control", "max-age=0"},
		{"Sec-Ch-Ua", brands},
		{"Sec-Ch-Ua-Mobile", "?0"},
		{"Sec-Ch-Ua-Platform", `"Windows"`},
		{"Sec-Fetch-Dest", "document"},
		{"Sec-Fetch-Mode", "navigate"},
		{"Sec-Fetch-Site", "none"},
		{"Sec-Fetch-User", "?1"},
		{"Upgrade-Insecure-Requests", "1"},
		{"User-Agent", userAgent},
	}

	return headers
}

// safariHeaders returns the ordered request headers used when presenting as
// Safari 17.2 on macOS. The set has no Sec-Ch-Ua client hints and no
// Sec-Fetch-User entry.
func safariHeaders() azuretls.OrderedHeaders {
	// The long, browser-specific values are named up front so the header
	// list itself stays easy to scan.
	const (
		accept    = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
		userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15"
	)

	headers := azuretls.OrderedHeaders{
		{"Accept", accept},
		{"Accept-Encoding", "gzip, deflate, br"},
		{"Accept-Language", "en-US,en;q=0.9"},
		{"Cache-Control", "max-age=0"},
		{"Sec-Fetch-Dest", "document"},
		{"Sec-Fetch-Mode", "navigate"},
		{"Sec-Fetch-Site", "none"},
		{"Upgrade-Insecure-Requests", "1"},
		{"User-Agent", userAgent},
	}

	return headers
}
14 changes: 11 additions & 3 deletions adapters/fetchers/stealth/stealth.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,17 @@ import (
)

// stealthFetch is the fetcher value returned by New.
type stealthFetch struct {
// browserSettings supplies the browser name and ordered headers that Fetch
// copies onto its session.
browserSettings settings
}

func New() scrapemate.HTTPFetcher {
return &stealthFetch{}
// New returns a stealth HTTP fetcher.
//
// browser is optional and only its first value is consulted; additional
// values are ignored. The chosen name (or the empty string when none is
// given) is handed to newSettings, and the resulting settings are stored on
// the fetcher.
func New(browser ...string) scrapemate.HTTPFetcher {
	var name string
	if len(browser) > 0 {
		name = browser[0]
	}

	return &stealthFetch{
		browserSettings: newSettings(name),
	}
}

func (o *stealthFetch) Close() error {
Expand All @@ -36,7 +43,8 @@ func (o *stealthFetch) Fetch(ctx context.Context, job scrapemate.IJob) scrapemat

defer session.Close()

session.Browser = azuretls.Firefox
session.Browser = o.browserSettings.browser
session.OrderedHeaders = o.browserSettings.headers

req := azuretls.Request{
Method: job.GetMethod(),
Expand Down
5 changes: 4 additions & 1 deletion scrapemateapp/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,10 @@ func WithJS(opts ...func(*jsOptions)) func(*Config) error {
}
}

func WithStealth() func(*Config) error {
func WithStealth(browser string) func(*Config) error {
return func(o *Config) error {
o.UseStealth = true
o.StealthBrowser = browser

return o.validate()
}
Expand Down Expand Up @@ -147,6 +148,8 @@ type Config struct {
// UseStealth is whether to use stealth mode to scrape the page.
// uses a special http client to scrape the page.
UseStealth bool `validate:"omitempty"`
// StealthBrowser is the browser to use for stealth mode.
StealthBrowser string `validate:"omitempty"`
// JSOpts are the options for the JavaScript renderer.
JSOpts jsOptions

Expand Down
2 changes: 1 addition & 1 deletion scrapemateapp/scrapemateapp.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ func (app *ScrapemateApp) getFetcher() (scrapemate.HTTPFetcher, error) {
}
default:
if app.cfg.UseStealth {
httpFetcher = stealth.New()
httpFetcher = stealth.New(app.cfg.StealthBrowser)
} else {
cookieJar, err := cookiejar.New(nil)
if err != nil {
Expand Down

0 comments on commit 5154a23

Please sign in to comment.