Skip to content

Commit

Permalink
新增 启动浏览器时屏蔽图片
Browse files Browse the repository at this point in the history
  • Loading branch information
erma0 committed Jun 22, 2023
1 parent dd8925b commit 69f7dcd
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 43 deletions.
85 changes: 51 additions & 34 deletions browser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,12 @@

class Browser(object):

def __init__(self, channel: str = 'msedge', need_login: bool = True, headless: bool = True, ua: str = 'pc'):
def __init__(self,
channel: str = 'msedge',
need_login: bool = True,
headless: bool = True,
ua: str = 'pc',
image: bool = False):
"""
可用对象包括:
self.context
Expand All @@ -17,29 +22,15 @@ def __init__(self, channel: str = 'msedge', need_login: bool = True, headless: b
不能在同一线程内多次创建playwright实例,不能在不同线程调用同一个全局playwright对象
若需要在线程内调用,则需要在每个线程内创建playwright实例,可参考do_login写法
"""
self.start(channel, need_login, headless, ua)
self.start(channel, need_login, headless, ua, image)

def start(self, channel, need_login, headless, ua) -> BrowserContext:
def anti_js(self):
"""
启动浏览器
注入js反检测,没用
"""
self.playwright = sync_playwright().start()
self.browser = self.playwright.chromium.launch(channel=channel,
headless=headless,
args=['--disable-blink-features=AutomationControlled'])
if ua == 'pc':
self._ua: dict = self.playwright.devices['Desktop Edge']
else:
self._ua = self.playwright.devices['iPhone 12']
if need_login: # 重用登录状态
self.do_login()
else:
self.context = self.browser.new_context(
**self._ua,
permissions=['notifications'],
ignore_https_errors=True,
)
# self.anti_js()
# js ="./js/anti.js"
js = "./js/stealth.min.js"
self.context.add_init_script(path=js)

def do_login(self):
"""
Expand All @@ -63,6 +54,37 @@ def do_login(self):
self.context.clear_cookies()
self.context.add_cookies(cookies)

def start(self, channel, need_login, headless, ua, image) -> BrowserContext:
    """
    Launch the browser and prepare the browsing context.

    Sets ``self.playwright``, ``self.browser`` and ``self._ua``. When
    ``need_login`` is false a fresh context is stored in ``self.context``;
    otherwise ``self.do_login()`` is called to reuse a saved login state.

    :param channel: browser channel forwarded to ``chromium.launch``
    :param need_login: call ``self.do_login()`` instead of creating a fresh context
    :param headless: forwarded to ``chromium.launch``
    :param ua: ``'pc'`` selects the 'Desktop Edge' device descriptor,
        any other value selects 'iPhone 12'
    :param image: when false, image loading is switched off through a Blink setting
    :return: ``self.context``, matching the declared return type
    """
    launch_args = ['--disable-blink-features=AutomationControlled']
    if not image:  # do not load images
        launch_args.append("--blink-settings=imagesEnabled=false")

    self.playwright = sync_playwright().start()
    self.browser = self.playwright.chromium.launch(
        channel=channel,
        headless=headless,
        ignore_default_args=['--enable-automation'],
        args=launch_args,
    )

    device_name = 'Desktop Edge' if ua == 'pc' else 'iPhone 12'
    self._ua: dict = self.playwright.devices[device_name]
    # self._ua['user_agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.50'

    if need_login:  # reuse the saved login state
        # NOTE(review): do_login() is presumed to assign self.context (its
        # visible tail already uses self.context) - confirm.
        self.do_login()
    else:
        self.context = self.browser.new_context(
            **self._ua,
            permissions=['notifications'],
            ignore_https_errors=True,
        )
    # self.anti_js()

    # The signature promises a BrowserContext; previously nothing was returned.
    return self.context

def stop(self):
"""
关闭浏览器
Expand All @@ -71,20 +93,15 @@ def stop(self):
self.browser.close()
self.playwright.stop()

def anti_js(self):
    """
    Inject an anti-detection JS snippet into ``self.context``; not used.
    """
    # The snippet assigns a ``window.chrome`` object and redefines the
    # ``navigator.plugins`` getter to return two PDF plugin entries.
    # NOTE(review): any leading whitespace inside the string is not
    # recoverable from this diff view - confirm against the repository.
    js = """
window.chrome = {"app":{"isInstalled":false,"InstallState":{"DISABLED":"disabled","INSTALLED":"installed","NOT_INSTALLED":"not_installed"},"RunningState":{"CANNOT_RUN":"cannot_run","READY_TO_RUN":"ready_to_run","RUNNING":"running"}},"runtime":{"OnInstalledReason":{"CHROME_UPDATE":"chrome_update","INSTALL":"install","SHARED_MODULE_UPDATE":"shared_module_update","UPDATE":"update"},"OnRestartRequiredReason":{"APP_UPDATE":"app_update","OS_UPDATE":"os_update","PERIODIC":"periodic"},"PlatformArch":{"ARM":"arm","ARM64":"arm64","MIPS":"mips","MIPS64":"mips64","X86_32":"x86-32","X86_64":"x86-64"},"PlatformNaclArch":{"ARM":"arm","MIPS":"mips","MIPS64":"mips64","X86_32":"x86-32","X86_64":"x86-64"},"PlatformOs":{"ANDROID":"android","CROS":"cros","LINUX":"linux","MAC":"mac","OPENBSD":"openbsd","WIN":"win"},"RequestUpdateCheckStatus":{"NO_UPDATE":"no_update","THROTTLED":"throttled","UPDATE_AVAILABLE":"update_available"}}};
Object.defineProperty(navigator,'plugins',{get:()=>[{0:{type:"application/x-google-chrome-pdf",suffixes:"pdf",description:"Portable Document Format",enabledPlugin:Plugin},description:"Portable Document Format",filename:"internal-pdf-viewer",length:1,name:"Chrome PDF Plugin"},{0:{type:"application/pdf",suffixes:"pdf",description:"",enabledPlugin:Plugin},description:"",filename:"mhjfbmdgcfjbbpaeojofohoefgiehjai",length:1,name:"Chrome PDF Viewer"}]});
"""
    # Register the snippet as an init script on the context.
    self.context.add_init_script(js)


if __name__ == "__main__":
edge = Browser(headless=False)
edge = Browser()
# edge = Browser(headless=False)
p = edge.context.new_page()
p.goto('http://baidu.com')
input()
# p.goto('https://antispider1.scrape.center/')
# p.goto('https://antoinevastel.com/bots/')
# p.keyboard.press('End')
p.goto('https://antoinevastel.com/bots/datadome') # 过不去
# p.goto('https://www.douyin.com/search/xinhuashe?&type=user')
# p.screenshot(path="end.png")
edge.stop()
Binary file modified dist/douyin.exe
Binary file not shown.
Binary file modified dist/monitor.exe
Binary file not shown.
Binary file modified dist/monitorStray.exe
Binary file not shown.
2 changes: 1 addition & 1 deletion login.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def get_cookies(self):
return cookies

def new_login(self) -> None:
edge = Browser(channel="msedge", need_login=False, headless=False)
edge = Browser(channel="msedge", need_login=False, headless=False, image=True)
self.context = edge.context
cookies = self._login()
edge.stop()
Expand Down
9 changes: 1 addition & 8 deletions spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from browser import Browser, BrowserContext

version = 'V3.230621'
version = 'V3.230622'
banner = rf'''
____ _ ____ _ _
| _ \ ___ _ _ _ _(_)_ __ / ___| _ __ (_) __| | ___ _ __
Expand Down Expand Up @@ -98,9 +98,7 @@ def url2redirect(self, url):
"""
取302跳转地址
"""
# a = self.context.request.head(url)
r = self.context.new_page()
r.route("**/*", lambda route: route.abort() if route.request.resource_type != "document" else route.continue_())
r.goto(url, wait_until='domcontentloaded')
url = r.url
r.close()
Expand Down Expand Up @@ -435,7 +433,6 @@ def init_(self):
def page_init(self):
self.page = self.context.new_page()
self.page.set_default_timeout(0)
self.page.route("**/*", lambda route: route.abort() if route.request.resource_type == "image" else route.continue_())
if self.has_more:
self.page.route(self.hookURL, self.handle)
self.page.goto(self.url)
Expand Down Expand Up @@ -524,10 +521,6 @@ def run(self):
self.pageDown += 1
logger.error("重试 + 1")
self.save() # 保存结果
# self.page.unroute(self.hookURL)
# self.page.unroute("**/*")
# self.page.wait_for_timeout(1000)
# self.page.screenshot(path="end.png")
self.page.close()


Expand Down

0 comments on commit 69f7dcd

Please sign in to comment.