Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add sec-ch-ua (client hints) #31

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions assets/sec_ch_ua.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"{\"132.0.0.0\": \"\\\"Not A(Brand\\\";v=\\\"8\\\", \\\"Chromium\\\";v=\\\"132\\\", \\\"Google Chrome\\\";v=\\\"132\\\"\", \"131.0.0.0\": \"\\\"Google Chrome\\\";v=\\\"131\\\", \\\"Chromium\\\";v=\\\"131\\\", \\\"Not_A Brand\\\";v=\\\"24\\\"\", \"130.0.0.0\": \"\\\"Chromium\\\";v=\\\"130\\\", \\\"Google Chrome\\\";v=\\\"130\\\", \\\"Not?A_Brand\\\";v=\\\"99\\\"\", \"129.0.0.0\": \"\\\"Google Chrome\\\";v=\\\"129\\\", \\\"Not=A?Brand\\\";v=\\\"8\\\", \\\"Chromium\\\";v=\\\"129\\\"\"}"
57 changes: 5 additions & 52 deletions src/masquer/utils/assets.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,6 @@
HEADER_DATA = {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "en-US,en;q=0.5;",
"Referer": "https://www.google.com/",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "none",
"Sec-Fetch-User": "?1",
"Upgrade-Insecure-Requests": "1",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.3",
}
REFERERS = [
"https://www.google.com",
"https://bing.com",
"https://yandex.com",
"https://search.yahoo.com",
"https://duckduckgo.com",
"https://www.baidu.com",
]
HEADER_DATA = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', 'Accept-Encoding': 'gzip, deflate, br', 'Accept-Language': 'en-US,en;q=0.5;', 'Referer': 'https://www.google.com/', 'Sec-Fetch-Dest': 'document', 'Sec-Fetch-Mode': 'navigate', 'Sec-Fetch-Site': 'none', 'Sec-Fetch-User': '?1', 'Upgrade-Insecure-Requests': '1', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.3'}
REFERERS = ['https://www.google.com', 'https://bing.com', 'https://yandex.com', 'https://search.yahoo.com', 'https://duckduckgo.com', 'https://www.baidu.com']
REFERER_WEIGHTS = [79.1, 11.92, 3.02, 2.99, 0.84, 0.77]
USERAGENTS = [
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Safari/605.1.1",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.3",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.3",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.",
"Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Herring/97.1.8280.8",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.3",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36 OPR/115.0.0.",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 AtContent/95.5.5462.5",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.1958",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.3",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 OPR/114.0.0.",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.3",
]
USERAGENT_WEIGHTS = [
31.48,
24.07,
17.59,
7.41,
4.63,
3.7,
2.78,
1.85,
1.85,
0.93,
0.93,
0.93,
0.93,
0.93,
]
USERAGENTS = ['Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Safari/605.1.1', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.3', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.3', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.', 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Herring/97.1.8280.8', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.3', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36 OPR/115.0.0.', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 AtContent/95.5.5462.5', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.1958', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.3', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 OPR/114.0.0.', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.3']
USERAGENT_WEIGHTS = [31.48, 24.07, 17.59, 7.41, 4.63, 3.7, 2.78, 1.85, 1.85, 0.93, 0.93, 0.93, 0.93, 0.93]
SEC_CH_UA = {"132.0.0.0": "\"Not A(Brand\";v=\"8\", \"Chromium\";v=\"132\", \"Google Chrome\";v=\"132\"", "131.0.0.0": "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\"", "130.0.0.0": "\"Chromium\";v=\"130\", \"Google Chrome\";v=\"130\", \"Not?A_Brand\";v=\"99\"", "129.0.0.0": "\"Google Chrome\";v=\"129\", \"Not=A?Brand\";v=\"8\", \"Chromium\";v=\"129\""}
9 changes: 8 additions & 1 deletion src/masquer/utils/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
REFERER_WEIGHTS,
USERAGENTS,
USERAGENT_WEIGHTS,
SEC_CH_UA
)
from .select import select_data
from .select import select_ch_ua, select_data


def get_response(
Expand Down Expand Up @@ -36,5 +37,11 @@ def get_response(
if useragent_requested:
useragent = select_data(USERAGENTS, USERAGENT_WEIGHTS)
response_data["User-Agent"] = useragent
if "Safari/537" in useragent:
sec_dict = select_ch_ua(useragent, SEC_CH_UA)
if(sec_dict):
response_data['Sec-CH-UA'] = sec_dict['sec-ch-ua']
response_data['Sec-CH-UA-Mobile'] = sec_dict['sec-ch-ua-mobile']
response_data['Sec-CH-UA-Platform'] = sec_dict['sec-ch-ua-platform']

return response_data
72 changes: 72 additions & 0 deletions src/masquer/utils/select.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import random
import re

VERSION_RE = re.compile(r"Chrome\/(\d+\.\d+\.\d+\.\d+)")

def select_data(sample_space: list[str], weights: list[float]) -> str:
"""
Expand All @@ -21,3 +23,73 @@ def select_data(sample_space: list[str], weights: list[float]) -> str:
selection = random.choices(sample_space, weights=weights, k=1)

return selection[0]

def select_ch_ua(user_agent: str, sec_ch_uas: dict) -> dict | None:
    """
    Builds the Chrome client-hint headers that match a user-agent string

    Args:
        user_agent: the user-agent string the client hints must agree with
        sec_ch_uas: mapping of a full Chrome version (e.g. "131.0.0.0") to
            the sec-ch-ua value recorded for that version

    Returns:
        A dict with the keys "sec-ch-ua-mobile", "sec-ch-ua-platform" and
        "sec-ch-ua", or None when the user-agent carries no Chrome version
        or that version has no entry in sec_ch_uas
    """
    # Step 1: identify the version of Chrome from the user agent.
    version_match = VERSION_RE.search(user_agent)
    if version_match is None:
        return None

    # Step 2: look up the brand list recorded for exactly that version.
    brand_list = sec_ch_uas.get(version_match.group(1))
    if brand_list is None:
        return None

    # Step 3: the stored brand list names "Google Chrome"; swap in the
    # browser the user agent actually advertises.
    return {
        "sec-ch-ua-mobile": "?1" if is_mobile(user_agent) else "?0",
        "sec-ch-ua-platform": get_platform(user_agent),
        "sec-ch-ua": brand_list.replace(
            "Google Chrome", find_browser_name(user_agent)
        ),
    }


def is_mobile(user_agent: str) -> bool:
    """
    Reports whether a user-agent string belongs to a mobile browser

    The user-agent counts as mobile when it contains the case-sensitive
    token "Mobile"; anything else yields False.
    """
    token_position = user_agent.find("Mobile")
    return token_position != -1

def get_platform(user_agent: str) -> str:
    """
    Returns the sec-ch-ua-platform value for a user-agent

    The value is always wrapped in double quotes, including the "Unknown"
    fallback, so every result can be used as the header value unchanged.

    Args:
        user_agent: the user-agent string to inspect

    Returns:
        The quoted platform name, e.g. '"Windows"', or '"Unknown"' when no
        known platform token is present
    """
    # First match wins. "Android" must be tested before "Linux" because
    # Android user-agents also contain the token "Linux".
    platform_tokens = (
        ("Android", '"Android"'),
        ("Linux", '"Linux"'),
        ("Windows NT", '"Windows"'),
        ("Macintosh", '"macOS"'),
        ("iPhone", '"iOS"'),
        ("iPad", '"iOS"'),
        ("CrOS", '"Chrome OS"'),
    )
    for token, platform in platform_tokens:
        if token in user_agent:
            return platform
    return '"Unknown"'

def find_browser_name(user_agent: str) -> str:
    """
    Maps a user-agent string to the brand name of the browser behind it

    Markers are tried in a fixed order and the first one contained in the
    user-agent decides the result; "Google Chrome" is the fallback.
    """
    marker_to_brand = (
        ("Brave", "Brave"),
        ("Edg", "Microsoft Edge"),
        ("OPR", "Opera"),
        ("SamsungBrowser", "Samsung Internet"),
        ("OPX", "Opera GX"),
    )
    for marker, brand in marker_to_brand:
        if marker in user_agent:
            return brand
    return "Google Chrome"
64 changes: 62 additions & 2 deletions update.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import base64
import json
import os
import re
import requests
import sys
from bs4 import BeautifulSoup
Expand All @@ -12,6 +14,7 @@
ROOT_DIR = os.path.abspath(os.path.dirname(__file__))
ASSETS_DIR = os.path.join(ROOT_DIR, "assets")
UTILS_DIR = os.path.join(ROOT_DIR, "src", "masquer", "utils")
VERSION_RE = re.compile(r"Chrome\/(\d+\.\d+\.\d+\.\d+)")


def update_useragents() -> bool:
Expand Down Expand Up @@ -97,6 +100,58 @@ def extract_data(json_file_path: str) -> dict | list[dict]:
data = json.load(f)
return data

def update_sec_ch_ua() -> bool:
    """
    Gets the latest Chrome sec-ch-ua values and saves them to a JSON file

    Walks the most recent commits of header.json in the
    fa0311/latest-user-agent repository, collects the sec-ch-ua value
    recorded for each Chrome version, and writes the resulting
    {version: sec-ch-ua} mapping to assets/sec_ch_ua.json.

    Returns:
        True if the data was fetched and saved, False if a request failed
    """
    REPO_OWNER = "fa0311"
    REPO_NAME = "latest-user-agent"
    BASE_URL = f"https://api.github.com/repos/{REPO_OWNER}/{REPO_NAME}"
    FILE_PATH = "header.json"

    sec_ch_uas = {}
    try:
        latest_commits = get_latest_commits(f"{BASE_URL}/commits")
        for commit in latest_commits:
            file_content = get_file_content_at_commit(
                f"{BASE_URL}/contents/{FILE_PATH}", commit
            )
            version, sec_ch_ua = extract_sec_ch_ua(file_content)
            if sec_ch_ua:
                sec_ch_uas[version] = sec_ch_ua
    except requests.RequestException as e:
        # Report the failure through the return value so the caller can
        # skip regenerating assets.py from stale or missing data.
        logger.error(f"Failed to fetch sec-ch-ua data: {e}")
        return False
    logger.info("Fetched sec-ch-ua data")

    # Serialize exactly once: dumping an already-dumped string would store
    # a JSON *string* in the file instead of a JSON object.
    with open(os.path.join(ASSETS_DIR, "sec_ch_ua.json"), "w") as f:
        json.dump(sec_ch_uas, f)
    return True


def get_latest_commits(
    url: str, per_page: int = 10, timeout: float = 10.0
) -> list[str]:
    """Fetch the SHAs of the latest commits that modified header.json.

    Args:
        url: commits endpoint of the repository to query
        per_page: maximum number of commits to request (default 10)
        timeout: seconds to wait for the server before giving up

    Returns:
        The commit SHAs in the order the API returned them

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an error status
    """
    FILE_PATH = "header.json"
    params = {
        "path": FILE_PATH,  # only commits that touched this file
        "per_page": per_page,
    }
    # Without a timeout, requests waits forever on an unresponsive server.
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    return [commit["sha"] for commit in response.json()]


def get_file_content_at_commit(
    url: str, commit_sha: str, timeout: float = 10.0
) -> str:
    """Fetch the file content at a specific commit.

    Args:
        url: contents endpoint of the file to read
        commit_sha: commit to read the file at, sent as the "ref" parameter
        timeout: seconds to wait for the server before giving up

    Returns:
        The file content decoded as UTF-8 text

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an error status
    """
    params = {"ref": commit_sha}
    # Without a timeout, requests waits forever on an unresponsive server.
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    content = response.json()["content"]
    # Decode the base64-encoded content
    return base64.b64decode(content).decode("utf-8")

def extract_sec_ch_ua(json_content):
"""Extract the 'sec-ch-ua' value from the JSON content."""
data = json.loads(json_content)
user_agent = data.get("chrome", {}).get("user-agent", None)
if not user_agent:
return None, None
version = VERSION_RE.search(user_agent)
return version.group(1), data.get("chrome", {}).get("sec-ch-ua", None)

def update_assets() -> bool:
"""
Expand All @@ -109,6 +164,7 @@ def update_assets() -> bool:
header_data = extract_data(os.path.join(ASSETS_DIR, "header.json"))
referer_data = extract_data(os.path.join(ASSETS_DIR, "referers.json"))
useragent_data = extract_data(os.path.join(ASSETS_DIR, "useragents.json"))
sec_ch_ua_data = extract_data(os.path.join(ASSETS_DIR, "sec_ch_ua.json"))

referers = [obj["ref"] for obj in referer_data]
referer_weights = [obj["pct"] for obj in referer_data]
Expand All @@ -129,8 +185,10 @@ def update_assets() -> bool:
f.write("\n")
f.write("USERAGENT_WEIGHTS = " + str(useragent_weights))
f.write("\n")
f.write("SEC_CH_UA = " + str(sec_ch_ua_data))
f.write("\n")

logger.info("Saved user-agent and referer JSON data to assets.py")
logger.info("Saved user-agent, referer and sec-ch-ua JSON data to assets.py")
return True

except FileNotFoundError:
Expand All @@ -145,7 +203,9 @@ def update_assets() -> bool:
if __name__ == "__main__":
ua = update_useragents()
rf = update_referers()
if ua and rf:
sec_ch_ua = update_sec_ch_ua()

if ua and rf and sec_ch_ua:
assets_updated = update_assets()
if assets_updated:
sys.exit(0)
Expand Down
Loading