From 821e78d9e631158b136ec46e0ce744ed0bc77425 Mon Sep 17 00:00:00 2001 From: kqlio67 <166700875+kqlio67@users.noreply.github.com> Date: Tue, 17 Dec 2024 20:18:24 +0000 Subject: [PATCH] feat(provider/blackbox): update API validation patterns and enhance request headers (#2494) * feat(blackbox2): update license key pattern - Change regex pattern from 'j= to v=' in license_pattern - Maintain consistent error handling and retry logic - Preserve header configuration for API requests * feat(blackbox): update API key pattern and add accept-language header - Change regex pattern from 'w= to p=' in key_pattern for validation - Add accept-language header to API request headers * refactor(blackbox): enhance UUID validation and regex pattern - Implement robust UUID validation with context checking - Update regex pattern to use dynamic character matching - Add helper function for validating UUID context * refactor(blackbox2): simplify license key validation - Update regex pattern from dynamic format to static 'v=' pattern - Remove context validation helper function - Add docstring for license key validation method --------- Co-authored-by: kqlio67 <> --- g4f/Provider/Blackbox.py | 34 ++++++++++++++++++++-------------- g4f/Provider/Blackbox2.py | 25 ++++++++++++++++--------- 2 files changed, 36 insertions(+), 23 deletions(-) diff --git a/g4f/Provider/Blackbox.py b/g4f/Provider/Blackbox.py index bac3d766450..fd788576f8b 100644 --- a/g4f/Provider/Blackbox.py +++ b/g4f/Provider/Blackbox.py @@ -150,7 +150,6 @@ def _save_cached_value(cls, value: str): @classmethod async def fetch_validated(cls): - # Let's try to load the value from the cache first cached_value = cls._load_cached_value() if cached_value: return cached_value @@ -165,19 +164,25 @@ async def fetch_validated(cls): page_content = await response.text() js_files = re.findall(r'static/chunks/\d{4}-[a-fA-F0-9]+\.js', page_content) - key_pattern = re.compile(r'w="([0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})"') - - for js_file in js_files: - js_url = f"{cls.url}/_next/{js_file}" - async with session.get(js_url) as js_response: - if js_response.status == 200: - js_content = await js_response.text() - match = key_pattern.search(js_content) - if match: - validated_value = match.group(1) - # Save the new value to the cache file - cls._save_cached_value(validated_value) - return validated_value + uuid_format = r'["\']([0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})["\']' + + def is_valid_context(text_around): + return any(char + '=' in text_around for char in 'abcdefghijklmnopqrstuvwxyz') + + for js_file in js_files: + js_url = f"{cls.url}/_next/{js_file}" + async with session.get(js_url) as js_response: + if js_response.status == 200: + js_content = await js_response.text() + for match in re.finditer(uuid_format, js_content): + start = max(0, match.start() - 10) + end = min(len(js_content), match.end() + 10) + context = js_content[start:end] + + if is_valid_context(context): + validated_value = match.group(1) + cls._save_cached_value(validated_value) + return validated_value except Exception as e: print(f"Error fetching validated value: {e}") @@ -240,6 +245,7 @@ async def create_async_generator( headers = { 'accept': '*/*', + 'accept-language': 'en-US,en;q=0.9', 'content-type': 'application/json', 'origin': cls.url, 'referer': f'{cls.url}/', diff --git a/g4f/Provider/Blackbox2.py b/g4f/Provider/Blackbox2.py index f27a25595f8..374d5c04746 100644 --- a/g4f/Provider/Blackbox2.py +++ b/g4f/Provider/Blackbox2.py @@ -64,7 +64,6 @@ def _save_cached_license(cls, license_key: str): @classmethod async def _get_license_key(cls, session: ClientSession) -> str: - """Gets the license key from the cache or from JavaScript files.""" cached_license = cls._load_cached_license() if cached_license: return cached_license @@ -73,18 +72,26 @@ async def _get_license_key(cls, session: ClientSession) -> str: async with session.get(cls.url) as response: html = await response.text() js_files = re.findall(r'static/chunks/\d{4}-[a-fA-F0-9]+\.js', html) - - license_pattern = re.compile(r'j="(\d{6}-\d{6}-\d{6}-\d{6}-\d{6})"') - + + license_format = r'["\'](\d{6}-\d{6}-\d{6}-\d{6}-\d{6})["\']' + + def is_valid_context(text_around): + return any(char + '=' in text_around for char in 'abcdefghijklmnopqrstuvwxyz') + for js_file in js_files: js_url = f"{cls.url}/_next/{js_file}" async with session.get(js_url) as js_response: js_content = await js_response.text() - if license_match := license_pattern.search(js_content): - license_key = license_match.group(1) - cls._save_cached_license(license_key) - return license_key - + for match in re.finditer(license_format, js_content): + start = max(0, match.start() - 10) + end = min(len(js_content), match.end() + 10) + context = js_content[start:end] + + if is_valid_context(context): + license_key = match.group(1) + cls._save_cached_license(license_key) + return license_key + raise ValueError("License key not found") except Exception as e: debug.log(f"Error getting license key: {str(e)}")