Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Browser binary caching and evaluation queueing #33

Merged
merged 13 commits into from
Oct 23, 2024
Merged
9 changes: 9 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,13 @@
"test_*.py"
],
"python.testing.unittestEnabled": true,
"[python]": {
"editor.defaultFormatter": "charliermarsh.ruff",
"editor.rulers": [100, 120],
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.organizeImports": "explicit"
},
"editor.formatOnType": true
},
}
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ COPY /bci/web/vue ./
RUN npm run build


FROM openresty/openresty:1.25.3.1-3-bullseye AS nginx
FROM openresty/openresty:1.27.1.1-bullseye AS nginx
COPY ./nginx/start.sh /usr/local/bin/
COPY ./nginx/config /etc/nginx/config
COPY --from=ui-build-stage /app/dist /www/data
Expand Down
2 changes: 1 addition & 1 deletion bci/browser/binary/artisanal_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def get_artisanal_binaries_list(self) -> list:
return sorted(self.meta_info, key=lambda i: int(i["id"]))

def has_artisanal_binary_for(self, state: State) -> bool:
return len(list(filter(lambda x: x['id'] == state.revision_number, self.meta_info))) > 0
return len(list(filter(lambda x: x['id'] == state.revision_nb, self.meta_info))) > 0

def add_new_subfolders(self, subfolders):
logger.info("Adding new subfolders to metadata")
Expand Down
49 changes: 28 additions & 21 deletions bci/browser/binary/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,17 @@
import logging
import os
from abc import abstractmethod
from typing import Optional

from bci import util
from bci.browser.binary.artisanal_manager import ArtisanalBuildManager
from bci.database.mongo.binary_cache import BinaryCache
from bci.version_control.states.state import State

logger = logging.getLogger(__name__)


class Binary:

def __init__(self, state: State):
self.state = state
self.__version = None
Expand Down Expand Up @@ -40,19 +41,23 @@ def bin_folder_path(self) -> str:

@property
def origin(self) -> str:
if 'artisanal' in self.get_bin_path():
bin_path = self.get_bin_path()
if bin_path is None:
raise AttributeError('Binary path is not available')

if 'artisanal' in bin_path:
return 'artisanal'
elif 'downloaded' in self.get_bin_path():
elif 'downloaded' in bin_path:
return 'downloaded'
else:
raise ValueError(f'Unknown binary origin for path \'{self.get_bin_path()}\'')
raise AttributeError(f"Unknown binary origin for path '{self.get_bin_path()}'")

@staticmethod
def list_downloaded_binaries(bin_folder_path: str) -> list[dict[str, str]]:
binaries = []
for subfolder_path in os.listdir(os.path.join(bin_folder_path, "downloaded")):
for subfolder_path in os.listdir(os.path.join(bin_folder_path, 'downloaded')):
bin_entry = {}
bin_entry["id"] = subfolder_path
bin_entry['id'] = subfolder_path
binaries.append(bin_entry)
return binaries

Expand All @@ -67,17 +72,24 @@ def get_artisanal_manager(bin_folder_path: str, executable_name: str) -> Artisan
def fetch_binary(self):
# Check cache
if self.is_built():
logger.info(f'Binary for {self.state.index} is already in place')
return
# Consult binary cache
elif BinaryCache.fetch_binary_files(self.get_potential_bin_path(), self.state):
logger.info(f'Binary for {self.state.index} fetched from cache')
return
# Try to download binary
elif self.is_available_online():
self.download_binary()
logger.info(f'Binary for {self.state.index} downloaded')
BinaryCache.store_binary_files(self.get_potential_bin_path(), self.state)
else:
raise BuildNotAvailableError(self.browser_name, self.state)

def is_available(self):
'''
"""
Returns True if the binary is available either locally or online.
'''
"""
return self.is_available_locally() or self.is_available_online()

def is_available_locally(self):
Expand All @@ -95,7 +107,7 @@ def is_built(self):
bin_path = self.get_bin_path()
return bin_path is not None

def get_bin_path(self):
def get_bin_path(self) -> Optional[str]:
"""
Returns path to binary, only if the binary is available locally. Otherwise it returns None.
"""
Expand All @@ -112,8 +124,8 @@ def get_potential_bin_path(self, artisanal=False):
Returns path to potential binary. It does not guarantee whether the binary is available locally.
"""
if artisanal:
return os.path.join(self.bin_folder_path, "artisanal", self.state.name, self.executable_name)
return os.path.join(self.bin_folder_path, "downloaded", self.state.name, self.executable_name)
return os.path.join(self.bin_folder_path, 'artisanal', self.state.name, self.executable_name)
return os.path.join(self.bin_folder_path, 'downloaded', self.state.name, self.executable_name)

def get_bin_folder_path(self):
path_downloaded = self.get_potential_bin_folder_path()
Expand All @@ -126,25 +138,20 @@ def get_bin_folder_path(self):

def get_potential_bin_folder_path(self, artisanal=False):
if artisanal:
return os.path.join(self.bin_folder_path, "artisanal", self.state.name)
return os.path.join(self.bin_folder_path, "downloaded", self.state.name)
return os.path.join(self.bin_folder_path, 'artisanal', self.state.name)
return os.path.join(self.bin_folder_path, 'downloaded', self.state.name)

def remove_bin_folder(self):
path = self.get_bin_folder_path()
if path and "artisanal" not in path:
if path and 'artisanal' not in path:
if not util.rmtree(path):
logger.error("Could not remove folder '%s'" % path)

@abstractmethod
def get_driver_version(self, browser_version):
pass

@abstractmethod
def _get_version(self):
def _get_version(self) -> str:
pass


class BuildNotAvailableError(Exception):

def __init__(self, browser_name, build_state):
super().__init__("Browser build not available: %s (%s)" % (browser_name, build_state))
super().__init__('Browser build not available: %s (%s)' % (browser_name, build_state))
4 changes: 3 additions & 1 deletion bci/browser/binary/factory.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Type

from bci.browser.binary.binary import Binary
from bci.browser.binary.vendors.chromium import ChromiumBinary
from bci.browser.binary.vendors.firefox import FirefoxBinary
Expand Down Expand Up @@ -36,7 +38,7 @@ def get_binary(state: State) -> Binary:
return __get_object(state)


def __get_class(browser_name: str) -> Binary.__class__:
def __get_class(browser_name: str) -> Type[Binary]:
match browser_name:
case 'chromium':
return ChromiumBinary
Expand Down
21 changes: 5 additions & 16 deletions bci/browser/binary/vendors/chromium.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from bci.browser.binary.binary import Binary
from bci.version_control.states.state import State

logger = logging.getLogger('bci')
logger = logging.getLogger(__name__)

EXECUTABLE_NAME = 'chrome'
BIN_FOLDER_PATH = '/app/browser/binaries/chromium'
Expand Down Expand Up @@ -74,27 +74,16 @@ def download_binary(self):
shutil.rmtree(os.path.dirname(zip_file_path))

def _get_version(self) -> str:
bin_path = self.get_bin_path()
command = "./chrome --version"
output = cli.execute_and_return_output(command, cwd=os.path.dirname(bin_path))
if bin_path := self.get_bin_path():
output = cli.execute_and_return_output(command, cwd=os.path.dirname(bin_path))
else:
raise AttributeError(f'Could not get binary path for {self.state}')
match = re.match(r'Chromium (?P<version>[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)', output)
if match:
return match.group("version")
raise AttributeError("Could not determine version of binary at '%s'. Version output: %s" % (bin_path, output))

def get_driver_path(self, full_browser_version):
driver_version = self.get_driver_version(full_browser_version)
driver_path = os.path.join(DRIVER_FOLDER_PATH, driver_version)
if os.path.exists(driver_path):
return driver_path
raise AttributeError("Could not find appropriate driver for Chromium %s" % full_browser_version)

def get_driver_version(self, browser_version):
short_browser_version = browser_version.split('.')[0]
if short_browser_version not in self.browser_version_to_driver_version.keys():
raise AttributeError("Could not determine driver version associated with Chromium version %s" % browser_version)
return self.browser_version_to_driver_version[short_browser_version]

@staticmethod
def list_downloaded_binaries() -> list[dict[str, str]]:
return Binary.list_downloaded_binaries(BIN_FOLDER_PATH)
Expand Down
2 changes: 0 additions & 2 deletions bci/browser/binary/vendors/firefox.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
from bci import cli, util
from bci.browser.binary.artisanal_manager import ArtisanalBuildManager
from bci.browser.binary.binary import Binary
from bci.version_control.states.revisions.firefox import (BINARY_AVAILABILITY_MAPPING,
REVISION_NUMBER_MAPPING)
from bci.version_control.states.state import State

logger = logging.getLogger('bci')
Expand Down
49 changes: 23 additions & 26 deletions bci/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,12 @@
import sys

import bci.database.mongo.container as container
from bci.evaluations.logic import DatabaseConnectionParameters
from bci.evaluations.logic import DatabaseParameters

logger = logging.getLogger(__name__)


class Global:

custom_page_folder = '/app/experiments/pages'

@staticmethod
Expand All @@ -25,7 +24,7 @@ def get_browser_config_class(browser: str):
case 'firefox':
return Firefox
case _:
raise ValueError(f'Invalid browser \'{browser}\'')
raise ValueError(f"Invalid browser '{browser}'")

@staticmethod
def get_available_domains() -> list[str]:
Expand All @@ -44,7 +43,9 @@ def check_required_env_parameters() -> bool:
fatal = False
# HOST_PWD
if (host_pwd := os.getenv('HOST_PWD')) in ['', None]:
logger.fatal('The "HOST_PWD" variable is not set. If you\'re using sudo, you might have to pass it explicitly, for example "sudo HOST_PWD=$PWD docker compose up".')
logger.fatal(
'The "HOST_PWD" variable is not set. If you\'re using sudo, you might have to pass it explicitly, for example "sudo HOST_PWD=$PWD docker compose up".'
)
fatal = True
else:
logger.debug(f'HOST_PWD={host_pwd}')
Expand All @@ -66,54 +67,49 @@ def initialize_folders():
file.write('{}')

@staticmethod
def get_database_connection_params() -> DatabaseConnectionParameters:
required_database_params = [
'BCI_MONGO_HOST',
'BCI_MONGO_USERNAME',
'BCI_MONGO_DATABASE',
'BCI_MONGO_PASSWORD'
]
missing_database_params = [
param for param in required_database_params
if os.getenv(param) in ['', None]]
def get_database_params() -> DatabaseParameters:
required_database_params = ['BCI_MONGO_HOST', 'BCI_MONGO_USERNAME', 'BCI_MONGO_DATABASE', 'BCI_MONGO_PASSWORD']
missing_database_params = [param for param in required_database_params if os.getenv(param) in ['', None]]
if missing_database_params:
logger.info(f'Could not find database parameters {missing_database_params}, using database container...')
return container.run()
else:
database_params = DatabaseConnectionParameters(
database_params = DatabaseParameters(
os.getenv('BCI_MONGO_HOST'),
os.getenv('BCI_MONGO_USERNAME'),
os.getenv('BCI_MONGO_PASSWORD'),
os.getenv('BCI_MONGO_DATABASE')
os.getenv('BCI_MONGO_DATABASE'),
int(os.getenv('BCI_BINARY_CACHE_LIMIT', 0)),
)
logger.info(f'Found database environment variables \'{database_params}\'')
logger.info(f"Found database environment variables '{database_params}'")
return database_params

@staticmethod
def get_tag() -> str:
'''
"""
Returns the Docker image tag of BugHog.
This should never be empty.
'''
assert (bughog_version := os.getenv('BUGHOG_VERSION')) not in ['', None]
"""
bughog_version = os.getenv('BUGHOG_VERSION', None)
if bughog_version is None or bughog_version == '':
raise ValueError('BUGHOG_VERSION is not set')
return bughog_version


class Chromium:

extension_folder = '/app/browser/extensions/chromium'
repo_to_use = 'online'


class Firefox:

extension_folder = '/app/browser/extensions/firefox'
repo_to_use = 'online'


class CustomHTTPHandler(logging.handlers.HTTPHandler):

def __init__(self, host: str, url: str, method: str = 'GET', secure: bool = False, credentials=None, context=None) -> None:
def __init__(
self, host: str, url: str, method: str = 'GET', secure: bool = False, credentials=None, context=None
) -> None:
super().__init__(host, url, method=method, secure=secure, credentials=credentials, context=context)
self.hostname = os.getenv('HOSTNAME')

Expand All @@ -124,8 +120,9 @@ def mapLogRecord(self, record):


class Loggers:

formatter = logging.Formatter(fmt='[%(asctime)s] [%(levelname)s] %(name)s: %(message)s', datefmt='%d-%m-%Y %H:%M:%S')
formatter = logging.Formatter(
fmt='[%(asctime)s] [%(levelname)s] %(name)s: %(message)s', datefmt='%d-%m-%Y %H:%M:%S'
)
memory_handler = logging.handlers.MemoryHandler(capacity=100, flushLevel=logging.ERROR)

@staticmethod
Expand Down
Loading
Loading