Skip to content

Commit

Permalink
Merge branch 'main' into refactor-schema-synchronization
Browse files Browse the repository at this point in the history
  • Loading branch information
fredericenard authored Jul 16, 2024
2 parents 19428fb + 0e0ae30 commit fd4ed24
Show file tree
Hide file tree
Showing 8 changed files with 215 additions and 28 deletions.
16 changes: 8 additions & 8 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ jobs:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: "3.9" # run one job on 3.9
python-version: "3.10" # run one job on 3.10
cache: "pip"
- uses: aws-actions/configure-aws-credentials@v2
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: eu-central-1
- run: pip install -U laminci
- run: pip install "laminci@git+https://x-access-token:${{ secrets.LAMIN_BUILD_DOCS }}@github.com/laminlabs/laminci"
- run: nox -s "install(group='hub-prod')"
- run: nox -s "build(lamin_env='prod', group='hub-prod')"
- uses: actions/upload-artifact@v2
Expand All @@ -42,7 +42,7 @@ jobs:
- lamin_env: "prod"
python-version: "3.11"
- lamin_env: "staging"
python-version: "3.10" # test on 3.10
python-version: "3.10" # test on 3.10
timeout-minutes: 6
steps:
- uses: aws-actions/configure-aws-credentials@v2
Expand Down Expand Up @@ -75,7 +75,7 @@ jobs:
run: docker pull postgres:latest && docker image save postgres:latest --output ~/postgres.tar
- if: steps.cache-postgres.outputs.cache-hit == 'true'
run: docker image load --input ~/postgres.tar
- run: pip install -U laminci
- run: pip install "laminci@git+https://x-access-token:${{ secrets.LAMIN_BUILD_DOCS }}@github.com/laminlabs/laminci"
# account for in a different job
# - uses: "google-github-actions/auth@v0"
# with:
Expand All @@ -97,10 +97,10 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
with:
python-version: "3.11" # we need to run everything for coverage on 3.11
python-version: "3.9"
cache: "pip"
cache-dependency-path: ".github/workflows/build.yml"
- run: pip install -U laminci
- run: pip install "laminci@git+https://x-access-token:${{ secrets.LAMIN_BUILD_DOCS }}@github.com/laminlabs/laminci"
- run: nox -s "install(group='storage')"
- run: nox -s lint
- run: nox -s storage
Expand Down Expand Up @@ -141,7 +141,7 @@ jobs:
python-version: "3.11" # we need to run everything for coverage on 3.11
cache: "pip"
cache-dependency-path: ".github/workflows/build.yml"
- run: pip install -U laminci
- run: pip install "laminci@git+https://x-access-token:${{ secrets.LAMIN_BUILD_DOCS }}@github.com/laminlabs/laminci"
- run: nox -s "install(group='hub-local')"
- id: cache-supabase
uses: actions/cache@v3
Expand Down Expand Up @@ -204,7 +204,7 @@ jobs:
python-version: "3.10"
cache: "pip"
cache-dependency-path: ".github/workflows/build.yml"
- run: pip install -U laminci
- run: pip install "laminci@git+https://x-access-token:${{ secrets.LAMIN_BUILD_DOCS }}@github.com/laminlabs/laminci"
- run: nox -s "install(group='docs')"
- uses: actions/download-artifact@v2
- run: nox -s docs
Expand Down
2 changes: 1 addition & 1 deletion lamindb_setup/_schema_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ def _get_modules_metadata(self):
for model in self._get_schema_module(
module_name
).models.__dict__.values()
if model.__class__.__name__ == "ModelBase"
if model.__class__.__name__ == "RegistryMeta"
and model.__name__ not in ["Registry", "ORM"]
and not model._meta.abstract
and model.__get_schema_name__() == module_name
Expand Down
85 changes: 85 additions & 0 deletions lamindb_setup/core/_private_django_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
from __future__ import annotations

import os
from pathlib import Path


def find_vscode_stubs_folder() -> Path | None:
    """Locate the django-stubs folder bundled with the newest Pylance install.

    Each known VSCode extensions directory is probed in order. In every
    directory that exists and contains Pylance installs, the install with the
    highest version number is checked for ``dist/bundled/stubs/django-stubs``.

    Returns:
        The first stubs folder found, or ``None`` if there is none.
    """
    import re

    home = Path.home()
    # Possible locations of VSCode extensions
    possible_locations = [
        home / ".vscode" / "extensions",  # Linux and macOS
        home / ".vscode-server" / "extensions",  # Remote development
    ]
    # Only probe the Windows location when APPDATA is set: with an empty
    # fallback the path would be relative and resolve against the cwd.
    appdata = os.environ.get("APPDATA")
    if appdata:
        possible_locations.append(Path(appdata) / "Code" / "User" / "extensions")
    # Some Linux distributions
    possible_locations.append(Path("/usr/share/code/resources/app/extensions"))

    prefix = "ms-python.vscode-pylance-"

    def version_key(folder: Path) -> tuple[tuple[int, ...], str]:
        # Compare version components numerically so that 2024.10.x sorts
        # after 2024.9.x; the folder name breaks ties deterministically.
        numbers = re.findall(r"\d+", folder.name[len(prefix):])
        return tuple(int(number) for number in numbers), folder.name

    for location in possible_locations:
        if not location.exists():
            continue
        # Look for Pylance extension folders
        pylance_folders = list(location.glob(f"{prefix}*"))
        if not pylance_folders:
            continue
        latest_pylance = max(pylance_folders, key=version_key)
        stubs_folder = latest_pylance / "dist" / "bundled" / "stubs" / "django-stubs"
        if stubs_folder.exists():
            return stubs_folder

    return None


def private_django_api(reverse=False):
    """Prefix selected Django attribute names with an underscore, or undo it.

    Runs a plain-text search-and-replace over every ``*.py`` file of the
    installed ``django`` package and, if Pylance's bundled django-stubs are
    found, over every ``*.pyi`` file in them. A file is rewritten in place
    only when its content actually changed.

    NOTE: replacement is substring-based, so every occurrence of a listed
    name is affected, including inside longer identifiers.

    Args:
        reverse: If ``False``, replace each ``name`` with ``_name``. If
            ``True``, replace each ``_name`` with ``name`` to undo the patch.
    """
    from django import db

    attributes = [
        "DoesNotExist",
        "MultipleObjectsReturned",
        "add_to_class",
        "adelete",
        "refresh_from_db",
        "asave",
        "clean",
        "clean_fields",
        "date_error_message",
        "full_clean",
        "get_constraints",
        "get_deferred_fields",
        "prepare_database_save",
        "save_base",
        "serializable_value",
        "unique_error_message",
        "validate_constraints",
        "validate_unique",
    ]
    # The async refresh name is handled last. By then the earlier
    # "refresh_from_db" replacement has already changed its spelling, so the
    # name to match differs by direction.
    if not reverse:
        attributes.append("a_refresh_from_db")
    else:
        attributes.append("arefresh_from_db")

    # Order matters: replacements are applied sequentially in list order.
    if reverse:
        replacements = [(f"_{attr}", attr) for attr in attributes]
    else:
        replacements = [(attr, f"_{attr}") for attr in attributes]

    django_path = Path(db.__file__).parent.parent

    # Python sources and stubs default to UTF-8, so do not depend on the
    # platform's locale encoding when reading or writing them.
    encoding = "utf-8"

    def prune_file(file_path):
        content = file_path.read_text(encoding=encoding)
        original_content = content

        for old_name, new_name in replacements:
            content = content.replace(old_name, new_name)

        if not reverse:
            # The "DoesNotExist" replacement also hits these two exception
            # names; restore their original spelling.
            content = content.replace("Field_DoesNotExist", "FieldDoesNotExist")
            content = content.replace("Object_DoesNotExist", "ObjectDoesNotExist")

        if content != original_content:
            file_path.write_text(content, encoding=encoding)

    for file_path in django_path.rglob("*.py"):
        prune_file(file_path)

    pylance_path = find_vscode_stubs_folder()
    if pylance_path is not None:
        for file_path in pylance_path.rglob("*.pyi"):
            prune_file(file_path)
37 changes: 33 additions & 4 deletions lamindb_setup/core/_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,25 @@ class SetupSettings:
_instance_settings_env: str | None = None

_auto_connect_path: Path = settings_dir / "auto_connect"
_private_django_api_path: Path = settings_dir / "private_django_api"

@property
def _instance_settings_path(self) -> Path:
    """Path returned by `current_instance_settings_file()`."""
    settings_file = current_instance_settings_file()
    return settings_file

@property
def settings_dir(self) -> Path:
    """Directory in which settings are persisted locally."""
    return settings_dir

@property
def auto_connect(self) -> bool:
"""Auto-connect to loaded instance upon lamindb import."""
"""Auto-connect to loaded instance upon lamindb import.
`lamin init` and `lamin load` switch this to `True`.
`ln.connect()` doesn't change the value of this setting.
"""
return self._auto_connect_path.exists()

@auto_connect.setter
Expand All @@ -48,9 +55,30 @@ def auto_connect(self, value: bool) -> None:
else:
self._auto_connect_path.unlink(missing_ok=True)

@property
def private_django_api(self) -> bool:
"""Turn internal Django API private to clean up the API (default `False`).
This patches your local pip-installed django installation. You can undo
the patch by setting this back to `False`.
"""
return self._private_django_api_path.exists()

@private_django_api.setter
def private_django_api(self, value: bool) -> None:
    """Apply or undo the Django patch when the requested state is new."""
    from ._private_django_api import private_django_api

    currently_private = self.private_django_api
    # Already in the requested state: skip so the patch never runs twice.
    if bool(value) == currently_private:
        return
    if value:
        private_django_api()
        self._private_django_api_path.touch()
    else:
        private_django_api(reverse=True)
        self._private_django_api_path.unlink(missing_ok=True)

@property
def user(self) -> UserSettings:
""":class:`~lamindb.setup.core.UserSettings`."""
"""Settings of current user."""
env_changed = (
self._user_settings_env is not None
and self._user_settings_env != get_env_name()
Expand All @@ -64,7 +92,7 @@ def user(self) -> UserSettings:

@property
def instance(self) -> InstanceSettings:
""":class:`~lamindb.setup.core.InstanceSettings`."""
"""Settings of current LaminDB instance."""
env_changed = (
self._instance_settings_env is not None
and self._instance_settings_env != get_env_name()
Expand All @@ -76,7 +104,7 @@ def instance(self) -> InstanceSettings:

@property
def storage(self) -> StorageSettings:
""":class:`~lamindb.setup.core.StorageSettings`."""
"""Settings of default storage."""
return self.instance.storage

@property
Expand All @@ -92,6 +120,7 @@ def __repr__(self) -> str:
"""Rich string representation."""
repr = self.user.__repr__()
repr += f"\nAuto-connect in Python: {self.auto_connect}\n"
repr += f"\nPrune Django API: {self.private_django_api}\n"
if self._instance_exists:
repr += self.instance.__repr__()
else:
Expand Down
31 changes: 29 additions & 2 deletions lamindb_setup/core/hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@

import base64
import hashlib
from typing import TYPE_CHECKING
from concurrent.futures import ThreadPoolExecutor
from typing import TYPE_CHECKING, Iterable

import psutil

if TYPE_CHECKING:
from .types import Path, UPathStr
Expand Down Expand Up @@ -40,7 +43,7 @@ def hash_set(s: set[str]) -> str:
return to_b64_str(hashlib.md5(bstr).digest())[:20]


def hash_md5s_from_dir(hashes: list[str]) -> tuple[str, str]:
def hash_md5s_from_dir(hashes: Iterable[str]) -> tuple[str, str]:
# need to sort below because we don't want the order of parsing the dir to
# affect the hash
digests = b"".join(
Expand Down Expand Up @@ -83,3 +86,27 @@ def hash_file(
).digest()
hash_type = "sha1-fl"
return to_b64_str(digest)[:22], hash_type


def hash_dir(path: Path) -> tuple[int, str, str, int]:
    """Hash every file below a directory and aggregate the results.

    Files are found with a recursive glob, hashed one by one with
    `hash_file`, and the per-file hashes are combined by
    `hash_md5s_from_dir`. Hashing runs in a thread pool when more than one
    CPU is available.

    Args:
        path: Directory to traverse recursively.

    Returns:
        A tuple ``(size, hash, hash_type, n_objects)``: the summed
        ``st_size`` of all files, the combined hash and its type as returned
        by `hash_md5s_from_dir`, and the number of files hashed.

    Raises:
        ValueError: If no file is found below ``path``.
    """
    files = (subpath for subpath in path.rglob("*") if subpath.is_file())

    def hash_size(file):
        file_size = file.stat().st_size
        return hash_file(file, file_size)[0], file_size

    try:
        n_workers = len(psutil.Process().cpu_affinity())
    except AttributeError:
        # cpu_affinity() is not available on this platform
        n_workers = psutil.cpu_count()
    # cpu_count() may return None when the count cannot be determined;
    # fall back to sequential hashing in that case.
    if n_workers is not None and n_workers > 1:
        with ThreadPoolExecutor(n_workers) as pool:
            hashes_sizes = list(pool.map(hash_size, files))
    else:
        hashes_sizes = [hash_size(file) for file in files]
    if not hashes_sizes:
        # zip(*[]) would otherwise fail with an opaque unpacking error
        raise ValueError(f"No files found in directory: {path}")
    hashes, sizes = zip(*hashes_sizes)

    dir_hash, hash_type = hash_md5s_from_dir(hashes)
    n_objects = len(hashes)
    size = sum(sizes)
    return size, dir_hash, hash_type, n_objects
5 changes: 4 additions & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,10 @@ def install(session: nox.Session, group: str) -> None:
elif group == "storage":
cmds = """uv pip install --system gcsfs"""
elif group == "hub-prod":
cmds = schema_deps.strip()
# cmds = "git clone --depth 1 https://github.com/django/django\n"
# cmds += "uv pip install --system -e ./django\n"
cmds = ""
cmds += schema_deps.strip()
elif group == "hub-local":
cmds = schema_deps + """uv pip install --system -e ./laminhub/rest-hub"""
# current package
Expand Down
12 changes: 0 additions & 12 deletions tests/hub-prod/test_auto_connect.py

This file was deleted.

55 changes: 55 additions & 0 deletions tests/hub-prod/test_global_settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from __future__ import annotations

import subprocess
from pathlib import Path

import lamindb_setup as ln_setup
from lamindb_setup.core.hashing import hash_dir


def test_auto_connect():
    """Toggling `auto_connect` creates and removes its marker file."""
    current_state = ln_setup.settings.auto_connect
    try:
        ln_setup.settings.auto_connect = True
        assert ln_setup.settings._auto_connect_path.exists()
        ln_setup.settings.auto_connect = False
        assert not ln_setup.settings._auto_connect_path.exists()
    finally:
        # restore the pre-test setting even if an assertion fails
        ln_setup.settings.auto_connect = current_state


def test_private_django_api():
    """Enabling `private_django_api` changes Django's files; disabling restores them."""
    from django import db

    django_dir = Path(db.__file__).parent.parent

    current_state = ln_setup.settings.private_django_api
    try:
        # Start from the unpatched state so the baseline hash is meaningful
        # even if the setting was already enabled before the test.
        ln_setup.settings.private_django_api = False
        # Whether the package is clean is checked via the internal hashing
        # function. A `git diff` on a source checkout would be more direct,
        # but installing django via git didn't succeed on CI.
        _, orig_hash, _, _ = hash_dir(django_dir)

        ln_setup.settings.private_django_api = True
        _, patched_hash, _, _ = hash_dir(django_dir)
        assert patched_hash != orig_hash
        assert ln_setup.settings._private_django_api_path.exists()

        ln_setup.settings.private_django_api = False
        _, restored_hash, _, _ = hash_dir(django_dir)
        assert restored_hash == orig_hash
        assert not ln_setup.settings._private_django_api_path.exists()
    finally:
        # restore the pre-test setting even if an assertion fails
        ln_setup.settings.private_django_api = current_state

0 comments on commit fd4ed24

Please sign in to comment.