Skip to content

Commit

Permalink
release v0.5.0
Browse files Browse the repository at this point in the history
  • Loading branch information
markgeejw committed Oct 18, 2024
1 parent bc4f435 commit 048dfc5
Show file tree
Hide file tree
Showing 28 changed files with 3,289 additions and 175 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
VERSION ?= 0.5.0.dev1
VERSION ?= 0.5.0
SHELL := /bin/bash

.PHONY: releasehere
Expand Down
2 changes: 1 addition & 1 deletion anaconda_build/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package:
name: openprotein-python
version: "0.5.0.dev1"
version: "0.5.0"

source:
path: ../
Expand Down
32 changes: 22 additions & 10 deletions openprotein/api/embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,15 @@
from openprotein.api.align import csv_stream
from openprotein.base import APISession
from openprotein.errors import InvalidParameterError
from openprotein.schemas import AttnJob, EmbeddingsJob, Job, LogitsJob, ModelMetadata
from openprotein.schemas import (
AttnJob,
EmbeddingsJob,
GenerateJob,
LogitsJob,
ModelMetadata,
ScoreJob,
ScoreSingleSiteJob,
)
from pydantic import TypeAdapter

PATH_PREFIX = "v1/embeddings"
Expand Down Expand Up @@ -256,7 +264,7 @@ def request_score_post(
model_id: str,
sequences: list[bytes] | list[str],
prompt_id: str | None = None,
) -> Job:
) -> ScoreJob:
"""
POST a request for sequence scoring for the given model ID. \
Returns a Job object referring to this request \
Expand Down Expand Up @@ -284,15 +292,15 @@ def request_score_post(
if prompt_id is not None:
body["prompt_id"] = prompt_id
response = session.post(endpoint, json=body)
return Job.model_validate(response.json())
return ScoreJob.model_validate(response.json())


def request_score_single_site_post(
session: APISession,
model_id: str,
base_sequence: bytes,
base_sequence: bytes | str,
prompt_id: str | None = None,
) -> Job:
) -> ScoreSingleSiteJob:
"""
POST a request for single site mutation scoring for the given model ID. \
Returns a Job object referring to this request \
Expand All @@ -314,12 +322,16 @@ def request_score_single_site_post(
endpoint = PATH_PREFIX + f"/models/{model_id}/score_single_site"

body: dict = {
"base_sequence": base_sequence.decode(),
"base_sequence": (
base_sequence.decode()
if isinstance(base_sequence, bytes)
else base_sequence
),
}
if prompt_id is not None:
body["prompt_id"] = prompt_id
response = session.post(endpoint, json=body)
return Job.model_validate(response.json())
return ScoreSingleSiteJob.model_validate(response.json())


def request_generate_post(
Expand All @@ -332,7 +344,7 @@ def request_generate_post(
max_length: int = 1000,
random_seed: int | None = None,
prompt_id: str | None = None,
) -> Job:
) -> GenerateJob:
"""
POST a request for sequence generation for the given model ID. \
Returns a Job object referring to this request \
Expand Down Expand Up @@ -364,7 +376,7 @@ def request_generate_post(
random_seed = random.randrange(2**32)

body: dict = {
"generate_n": num_samples,
"n_sequences": num_samples,
"temperature": temperature,
"maxlen": max_length,
}
Expand All @@ -377,4 +389,4 @@ def request_generate_post(
if prompt_id is not None:
body["prompt_id"] = prompt_id
response = session.post(endpoint, json=body)
return Job.model_validate(response.json())
return GenerateJob.model_validate(response.json())
10 changes: 5 additions & 5 deletions openprotein/api/fold.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from openprotein.api.embedding import ModelMetadata
from openprotein.base import APISession
from openprotein.schemas import Job
from openprotein.schemas import FoldJob
from pydantic import TypeAdapter

PATH_PREFIX = "v1/fold"
Expand Down Expand Up @@ -80,7 +80,7 @@ def fold_models_esmfold_post(
session: APISession,
sequences: list[bytes],
num_recycles: int | None = None,
) -> Job:
) -> FoldJob:
"""
POST a request for structure prediction using ESMFold. Returns a Job object referring to this request
that can be used to retrieve results later.
Expand Down Expand Up @@ -108,7 +108,7 @@ def fold_models_esmfold_post(
body["num_recycles"] = num_recycles

response = session.post(endpoint, json=body)
return Job.model_validate(response.json())
return FoldJob.model_validate(response.json())


def fold_models_alphafold2_post(
Expand All @@ -117,7 +117,7 @@ def fold_models_alphafold2_post(
num_recycles: int | None = None,
num_models: int = 1,
num_relax: int = 0,
) -> Job:
) -> FoldJob:
"""
POST a request for structure prediction using AlphaFold2. Returns a Job object referring to this request
that can be used to retrieve results later.
Expand Down Expand Up @@ -152,4 +152,4 @@ def fold_models_alphafold2_post(
response = session.post(endpoint, json=body)
# GET endpoint for AF2 expects the query sequence (first sequence) within the MSA
# since we don't know what it is, leave the sequence out of the future to be retrieved when calling get()
return Job.model_validate(response.json())
return FoldJob.model_validate(response.json())
79 changes: 74 additions & 5 deletions openprotein/api/predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,16 @@
import numpy as np
import pandas as pd
from openprotein.base import APISession
from openprotein.schemas import Job, PredictorMetadata
from openprotein.schemas import (
CVJob,
Job,
PredictJob,
PredictMultiJob,
PredictMultiSingleSiteJob,
PredictorMetadata,
PredictSingleSiteJob,
TrainJob,
)
from pydantic import TypeAdapter

PATH_PREFIX = "v1/predictor"
Expand Down Expand Up @@ -99,13 +108,33 @@ def predictor_fit_gp_post(
body["description"] = description

response = session.post(endpoint, json=body)
return Job.model_validate(response.json())
return TrainJob.model_validate(response.json())


def predictor_delete(session: APISession, predictor_id: str):
raise NotImplementedError()


def predictor_crossvalidate_post(
    session: APISession, predictor_id: str, n_splits: int | None = None
):
    """
    POST a cross-validation request for the given predictor.

    Parameters
    ----------
    session : APISession
        Session object for API communication.
    predictor_id : str
        ID of the predictor to cross-validate.
    n_splits : int | None
        Number of splits to use; omitted from the request when None so the
        server-side default applies.

    Returns
    -------
    CVJob
    """
    endpoint = PATH_PREFIX + f"/{predictor_id}/crossvalidate"

    # only forward n_splits when the caller provided one
    query = {} if n_splits is None else {"n_splits": n_splits}
    response = session.post(endpoint, params=query)

    return CVJob.model_validate(response.json())


def predictor_crossvalidate_get(session: APISession, crossvalidate_job_id: str):
    """
    GET the raw (encoded) results of a cross-validation job.

    Parameters
    ----------
    session : APISession
        Session object for API communication.
    crossvalidate_job_id : str
        Job ID of the cross-validation request.

    Returns
    -------
    bytes
        Raw response body; decode with `decode_crossvalidate`.
    """
    response = session.get(PATH_PREFIX + f"/crossvalidate/{crossvalidate_job_id}")
    return response.content


def predictor_predict_post(
session: APISession, predictor_id: str, sequences: list[bytes] | list[str]
):
Expand All @@ -117,7 +146,25 @@ def predictor_predict_post(
}
response = session.post(endpoint, json=body)

return Job.model_validate(response.json())
return PredictJob.model_validate(response.json())


def predictor_predict_single_site_post(
    session: APISession,
    predictor_id: str,
    base_sequence: bytes | str,
):
    """
    POST a single-site prediction request for the given predictor.

    Parameters
    ----------
    session : APISession
        Session object for API communication.
    predictor_id : str
        ID of the predictor to use.
    base_sequence : bytes | str
        Base sequence for the single-site prediction request.

    Returns
    -------
    PredictSingleSiteJob
    """
    endpoint = PATH_PREFIX + f"/{predictor_id}/predict_single_site"

    # the API expects a unicode string; accept raw bytes for convenience
    if isinstance(base_sequence, bytes):
        sequence = base_sequence.decode()
    else:
        sequence = base_sequence

    response = session.post(endpoint, json={"base_sequence": sequence})

    return PredictSingleSiteJob.model_validate(response.json())


def predictor_predict_get_sequences(
Expand Down Expand Up @@ -179,9 +226,9 @@ def predictor_predict_get_batched_result(
return response.content


def decode_score(data: bytes, batched: bool = False) -> tuple[np.ndarray, np.ndarray]:
def decode_predict(data: bytes, batched: bool = False) -> tuple[np.ndarray, np.ndarray]:
"""
Decode embedding.
Decode prediction scores.
Args:
data (bytes): raw bytes encoding the array received over the API
Expand All @@ -203,3 +250,25 @@ def decode_score(data: bytes, batched: bool = False) -> tuple[np.ndarray, np.nda
mus = scores[:, ::2]
vars = scores[:, 1::2]
return mus, vars


def decode_crossvalidate(data: bytes) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Decode crossvalidate scores.

    Args:
        data (bytes): raw bytes encoding the CSV received over the API

    Returns:
        y (np.ndarray): decoded array of measured values
        mus (np.ndarray): decoded array of predicted means
        vars (np.ndarray): decoded array of predicted variances
    """
    s = io.BytesIO(data)
    # should contain header and sequence column
    df = pd.read_csv(s)
    values = df.values
    # expected column order: row_num, seq, measurement_name, y, y_mu, y_var
    y = values[:, 3]
    mus = values[:, 4]
    # avoid shadowing the `vars` builtin
    variances = values[:, 5]
    return y, mus, variances
49 changes: 46 additions & 3 deletions openprotein/api/svd.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import io

import numpy as np
from openprotein.base import APISession
from openprotein.errors import InvalidParameterError
from openprotein.schemas import FitJob, Job, SVDEmbeddingsJob, SVDMetadata
from openprotein.schemas import FitJob, SVDEmbeddingsJob, SVDMetadata
from pydantic import TypeAdapter

PATH_PREFIX = "v1/embeddings/svd"
Expand Down Expand Up @@ -40,6 +43,46 @@ def svd_get_sequences(session: APISession, svd_id: str) -> list[bytes]:
return TypeAdapter(list[bytes]).validate_python(response.json())


def embed_get_sequence_result(
    session: APISession, job_id: str, sequence: str | bytes
) -> bytes:
    """
    Get encoded SVD embeddings result for a sequence from the request ID.

    Parameters
    ----------
    session : APISession
        Session object for API communication.
    job_id : str
        Job ID to retrieve results from.
    sequence : str | bytes
        Sequence to retrieve results for.

    Returns
    -------
    bytes
        Raw encoded embedding; decode with `embed_decode`.
    """
    # the endpoint path takes the sequence as text
    seq_str = sequence if isinstance(sequence, str) else sequence.decode()
    response = session.get(PATH_PREFIX + f"/embed/{job_id}/{seq_str}")
    return response.content


def embed_decode(data: bytes) -> np.ndarray:
    """
    Decode an encoded embedding into a numpy array.

    Args:
        data (bytes): raw bytes encoding the array received over the API

    Returns:
        np.ndarray: decoded array
    """
    buffer = io.BytesIO(data)
    # refuse pickled payloads; only plain .npy array data is accepted
    return np.load(buffer, allow_pickle=False)


def svd_delete(session: APISession, svd_id: str):
"""
Delete an SVD model.
Expand Down Expand Up @@ -121,7 +164,7 @@ def svd_fit_post(


def svd_embed_post(
session: APISession, svd_id: str, sequences: list[bytes]
session: APISession, svd_id: str, sequences: list[bytes] | list[str]
) -> SVDEmbeddingsJob:
"""
POST a request for embeddings from the given SVD model.
Expand All @@ -139,7 +182,7 @@ def svd_embed_post(
-------
Job
"""
endpoint = PATH_PREFIX + f"/svd/{svd_id}/embed"
endpoint = PATH_PREFIX + f"/{svd_id}/embed"

sequences_unicode = [(s if isinstance(s, str) else s.decode()) for s in sequences]
body = {
Expand Down
9 changes: 1 addition & 8 deletions openprotein/app/models/align/msa.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,7 @@ def __init__(
"""
super().__init__(session, job)
self.page_size = page_size
self._msa_id = None
self._prompt_id = None

@property
def msa_id(self) -> str:
if self._msa_id is None:
self._msa_id = self.job.job_id
return self._msa_id
self.msa_id = self.job.job_id

# def wait(self, verbose: bool = False):
# _ = self.job.wait(
Expand Down
7 changes: 1 addition & 6 deletions openprotein/app/models/align/prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,7 @@ def __init__(
if msa_id is None:
msa_id = job_api.job_args_get(self.session, job.job_id).get("root_msa")
self._msa_id = msa_id

@property
def prompt_id(self) -> str:
if self._prompt_id is None:
self._prompt_id = self.job.job_id
return self._prompt_id
self.prompt_id = self.job.job_id

# def wait(self, verbose: bool = False, **kwargs) -> Iterator[list[str]]:
# _ = self.job.wait(
Expand Down
12 changes: 9 additions & 3 deletions openprotein/app/models/design.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from openprotein.api import design
from openprotein.base import APISession
from openprotein.schemas import DesignJob, DesignResults
from openprotein.schemas import DesignJob, DesignResults, DesignStep

from .futures import Future, PagedFuture

Expand All @@ -20,8 +20,14 @@ def __str__(self) -> str:
def __repr__(self) -> str:
return repr(self.job)

def _fmt_results(self, results: DesignResults) -> list[dict]:
return [i.model_dump() for i in results.result]
def _fmt_results(
self, results: DesignResults
) -> (
# list[dict]
list[DesignStep]
):
# return [i.model_dump() for i in results.result]
return results.result

@property
def id(self):
Expand Down
Loading

0 comments on commit 048dfc5

Please sign in to comment.