Skip to content

Commit

Permalink
added initial kinase Pydantic schema
Browse files Browse the repository at this point in the history
  • Loading branch information
jessicaw9910 committed Sep 27, 2024
1 parent e108f15 commit fcea96d
Showing 1 changed file with 80 additions and 58 deletions.
Original file line number Diff line number Diff line change
@@ -1,60 +1,82 @@
from enum import Enum
from typing import List
from dataclasses import dataclass
from pydantic import BaseModel, constr

# TODO: create a Pydantic model that incorporates UniProt, Pfam, and KLIFS data for all kinases;
# eventually populate with AF2 active BLAminus+ structures from the Dunbrack lab


# TODO: Make Pydantic model instead of dataclass
@dataclass
class KLIFSPocket:
"""Dataclass to hold KLIFS pocket alignment information per kinase.
Attributes
----------
uniprotID : str
UniProt ID
hgncName : str
HGNC name
uniprotSeq : str
UniProt canonical sequence
klifsSeq : str
KLIFS pocket sequence
list_klifs_region : list[str]
List of start and end regions of KLIFS pocket separated by ":"; end region will be the
same as start region if no concatenation necessary to find a single exact match
list_klifs_substr_actual : list[str]
List of substring of KLIFS pocket that maps to the *start region* of the KLIFS pocket
list_klifs_substr_match : list[str]
List of the actual substring used to match to the KLIFS pocket for the region(s) provided;
will be the same as list_klifs_substr_actual if no concatenation necessary to find a single exact match
list_substring_idxs : list[list[int] | None]
List of indices in UniProt sequence where KLIFS substring match starts;
offset by length of preceding KLIFS region with gaps removed
"""

uniprotID: str
hgncName: str
uniprotSeq: str
klifsSeq: str
list_klifs_region: list[str]
list_klifs_substr_actual: list[str]
list_klifs_substr_match: list[str]
list_substring_idxs: list[list[int] | None]

def remove_klifs_list_gaps(self):
"""Remove gaps from KLIFS pocket substring list.
Returns
-------
list_substring_klifs_narm = list[str]
List of KLIFS pocket substrings with gaps removed
"""
from missense_kinase_toolkit.databases.klifs import remove_gaps_from_klifs

list_substring_klifs_narm = [
remove_gaps_from_klifs(substring_klifs)
for substring_klifs in self.list_klifs_substring
]
return list_substring_klifs_narm

class Group(str, Enum):
    """Enum class for kinase groups.

    Members also subclass ``str``, so each member compares equal to its
    string value (e.g. ``Group.TK == "TK"``) and can be looked up from that
    value with ``Group("TK")``.
    """

    # Protein Kinase A, G, and C families
    AGC = "AGC"
    # Atypical protein kinases
    Atypical = "Atypical"
    # Calcium/calmodulin-dependent protein kinase family
    CAMK = "CAMK"
    # Casein kinase 1 family
    CK1 = "CK1"
    # Cyclin-dependent kinase, Mitogen-activated protein kinase,
    # Glycogen synthase kinase, and CDK-like kinase families
    CMGC = "CMGC"
    # Receptor guanylate cyclase family
    RGC = "RGC"
    # Homologs of yeast Sterile 7, Sterile 11, Sterile 20 kinases
    STE = "STE"
    # Tyrosine kinase family
    TK = "TK"
    # Tyrosine kinase-like family
    TKL = "TKL"
    # Other protein kinases
    Other = "Other"


class Family(str, Enum):
    """Enum class for kinase families (>=5 in KinHub).

    Every member's value is identical to its name. Members also subclass
    ``str``, so each compares equal to its string value and can be looked up
    from it with ``Family("CDK")``.
    """

    STE20 = "STE20"
    CAMKL = "CAMKL"
    CDK = "CDK"
    Eph = "Eph"
    MAPK = "MAPK"
    STKR = "STKR"
    NEK = "NEK"
    Src = "Src"
    DYRK = "DYRK"
    PKC = "PKC"
    STE11 = "STE11"
    RSK = "RSK"
    MLK = "MLK"
    GRK = "GRK"
    CK1 = "CK1"
    DMPK = "DMPK"
    STE7 = "STE7"
    PIKK = "PIKK"
    RSKb = "RSKb"
    Alpha = "Alpha"
    Tec = "Tec"
    CAMK1 = "CAMK1"
    PDGFR = "PDGFR"
    ULK = "ULK"
    DAPK = "DAPK"
    RAF = "RAF"
    RIPK = "RIPK"
    MLCK = "MLCK"
    PKA = "PKA"
    MAPKAPK = "MAPKAPK"
    RGC = "RGC"
    CDKL = "CDKL"
    MAST = "MAST"
    TSSK = "TSSK"
    ABC1 = "ABC1"
    PDHK = "PDHK"
    # NOTE(review): presumably the catch-all for families not listed above - confirm.
    Other = "Other"


# Constrained string type: one or more of the listed upper-case letters
# (includes "X"; no gap character).
UniProtAlphabet = constr(pattern=r"^[ACDEFGHIKLMNPQRSTVWXY]+$")
# Constrained string type: the same letters without "X", plus the "-" character.
# Raw string so that "\-" is passed to the regex engine verbatim instead of
# being an invalid string escape (a SyntaxWarning on Python 3.12+); the
# pattern text itself is unchanged.
KLIFSAlphabet = constr(pattern=r"^[ACDEFGHIKLMNPQRSTVWY\-]+$")


class Kinase(BaseModel):
    """Pydantic model for kinase information.

    All fields are required (none declares a default). ``group`` and
    ``family`` entries are validated against the ``Group`` and ``Family``
    enums, and ``uniprot_seq`` / ``klifs_pocket`` must match the
    ``UniProtAlphabet`` / ``KLIFSAlphabet`` patterns respectively.
    """

    # Identifiers and names
    hgnc_name: str
    uniprot_id: str
    kinase_name: str
    manning_name: List[str]
    xname: List[str]

    # Classification; lists, so a kinase may carry more than one entry
    group: List[Group]
    family: List[Family]

    # Pattern-validated sequences
    uniprot_seq: UniProtAlphabet
    klifs_pocket: KLIFSAlphabet

    # NOTE(review): presumably the Pfam domain accession and its boundaries
    # within uniprot_seq - confirm the indexing convention (0- vs 1-based,
    # inclusive vs exclusive end).
    pfam_id: str
    pfam_start: int
    pfam_end: int

    # NOTE(review): klifs_pocket_seq is an unconstrained str alongside the
    # pattern-validated klifs_pocket above - confirm how the two differ.
    klifs_pocket_seq: str
    klifs_pocket_start: int
    klifs_pocket_end: int

0 comments on commit fcea96d

Please sign in to comment.