Skip to content

Commit

Permalink
Merge pull request #227 from FlyBase/gp
Browse files Browse the repository at this point in the history
geneproduct data for testing
  • Loading branch information
ianlongden authored Sep 13, 2023
2 parents bc3f7db + 96b272b commit 5ae75b3
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 3 deletions.
98 changes: 98 additions & 0 deletions Load/geneproduct.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
r"""
:synopsis: Create genes and alleles etc needed for testing of geneproducts.
:moduleauthor: Ian Longden <[email protected]>
Create 30 genes for testing geneproducts.
The first ten are linked only to alleles:
fb_test=# select f.name, f.uniquename, cvt.name from feature f, cvterm cvt where f.type_id = cvt.cvterm_id and f.name like 'gpt5%';
name | uniquename | name
------------+-------------+--------
gpt5 | FBgn0000126 | gene
gpt5[Clk1] | FBal0000165 | allele
gpt5[Clk2] | FBal0000166 | allele
The second ten are also linked to mRNA:
fb_test=# select f.name, f.uniquename, cvt.name from feature f, cvterm cvt where f.type_id = cvt.cvterm_id and f.name like 'gpt15%';
name | uniquename | name
-------------+-------------+--------
gpt15 | FBgn0000136 | gene
gpt15-RA | FBtr0000106 | mRNA
gpt15-RB | FBtr0000107 | mRNA
gpt15[Clk1] | FBal0000185 | allele
gpt15[Clk2] | FBal0000186 | allele
The last ten are linked to mRNA and polypeptides:
fb_test=# select f.name, f.uniquename, cvt.name from feature f, cvterm cvt where f.type_id = cvt.cvterm_id and f.name like 'gpt25%';
name | uniquename | name
-------------+-------------+-------------
gpt25 | FBgn0000146 | gene
gpt25-RA | FBtr0000126 | mRNA
gpt25-RB | FBtr0000127 | mRNA
gpt25[Clk1] | FBal0000205 | allele
gpt25[Clk2] | FBal0000206 | allele
gpt25-PB | FBpp0000027 | polypeptide
gpt25-PA | FBpp0000026 | polypeptide
"""
from .gene_alleles import create_gene_alleles

# Parameterised SQL templates used by create_geneproducts; the %s placeholders
# are filled in by cursor.execute().

# Insert one feature row; RETURNING hands back the new feature_id.
feat_sql = """ INSERT INTO feature (dbxref_id, organism_id, name, uniquename, residues, seqlen, type_id)
VALUES (%s, %s, %s, %s, %s, %s, %s) RETURNING feature_id"""
# Link an existing synonym to a feature for a given publication.
fs_sql = """ INSERT INTO feature_synonym (synonym_id, feature_id, pub_id, is_current) VALUES (%s, %s, %s, %s) """
# Insert one synonym row; RETURNING hands back the new synonym_id.
syn_sql = """ INSERT INTO synonym (name, type_id, synonym_sgml) VALUES (%s, %s, %s) RETURNING synonym_id """
# Insert a typed subject -> object relationship between two features;
# RETURNING hands back the new feature_relationship_id.
feat_rel_sql = """ INSERT INTO feature_relationship (subject_id, object_id, type_id)
VALUES (%s, %s, %s) RETURNING feature_relationship_id """


def create_geneproducts(cursor, organism_id, feature_id, cvterm_id, dbxref_id, db_id, pub_id):
    """Create the genes, alleles, transcripts and polypeptides used by geneproduct tests.

    Calls ``create_gene_alleles`` to build 30 ``gpt`` genes with two ``Clk``
    alleles each, then adds gene products for the genes numbered 10 to 29:

    * ``gpt10`` .. ``gpt29`` each get two mRNA features (``<gene>-RA`` and
      ``<gene>-RB``) related to the gene with ``partof``.
    * ``gpt20`` .. ``gpt29`` additionally get two polypeptide features
      (``<gene>-PA`` and ``<gene>-PB``), each related to the transcript with
      the same letter using ``producedby``.

    Every new feature also gets a symbol synonym carrying its own name, linked
    to the feature as current for ``pub_id``.

    Args:
        cursor: Database cursor; only ``execute`` and ``fetchone`` are used.
        organism_id: Mapping of organism key to organism_id; ``'Dmel'`` is read.
        feature_id: Mapping of feature name to feature_id. The gene names must
            already be present after ``create_gene_alleles`` returns; the new
            transcript and polypeptide ids are added to it in place.
        cvterm_id: Mapping of cvterm name to cvterm_id; ``'mRNA'``,
            ``'polypeptide'``, ``'symbol'``, ``'partof'`` and ``'producedby'``
            are read here.
        dbxref_id: Not used by this function; kept so existing callers work.
        db_id: Passed through to ``create_gene_alleles``.
        pub_id: Publication id used for the feature_synonym rows and passed
            through to ``create_gene_alleles``.
    """
    create_gene_alleles(
        cursor, organism_id, feature_id, cvterm_id, db_id, pub_id,
        num_genes=30,
        num_alleles=2,
        gene_prefix='gpt',
        allele_prefix=None,
        tool_prefix='Clk'
    )

    postfixes = ['A', 'B']

    # Genes below 10 keep alleles only, so start at 10.
    for gene_count in range(10, 30):
        gene_name = f"gpt{gene_count}"

        # 10 -> 29 have transcripts (mRNA) named gptx-Ry, y = 'A', 'B'.
        for postfix in postfixes:
            tr_name = f"{gene_name}-R{postfix}"
            # 'FBtr:temp_0' is a placeholder uniquename; presumably the
            # database assigns the real FBtr id on insert - TODO confirm.
            feature_id[tr_name] = _add_feature_with_symbol(
                cursor, tr_name, 'FBtr:temp_0', organism_id['Dmel'],
                cvterm_id['mRNA'], cvterm_id['symbol'], pub_id)

            # add relationship to gene
            cursor.execute(feat_rel_sql, (feature_id[tr_name], feature_id[gene_name], cvterm_id['partof']))

        # 20 -> 29 have polypeptides too.
        if gene_count < 20:
            continue

        for postfix in postfixes:
            # Recompute the transcript name for this letter so that -PA pairs
            # with -RA and -PB with -RB, instead of reusing whatever name the
            # transcript loop finished on.
            tr_name = f"{gene_name}-R{postfix}"
            pp_name = f"{gene_name}-P{postfix}"
            # The helper links the symbol synonym to the polypeptide's own
            # feature_id rather than to a transcript.
            feature_id[pp_name] = _add_feature_with_symbol(
                cursor, pp_name, 'FBpp:temp_0', organism_id['Dmel'],
                cvterm_id['polypeptide'], cvterm_id['symbol'], pub_id)

            # add relationship to tr
            # NOTE(review): subject is the transcript and object the
            # polypeptide, exactly as before; confirm this is the direction
            # the 'producedby' consumers expect.
            cursor.execute(feat_rel_sql, (feature_id[tr_name], feature_id[pp_name], cvterm_id['producedby']))


def _add_feature_with_symbol(cursor, name, uniquename, organism, type_id, symbol_type_id, pub_id):
    """Insert one feature plus its current symbol synonym and return the new feature_id."""
    cursor.execute(feat_sql, (None, organism, name, uniquename, None, None, type_id))
    new_feature_id = cursor.fetchone()[0]

    # The synonym text and its sgml form are both the feature name.
    cursor.execute(syn_sql, (name, symbol_type_id, name))
    synonym_id = cursor.fetchone()[0]

    cursor.execute(fs_sql, (synonym_id, new_feature_id, pub_id, True))
    return new_feature_id
4 changes: 4 additions & 0 deletions add-test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from Load.cell_line import add_cell_line_data
from Load.aberration import add_aberration_data
from Load.drivers import add_driver_data
from Load.geneproduct import create_geneproducts

conn = psycopg2.connect(database="fb_test")
cursor = conn.cursor()
Expand Down Expand Up @@ -558,6 +559,9 @@ def load_pub_author_pubprop(parsed_yaml):
fr_id = cursor.fetchone()[0]
cursor.execute(frp_sql, (fr_id, pub_id))

# gene product data
create_geneproducts(cursor, organism_id, feature_id, cvterm_id, dbxref_id, db_id, pub_id)

conn.commit()
conn.close()
print("SUCCESS")
4 changes: 2 additions & 2 deletions data/cv_cvterm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@

# order dependent cv/cvterms. i.e. accession are specific and numbered.
#######################################################################
# Order is important only add to end of SO list. Tests relie on this!!
# Order is important only add to end of SO list. Tests rely on this!!
SO: ['chromosome_arm', 'chromosome', 'gene', 'mRNA', 'DNA', 'golden_path', 'ncRNA_gene',
'regulatory_region', 'chromosome_structure_variation', 'chromosomal_inversion',
'natural population', 'cloned_region', 'engineered_region', 'transgenic_transposable_element',
'transposable_element_insertion_site', 'chromosome_band', 'allele', 'transposable_element',
'natural_transposable_element', 'gene_group', 'polypeptide', 'chromosome_breakpoint', 'engineered_plasmid', 'sgRNA',
'oligo', 'engineered_foreign_gene', 'point_mutation', 'cDNA_clone', 'TSS', 'rescue_region', 'insertion_site', 'synthetic_sequence']
'oligo', 'engineered_foreign_gene', 'point_mutation', 'cDNA_clone', 'TSS', 'rescue_region', 'insertion_site', 'synthetic_sequence', 'RNA']
molecular_function: ['mRNA binding']
cellular_component: ['nucleolus', 'something' ,'extracellular space', 'endoplasmic reticulum']
biological_process: ['activation of immune response', 'defense response to other organism', 'rRNA processing']
Expand Down
2 changes: 1 addition & 1 deletion data/db_dbxref.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
testdb: ['hh-1']
testdb2: []
EMBL-EBI Single Cell Expression Atlas Datasets: []
FBbt: []
FBbt: ['dissociated larval fat cell', 'CP1 lineage neuron']
FBcv: []
FBdv: []
GB: ['GB1', 'GB2']
Expand Down

0 comments on commit 5ae75b3

Please sign in to comment.