Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update GO database #10

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 91 additions & 52 deletions code/over_representation_analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand All @@ -392,44 +392,44 @@
"text/plain": [
"[{'query': 'YLL040C',\n",
" '_id': '850619',\n",
" '_score': 13.181119,\n",
" '_score': 14.875011,\n",
" 'entrezgene': '850619'},\n",
" {'query': 'YAL068C',\n",
" '_id': '851229',\n",
" '_score': 12.909299,\n",
" '_score': 14.873168,\n",
" 'entrezgene': '851229'},\n",
" {'query': 'YAL067C',\n",
" '_id': '851230',\n",
" '_score': 13.242119,\n",
" '_score': 14.873168,\n",
" 'entrezgene': '851230'},\n",
" {'query': 'YLL041C',\n",
" '_id': '850685',\n",
" '_score': 13.257929,\n",
" '_score': 14.873168,\n",
" 'entrezgene': '850685'},\n",
" {'query': 'YAL066W', 'notfound': True},\n",
" {'query': 'YLL042C',\n",
" '_id': '850684',\n",
" '_score': 13.2468605,\n",
" '_score': 14.877769,\n",
" 'entrezgene': '850684'},\n",
" {'query': 'YAL065C',\n",
" '_id': '851232',\n",
" '_score': 13.765591,\n",
" '_score': 14.362342,\n",
" 'entrezgene': '851232'},\n",
" {'query': 'YLL043W',\n",
" '_id': '850683',\n",
" '_score': 13.254437,\n",
" '_score': 14.359688,\n",
" 'entrezgene': '850683'},\n",
" {'query': 'YAL062W',\n",
" '_id': '851237',\n",
" '_score': 13.253534,\n",
" '_score': 14.873168,\n",
" 'entrezgene': '851237'},\n",
" {'query': 'YLL045C',\n",
" '_id': '850682',\n",
" '_score': 13.251958,\n",
" '_score': 14.873168,\n",
" 'entrezgene': '850682'}]"
]
},
"execution_count": 10,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -451,7 +451,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 5,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -588,7 +588,7 @@
"[500 rows x 4 columns]"
]
},
"execution_count": 11,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -617,7 +617,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 6,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -646,7 +646,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 7,
"metadata": {},
"outputs": [
{
Expand All @@ -656,12 +656,12 @@
"requests.get(http://purl.obolibrary.org/obo/go/go-basic.obo, stream=True)\n",
" WROTE: go-basic.obo\n",
"\n",
"go-basic.obo: fmt(1.2) rel(2020-03-23) 47,232 GO Terms\n",
"go-basic.obo: fmt(1.2) rel(2020-06-01) 47,233 GO Terms\n",
" EXISTS: go-basic.obo\n",
"go-basic.obo: fmt(1.2) rel(2020-03-23) 47,232 GO Terms; optional_attrs(relationship)\n",
"go-basic.obo: fmt(1.2) rel(2020-06-01) 47,233 GO Terms; optional_attrs(relationship)\n",
"FTP RETR ftp.ncbi.nlm.nih.gov gene/DATA gene2go.gz -> gene2go.gz\n",
" gunzip gene2go.gz\n",
"HMS:0:00:02.975079 94,086 annotations, 6,349 genes, 6,156 GOs, 1 taxids READ: gene2go \n"
"HMS:0:00:03.118437 94,569 annotations, 6,347 genes, 6,179 GOs, 1 taxids READ: gene2go \n"
]
}
],
Expand Down Expand Up @@ -694,7 +694,45 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"GOTerm('GO:0000001'):\n",
" id:GO:0000001\n",
" item_id:GO:0000001\n",
" name:mitochondrion inheritance\n",
" namespace:biological_process\n",
" _parents: 2 items\n",
" GO:0048311\n",
" GO:0048308\n",
" parents: 2 items\n",
" GO:0048311\tlevel-05\tdepth-06\tmitochondrion distribution [biological_process]\n",
" GO:0048308\tlevel-05\tdepth-05\torganelle inheritance [biological_process]\n",
" children: 0 items\n",
" level:6\n",
" depth:7\n",
" is_obsolete:False\n",
" alt_ids: 0 items\n",
" relationship: 0 items\n",
" relationship_rev: 0 items\n",
" reldepth:7"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"godag['GO:0000001']"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
Expand All @@ -703,13 +741,13 @@
"text": [
"\n",
"Load BP Gene Ontology Analysis ...\n",
" 88% 3,887 of 4,436 population items found in association\n",
" 88% 3,895 of 4,436 population items found in association\n",
"\n",
"Load CC Gene Ontology Analysis ...\n",
" 93% 4,141 of 4,436 population items found in association\n",
" 94% 4,149 of 4,436 population items found in association\n",
"\n",
"Load MF Gene Ontology Analysis ...\n",
" 75% 3,338 of 4,436 population items found in association\n"
" 76% 3,362 of 4,436 population items found in association\n"
]
}
],
Expand All @@ -722,6 +760,7 @@
" obodag, # Ontologies\n",
" propagate_counts = False, # ???\n",
" alpha = 0.05, # default significance cut-off\n",
" ev_inc = 'IGI',\n",
" methods = ['fdr_bh']) # default multipletest correction method"
]
},
Expand All @@ -734,45 +773,45 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Run BP Gene Ontology Analysis: current study set of 500 IDs ... 88% 405 of 461 study items found in association\n",
"Run BP Gene Ontology Analysis: current study set of 500 IDs ... 87% 403 of 461 study items found in association\n",
" 92% 461 of 500 study items found in population(4436)\n",
"Calculating 2,843 uncorrected p-values using fisher\n",
" 2,843 GO terms are associated with 3,845 of 4,436 population items\n",
" 902 GO terms are associated with 405 of 499 study items\n",
"Calculating 2,856 uncorrected p-values using fisher\n",
" 2,856 GO terms are associated with 3,853 of 4,436 population items\n",
" 892 GO terms are associated with 403 of 499 study items\n",
" METHOD fdr_bh:\n",
" 0 GO terms found significant (< 0.05=alpha) ( 0 enriched + 0 purified): statsmodels fdr_bh\n",
" 0 study items associated with significant GO IDs (enriched)\n",
" 0 study items associated with significant GO IDs (purified)\n",
"\n",
"Run CC Gene Ontology Analysis: current study set of 500 IDs ... 91% 418 of 461 study items found in association\n",
"Run CC Gene Ontology Analysis: current study set of 500 IDs ... 91% 419 of 461 study items found in association\n",
" 92% 461 of 500 study items found in population(4436)\n",
"Calculating 712 uncorrected p-values using fisher\n",
" 712 GO terms are associated with 4,099 of 4,436 population items\n",
" 238 GO terms are associated with 418 of 499 study items\n",
"Calculating 721 uncorrected p-values using fisher\n",
" 721 GO terms are associated with 4,107 of 4,436 population items\n",
" 239 GO terms are associated with 419 of 499 study items\n",
" METHOD fdr_bh:\n",
" 0 GO terms found significant (< 0.05=alpha) ( 0 enriched + 0 purified): statsmodels fdr_bh\n",
" 0 study items associated with significant GO IDs (enriched)\n",
" 0 study items associated with significant GO IDs (purified)\n",
"\n",
"Run MF Gene Ontology Analysis: current study set of 500 IDs ... 73% 338 of 461 study items found in association\n",
"Run MF Gene Ontology Analysis: current study set of 500 IDs ... 74% 341 of 461 study items found in association\n",
" 92% 461 of 500 study items found in population(4436)\n",
"Calculating 1,792 uncorrected p-values using fisher\n",
" 1,792 GO terms are associated with 3,299 of 4,436 population items\n",
" 516 GO terms are associated with 338 of 499 study items\n",
"Calculating 1,802 uncorrected p-values using fisher\n",
" 1,802 GO terms are associated with 3,322 of 4,436 population items\n",
" 517 GO terms are associated with 341 of 499 study items\n",
" METHOD fdr_bh:\n",
" 0 GO terms found significant (< 0.05=alpha) ( 0 enriched + 0 purified): statsmodels fdr_bh\n",
" 0 study items associated with significant GO IDs (enriched)\n",
" 0 study items associated with significant GO IDs (purified)\n",
"Significant results with n=500: 0 BP + 0 MF + 0 CC\n",
" 58 items WROTE: C:\\Users\\bejsab\\Documents\\repos\\phenobooth-analysis\\data\\final\\p_values_top_redness.tsv\n"
" 59 items WROTE: C:\\Users\\bejsab\\Documents\\repos\\phenobooth-analysis\\data\\final\\p_values_top_redness.tsv\n"
]
}
],
Expand Down Expand Up @@ -807,15 +846,15 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Significant results with n=500: 0 BP + 0 MF + 0 CC\n",
" 88 items WROTE: C:\\Users\\bejsab\\Documents\\repos\\phenobooth-analysis\\data\\final\\p_values_top_norm_redness.tsv\n"
" 91 items WROTE: C:\\Users\\bejsab\\Documents\\repos\\phenobooth-analysis\\data\\final\\p_values_top_norm_redness.tsv\n"
]
}
],
Expand All @@ -825,15 +864,15 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Significant results with n=500: 0 BP + 0 MF + 0 CC\n",
" 84 items WROTE: C:\\Users\\bejsab\\Documents\\repos\\phenobooth-analysis\\data\\final\\p_values_bottom_redness.tsv\n"
" 82 items WROTE: C:\\Users\\bejsab\\Documents\\repos\\phenobooth-analysis\\data\\final\\p_values_bottom_redness.tsv\n"
]
}
],
Expand All @@ -843,15 +882,15 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Significant results with n=500: 0 BP + 0 MF + 0 CC\n",
" 54 items WROTE: C:\\Users\\bejsab\\Documents\\repos\\phenobooth-analysis\\data\\final\\p_values_bottom_norm_redness.tsv\n"
" 50 items WROTE: C:\\Users\\bejsab\\Documents\\repos\\phenobooth-analysis\\data\\final\\p_values_bottom_norm_redness.tsv\n"
]
}
],
Expand All @@ -872,15 +911,15 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Significant results with n=100: 0 BP + 0 MF + 0 CC\n",
" 122 items WROTE: C:\\Users\\bejsab\\Documents\\repos\\phenobooth-analysis\\data\\final\\p_values_top_redness.tsv\n"
" 128 items WROTE: C:\\Users\\bejsab\\Documents\\repos\\phenobooth-analysis\\data\\final\\p_values_top_redness.tsv\n"
]
}
],
Expand All @@ -890,15 +929,15 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Significant results with n=100: 0 BP + 0 MF + 0 CC\n",
" 135 items WROTE: C:\\Users\\bejsab\\Documents\\repos\\phenobooth-analysis\\data\\final\\p_values_top_norm_redness.tsv\n"
" 139 items WROTE: C:\\Users\\bejsab\\Documents\\repos\\phenobooth-analysis\\data\\final\\p_values_top_norm_redness.tsv\n"
]
}
],
Expand All @@ -908,15 +947,15 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Significant results with n=100: 0 BP + 0 MF + 0 CC\n",
" 104 items WROTE: C:\\Users\\bejsab\\Documents\\repos\\phenobooth-analysis\\data\\final\\p_values_bottom_redness.tsv\n"
" 106 items WROTE: C:\\Users\\bejsab\\Documents\\repos\\phenobooth-analysis\\data\\final\\p_values_bottom_redness.tsv\n"
]
}
],
Expand All @@ -926,15 +965,15 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 34,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Significant results with n=100: 0 BP + 0 MF + 0 CC\n",
" 90 items WROTE: C:\\Users\\bejsab\\Documents\\repos\\phenobooth-analysis\\data\\final\\p_values_bottom_norm_redness.tsv\n"
" 89 items WROTE: C:\\Users\\bejsab\\Documents\\repos\\phenobooth-analysis\\data\\final\\p_values_bottom_norm_redness.tsv\n"
]
}
],
Expand All @@ -951,7 +990,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 35,
"metadata": {},
"outputs": [
{
Expand All @@ -972,7 +1011,7 @@
"Significant results with n=120:\t0\t0\t0\t0\n",
"Significant results with n=130:\t0\t0\t0\t0\n",
"Significant results with n=140:\t0\t0\t0\t0\n",
"Significant results with n=150:\t2\t0\t0\t0\n",
"Significant results with n=150:\t1\t0\t0\t0\n",
"Significant results with n=160:\t1\t0\t0\t0\n",
"Significant results with n=170:\t1\t0\t0\t0\n",
"Significant results with n=180:\t1\t0\t0\t0\n",
Expand Down
Loading