Skip to content

Commit

Permalink
update h5 files
Browse files Browse the repository at this point in the history
  • Loading branch information
nargesr committed Dec 19, 2023
1 parent 10a7f53 commit 4346e87
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 23 deletions.
10 changes: 4 additions & 6 deletions Topyfic/topic.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,16 +181,14 @@ def write_topic_yaml(self, topic_id=None, model_yaml_path="model.yaml", topic_ya
:type save: bool
"""

# check require columns
cols = self.gene_information.reset_index().columns
if not {'gene_name', 'gene_id'}.issubset(cols):
sys.exit(f"Gene information doesn't contain gene_name and gene_id columns!")

# Open the file and load the file
with open(model_yaml_path) as f:
model_yaml = yaml.load(f, Loader=SafeLoader)

if topic_id not in model_yaml['Topic IDs']:
if topic_id is None:
topic_id = self.id

if topic_id not in model_yaml['Topic file_name(s)']:
sys.exit("Topic_id is not in model YAML file!")

topic_yaml = {'Topic ID': topic_id,
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
setup(
name='Topyfic', # the name of your package
packages=['Topyfic'], # same as above
version='v0.4.9', # version number
version='v0.4.11', # version number
license='MIT', # license type
description='Topyfic is a Python package designed to identify reproducible latent dirichlet allocation (LDA) '
'using leiden clustering and harmony for single cell epigenomics data',
# short description
author='Narges Rezaie', # your name
author_email='[email protected]', # your email
url='https://github.com/mortazavilab/Topyfic', # url to your git repo
download_url='https://github.com/mortazavilab/Topyfic/archive/refs/tags/v0.4.9.tar.gz', # link to the tar.gz file associated with this release
download_url='https://github.com/mortazavilab/Topyfic/archive/refs/tags/v0.4.11.tar.gz', # link to the tar.gz file associated with this release
keywords=['Cellular Programs', 'Latent Dirichlet allocation', 'single-cell multiome', 'single-cell RNA-seq',
'gene regulatory network', 'Topic Modeling', 'single-nucleus RNA-seq'], #
python_requires='>=3.9',
Expand Down
47 changes: 32 additions & 15 deletions tutorials/topic_modeling_model.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@ You have to save three type of objects
## Model YAML
```yaml
Assay: single nucleus RNA-seq
Cell-Topic participation ID: IGVF_000001_23
Experiment ID: IGVF_000001
Name of method: Topyfic
Number of topics: 22
Cell-Topic participation file_name: topic_cell_participation.h5ad
Data source: ali-mortazavi:topyfic_annotation
Method Name: Topyfic
Number of topics: 19
Technology:
- Parse
- 10x
Topic IDs:
Topic file_name(s):
- IGVF_000001_Topic_1
- IGVF_000001_Topic_2
- IGVF_000001_Topic_3
Expand All @@ -45,7 +45,11 @@ Topic IDs:
- IGVF_000001_Topic_20
- IGVF_000001_Topic_21
- IGVF_000001_Topic_22
Train file_name(s):
- train_parse_adrenal_13
- train_10x_adrenal_15
level: tissue
tissue: Adrenal gland
```
## Topic YAML
Expand Down Expand Up @@ -92,32 +96,45 @@ Topic information:
## Write Topyfic results in this format
You can use the `write_model_yaml()` and `write_topic_yaml()` functions to embed your results in this format.
You can use this script to write the model YAML file.
```python
import Topyfic

# Read analysis object
analysis = Topyfic.read_analysis("analysis.p")
analysis_top_model = Topyfic.read_analysis(f"../analysis_10x_adrenal_15_parse_adrenal_13.p")

analysis_top_model.cell_participation.write_h5ad('topic_cell_participation.h5ad')

# information about model and datasets
model_info = {
'Experiment ID': 'IGVF_000001',
'Data source': 'ali-mortazavi:topyfic_annotation',
'Assay': 'single nucleus RNA-seq',
'Technology': ['Parse', '10x'],
'level': 'tissue',
'Name of method': 'Topyfic',
'Number of topics': 22,
'tissue': 'Adrenal gland',
'Method Name': 'Topyfic',
'Number of topics': 19,

}
model_info['Topic file_name(s)'] = list(top_model.topics.keys())
model_info['Cell-Topic participation file_name'] = 'topic_cell_participation.h5ad'
model_info['Train file_name(s)'] = ['train_parse_adrenal_13', 'train_10x_adrenal_15']

file = open('Adrenal_model_yaml.yaml', "w")
yaml.dump(model_info, file, default_flow_style=False)
file.close()
```

You can use the `write_topic_yaml()` function to embed your topics in this format.

write_model_yaml(model_info, analysis.cell_participation)
```python
import Topyfic

for topic in analysis.top_model.topics:
top_model = Topyfic.read_topModel(f"topModel_10x_adrenal_15_parse_adrenal_13.p")
for topic in top_model.topics:
print(topic)
write_topic_yaml(topic_id=f"IGVF_000001_{topic}",
topic_info=analysis.top_model.topics[topic],
model_yaml_path="model.yaml",
top_model.topics[topic].write_topic_yaml(model_yaml_path="Adrenal_model_yaml.yaml",
topic_yaml_path=f"{topic}.yaml",
save=True)
```
Expand Down

0 comments on commit 4346e87

Please sign in to comment.