Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

revert test file #337

Merged
merged 1 commit into from
Jan 7, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 52 additions & 101 deletions tdc/test/test_hf.py
Original file line number Diff line number Diff line change
@@ -1,115 +1,66 @@
from huggingface_hub import create_repo
from huggingface_hub import HfApi, snapshot_download, hf_hub_download
import os

# Repo names (without the "tdc/" prefix) that host DeepPurpose checkpoints.
# load_deeppurpose() only accepts interfaces whose repo is listed here.
deeppurpose_repo = [
    # hERG_Karim
    "hERG_Karim-Morgan",
    "hERG_Karim-CNN",
    "hERG_Karim-AttentiveFP",
    # BBB_Martins
    "BBB_Martins-AttentiveFP",
    "BBB_Martins-Morgan",
    "BBB_Martins-CNN",
    # CYP3A4_Veith
    "CYP3A4_Veith-Morgan",
    "CYP3A4_Veith-CNN",
    "CYP3A4_Veith-AttentiveFP",
]

# Model names that load() knows how to fetch.
model_hub = [
    "Geneformer",
    "scGPT",
]
# -*- coding: utf-8 -*-

from __future__ import division
from __future__ import print_function

class tdc_hf_interface:
    '''
    Interface to a single model repo under the "tdc/" Hugging Face namespace.

    Example use cases:
        # initialize an interface object with HF repo name
        tdc_hf_herg = tdc_hf_interface("hERG_Karim-Morgan")
        # upload folder/files to this repo
        tdc_hf_herg.upload('./Morgan_herg_karim_optimal')
        # load deeppurpose model from this repo
        dp_model = tdc_hf_herg.load_deeppurpose('./data')
        dp_model.predict(XXX)
    '''

    def __init__(self, repo_name):
        """Record the repo id and derive the model name from ``repo_name``.

        Args:
            repo_name: Repo name without the "tdc/" prefix, e.g.
                "hERG_Karim-Morgan" or "Geneformer".

        Attributes set:
            repo_id: ``"tdc/" + repo_name``.
            model_name: The second '-'-separated field of ``repo_name``
                ("Morgan" for "hERG_Karim-Morgan"), or ``repo_name`` itself
                when it contains no '-'.
        """
        self.repo_id = "tdc/" + repo_name
        try:
            self.model_name = repo_name.split('-')[1]
        except IndexError:
            # No '-' in the name: the repo name is the model name.
            self.model_name = repo_name
import os
import sys

def upload(self, folder_path):
    """Create this interface's repo and upload a local folder into it.

    The contents of ``folder_path`` are stored under the "model" directory
    of the repo identified by ``self.repo_id``.

    Args:
        folder_path: Local folder to upload.
    """
    create_repo(repo_id=self.repo_id)
    upload_kwargs = {
        "folder_path": folder_path,
        "path_in_repo": "model",
        "repo_id": self.repo_id,
        "repo_type": "model",
    }
    HfApi().upload_folder(**upload_kwargs)
import unittest
import shutil
import pytest

def file_download(self, save_path, filename):
    """Download one file from this interface's repo.

    Args:
        save_path: Directory passed to ``hf_hub_download`` as ``cache_dir``.
        filename: Path of the file inside the repo, e.g. "model/model.pt".

    Returns:
        The value returned by ``hf_hub_download`` (the local path of the
        downloaded file). It was previously bound to an unused local and
        discarded; callers that ignore the return value are unaffected.
    """
    return hf_hub_download(repo_id=self.repo_id,
                           filename=filename,
                           cache_dir=save_path)
# temporary solution for relative imports in case TDC is not installed
# if TDC is installed, no need to use the following line
sys.path.append(
os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))
# TODO: add verification for the generation other than simple integration

def repo_download(self, save_path):
    """Download a snapshot of this interface's whole repo.

    Args:
        save_path: Directory passed to ``snapshot_download`` as ``cache_dir``.

    Returns:
        The value returned by ``snapshot_download`` (the local snapshot
        folder). It was previously discarded; callers that ignore the return
        value are unaffected.
    """
    return snapshot_download(repo_id=self.repo_id, cache_dir=save_path)

def load(self):
    """Load the pretrained model named by ``self.model_name``.

    Returns:
        For "Geneformer", ``AutoModelForMaskedLM`` loaded from
        "ctheodoris/Geneformer"; for "scGPT", ``AutoModel`` loaded from
        "tdc/scGPT".

    Raises:
        Exception: If ``self.model_name`` is not listed in ``model_hub``, or
            is listed there but has no loader implemented here.
    """
    name = self.model_name

    # Reject anything the hub does not know about before importing anything.
    if name not in model_hub:
        raise Exception("this model is not in the TDC model hub GH repo.")

    # transformers is imported lazily so it is only needed when load() runs.
    if name == "Geneformer":
        from transformers import AutoModelForMaskedLM
        return AutoModelForMaskedLM.from_pretrained("ctheodoris/Geneformer")

    if name == "scGPT":
        from transformers import AutoModel
        return AutoModel.from_pretrained("tdc/scGPT")

    # Listed in model_hub but no loader branch above handles it.
    raise Exception("Not implemented yet!")
class TestHF(unittest.TestCase):

def load_deeppurpose(self, save_path):
if self.repo_id[4:] in deeppurpose_repo:
save_path = save_path + '/' + self.repo_id[4:]
if not os.path.exists(save_path):
os.mkdir(save_path)
self.file_download(save_path, "model/model.pt")
self.file_download(save_path, "model/config.pkl")
def setUp(self):
    """Print the current working directory before a test runs."""
    cwd = os.getcwd()
    print(cwd)

save_path = save_path + '/models--tdc--' + self.repo_id[
4:] + '/blobs/'
file_name1 = save_path + os.listdir(save_path)[0]
file_name2 = save_path + os.listdir(save_path)[1]
@pytest.mark.skip(
reason="This test is skipped due to deeppurpose installation dependency"
)
@unittest.skip(reason="DeepPurpose")
def test_hf_load_predict(self):
from tdc.single_pred import Tox
data = Tox(name='herg_karim')

if os.path.getsize(file_name1) > os.path.getsize(file_name2):
model_file, config_file = file_name1, file_name2
else:
config_file, model_file = file_name1, file_name2
from tdc import tdc_hf_interface
tdc_hf = tdc_hf_interface("hERG_Karim-CNN")
# load deeppurpose model from this repo
dp_model = tdc_hf.load_deeppurpose('./data')
tdc_hf.predict_deeppurpose(dp_model, ['CC(=O)NC1=CC=C(O)C=C1'])

os.rename(model_file, save_path + 'model.pt')
os.rename(config_file, save_path + 'config.pkl')
try:
from DeepPurpose import CompoundPred
except:
raise ValueError(
"Please install DeepPurpose package following https://github.com/kexinhuang12345/DeepPurpose#installation"
)
def test_hf_transformer(self):
    """Check that the "Geneformer" interface loads a BERT masked-LM model."""
    from tdc import tdc_hf_interface
    # Import under its real name: transformers also has a distinct
    # ``BertModel`` class, so aliasing BertForMaskedLM to it was misleading.
    from transformers import BertForMaskedLM

    geneformer = tdc_hf_interface("Geneformer")
    model = geneformer.load()
    # assertIsInstance (unlike a bare assert) is not stripped under -O;
    # the actual type is passed as the failure message, as before.
    self.assertIsInstance(model, BertForMaskedLM, type(model))

net = CompoundPred.model_pretrained(path_dir=save_path)
return net
else:
raise ValueError("This repo does not host a DeepPurpose model!")
# def test_hf_load_new_pytorch_standard(self):
# from tdc import tdc_hf_interface
# # from tdc.resource.dataloader import DataLoader
# # data = DataLoader(name="pinnacle_dti")
# tdc_hf = tdc_hf_interface("mli-PINNACLE")
# dp_model = tdc_hf.load()
# assert dp_model is not None

def predict_deeppurpose(self, model, drugs):
def tearDown(self):
try:
from DeepPurpose import utils
print(os.getcwd())
shutil.rmtree(os.path.join(os.getcwd(), "data"))
except:
raise ValueError(
"Please install DeepPurpose package following https://github.com/kexinhuang12345/DeepPurpose#installation"
)
if self.model_name == 'AttentiveFP':
self.model_name = 'DGL_' + self.model_name
X_pred = utils.data_process(X_drug=drugs,
y=[0] * len(drugs),
drug_encoding=self.model_name,
split_method='no_split')
y_pred = model.predict(X_pred)[0]
return y_pred
pass


if __name__ == "__main__":
unittest.main()
Loading