Skip to content

Commit

Permalink
unit test for get_axes()
Browse files Browse the repository at this point in the history
  * refactorings and tests for get_axes()
  * pass through random_state
  * separate data set for cluster() function
  • Loading branch information
weiju committed May 29, 2024
1 parent 4f06bf5 commit 85de964
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 12 deletions.
25 changes: 14 additions & 11 deletions miner/miner.py
Original file line number Diff line number Diff line change
Expand Up @@ -685,7 +685,7 @@ def pearson_array(array, vector):
return np.sum(product_array,axis=1)/float(product_array.shape[1]-1)


def getAxes(clusters, expressionData):
def get_axes(clusters, expressionData, random_state):
axes = {}
for key in list(clusters.keys()):
genes = clusters[key]
Expand Down Expand Up @@ -959,7 +959,7 @@ def combineClusters(axes,clusters,threshold=0.925):

return revisedClusters

def reconstruction(decomposedList,expressionData,threshold=0.925):
def reconstruction(decomposedList,expressionData, random_state, threshold=0.925):

if len(decomposedList) == 0:
return decomposedList
Expand All @@ -968,16 +968,17 @@ def reconstruction(decomposedList,expressionData,threshold=0.925):
return decomposedList

clusters = {i:decomposedList[i] for i in range(len(decomposedList))}
axes = getAxes(clusters,expressionData)
axes = get_axes(clusters, expressionData, random_state)
recombine = combineClusters(axes,clusters,threshold)
return recombine

def recursive_alignment(geneset,expressionData,minNumberGenes=6,pct_threshold=80):
def recursive_alignment(geneset,expressionData,minNumberGenes=6,
pct_threshold=80, random_state=12):
recDecomp = recursive_decomposition(geneset,expressionData,minNumberGenes,pct_threshold)
if len(recDecomp) == 0:
return []

reconstructed = reconstruction(recDecomp,expressionData)
reconstructed = reconstruction(recDecomp,expressionData, random_state)
reconstructedList = [reconstructed[i] for i in list(reconstructed.keys()) if len(reconstructed[i])>minNumberGenes]
reconstructedList.sort(key = lambda s: -len(s))
return reconstructedList
Expand Down Expand Up @@ -1020,7 +1021,7 @@ def cluster(expressionData, minNumberGenes=6, minNumberOverExpSamples=4, maxSamp
cluster2 = np.array(df.index[np.where(pearson < lowpass)[0]])

for clst in [cluster1, cluster2]:
pdc = recursive_alignment(clst, expressionData=df, minNumberGenes=minNumberGenes, pct_threshold=pct_threshold)
pdc = recursive_alignment(clst, expressionData=df, minNumberGenes=minNumberGenes, pct_threshold=pct_threshold, random_state=random_state)
if len(pdc) == 0:
continue
elif len(pdc) == 1:
Expand Down Expand Up @@ -1193,19 +1194,21 @@ def membershipToIncidence(membershipDictionary,expressionData):

return incidence

def processCoexpressionLists(lists,expressionData,threshold=0.925):
reconstructed = reconstruction(lists,expressionData,threshold)
def processCoexpressionLists(lists,expressionData, random_state, threshold=0.925):
reconstructed = reconstruction(lists,expressionData, random_state, threshold)
reconstructedList = [reconstructed[i] for i in reconstructed.keys()]
reconstructedList.sort(key = lambda s: -len(s))
return reconstructedList

def reviseInitialClusters(clusterList,expressionData,threshold=0.925):
coexpressionLists = processCoexpressionLists(clusterList,expressionData,threshold)

def reviseInitialClusters(clusterList, expressionData, random_state=12, threshold=0.925):
coexpressionLists = processCoexpressionLists(clusterList, expressionData, random_state, threshold)
coexpressionLists.sort(key= lambda s: -len(s))

for iteration in range(5):
previousLength = len(coexpressionLists)
coexpressionLists = processCoexpressionLists(coexpressionLists,expressionData,threshold)
coexpressionLists = processCoexpressionLists(coexpressionLists, expressionData,
random_state, threshold)
newLength = len(coexpressionLists)
if newLength == previousLength:
break
Expand Down
22 changes: 21 additions & 1 deletion test/mechinf_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,39 @@
def test_cluster():
exp = pd.read_csv('testdata/exp_data_preprocessed-002.csv', header=0,
index_col=0)
with open("testdata/init_clusters-002.json") as infile:
with open("testdata/init_clusters-001.json") as infile:
ref_init_clusters = json.load(infile)
init_clusters = miner.cluster(exp,
minNumberGenes=6,
minNumberOverExpSamples=4,
maxSamplesExcluded=0.5,
random_state=12,
overExpressionThreshold=80)
#with open("init_clusters-002.json", "w") as outfile:
# json.dump(init_clusters, outfile)

for cluster in init_clusters:
assert(len(cluster) >= 6)
#assert(len(ref_init_clusters) == len(init_clusters))


def test_get_axes():
    """Regression test: get_axes() reproduces the stored reference axes.

    Reads the gene identifiers in ``testdata/cluster1-00.txt`` (one per
    line), passes them to ``miner.get_axes`` as a single cluster keyed
    ``"1"`` together with the expression data CSV and ``random_state=12``,
    and checks the result against ``testdata/ref_axes-000.json``.
    """
    # Local import so this test does not depend on the module's import block.
    import numpy as np

    with open("testdata/cluster1-00.txt") as infile:
        cluster = [line.strip() for line in infile]
    exp = pd.read_csv('testdata/exp_data_preprocessed-002.csv', header=0,
                      index_col=0)
    with open("testdata/ref_axes-000.json") as infile:
        ref_axes = json.load(infile)

    axes = miner.get_axes({"1": cluster}, exp, random_state=12)

    # Same set of cluster keys as the reference.
    assert set(axes) == set(ref_axes)
    for key, expected in ref_axes.items():
        actual = np.asarray(axes[key], dtype=float)
        # The reference stores each axis as a flat list of floats.
        assert actual.shape == (len(expected),)
        # Compare with a tolerance: exact equality on computed floats is
        # brittle across platforms and numeric library versions.
        assert np.allclose(actual, expected, rtol=1e-6, atol=1e-8)


def test_recursive_decomposition():
cluster = []
with open("testdata/cluster1-00.txt") as infile:
Expand Down
1 change: 1 addition & 0 deletions testdata/init_clusters-001.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[["ENSG00000076928", "ENSG00000187244", "ENSG00000159374", "ENSG00000143333", "ENSG00000116863", "ENSG00000008441", "ENSG00000196923", "ENSG00000247596", "ENSG00000055070", "ENSG00000042753"], ["ENSG00000006756", "ENSG00000040487", "ENSG00000101605", "ENSG00000175556", "ENSG00000179820", "ENSG00000198799", "ENSG00000166484", "ENSG00000019995", "ENSG00000092148"], ["ENSG00000111644", "ENSG00000187961", "ENSG00000122912", "ENSG00000198556", "ENSG00000006282", "ENSG00000012171", "ENSG00000138111", "ENSG00000099866", "ENSG00000061656"], ["ENSG00000175467", "ENSG00000163171", "ENSG00000134184", "ENSG00000114503", "ENSG00000083635", "ENSG00000167004", "ENSG00000029363", "ENSG00000067596", "ENSG00000126883"], ["ENSG00000124588", "ENSG00000179826", "ENSG00000164751", "ENSG00000243958", "ENSG00000162598", "ENSG00000130513", "ENSG00000138463", "ENSG00000162594", "ENSG00000281618"], ["ENSG00000108639", "ENSG00000154065", "ENSG00000206113", "ENSG00000135912", "ENSG00000140521", "ENSG00000108578", "ENSG00000187840", "ENSG00000172828", "ENSG00000145220"], ["ENSG00000092140", "ENSG00000183579", "ENSG00000084652", "ENSG00000100395", "ENSG00000113648", "ENSG00000107833", "ENSG00000197771", "ENSG00000112715", "ENSG00000069998"], ["ENSG00000134028", "ENSG00000204923", "ENSG00000135631", "ENSG00000141068", "ENSG00000139428", "ENSG00000118640", "ENSG00000171045", "ENSG00000152056", "ENSG00000103642"], ["ENSG00000012174", "ENSG00000072506", "ENSG00000129562", "ENSG00000170522", "ENSG00000163170", "ENSG00000175137", "ENSG00000153029", "ENSG00000117262"], ["ENSG00000073578", "ENSG00000162148", "ENSG00000164654", "ENSG00000177042", "ENSG00000125046", "ENSG00000140522", "ENSG00000153790", "ENSG00000151292"], ["ENSG00000165309", "ENSG00000176927", "ENSG00000056586", "ENSG00000077458", "ENSG00000009830", "ENSG00000135913", "ENSG00000156313", "ENSG00000133131"], ["ENSG00000137145", "ENSG00000012174", "ENSG00000101935", "ENSG00000162599", "ENSG00000165704", "ENSG00000176399", "ENSG00000283620", 
"ENSG00000109572"], ["ENSG00000262795", "ENSG00000107651", "ENSG00000141068", "ENSG00000113389", "ENSG00000073578", "ENSG00000124214", "ENSG00000105856", "ENSG00000114030"], ["ENSG00000166855", "ENSG00000172671", "ENSG00000176920", "ENSG00000089009", "ENSG00000155506", "ENSG00000149187", "ENSG00000104112", "ENSG00000161021"], ["ENSG00000197776", "ENSG00000178922", "ENSG00000147316", "ENSG00000138468", "ENSG00000100393", "ENSG00000263874", "ENSG00000134186", "ENSG00000130518"], ["ENSG00000188596", "ENSG00000169203", "ENSG00000183571", "ENSG00000184702", "ENSG00000138468", "ENSG00000151923", "ENSG00000156502", "ENSG00000167103"], ["ENSG00000164758", "ENSG00000020426", "ENSG00000105851", "ENSG00000257341", "ENSG00000005471", "ENSG00000181218", "ENSG00000163466", "ENSG00000135919"], ["ENSG00000132744", "ENSG00000163468", "ENSG00000175550", "ENSG00000198795", "ENSG00000072501", "ENSG00000182010", "ENSG00000135776", "ENSG00000183864"], ["ENSG00000183570", "ENSG00000132185", "ENSG00000172164", "ENSG00000142765", "ENSG00000182508", "ENSG00000163376", "ENSG00000198793", "ENSG00000066855"], ["ENSG00000130517", "ENSG00000155330", "ENSG00000188021", "ENSG00000100483", "ENSG00000109220", "ENSG00000124214", "ENSG00000111647"], ["ENSG00000154889", "ENSG00000204923", "ENSG00000149716", "ENSG00000250565", "ENSG00000134278", "ENSG00000266173", "ENSG00000135632"], ["ENSG00000088205", "ENSG00000164414", "ENSG00000179826", "ENSG00000267228", "ENSG00000135775", "ENSG00000133138", "ENSG00000076321"], ["ENSG00000101935", "ENSG00000174780", "ENSG00000198554", "ENSG00000166851", "ENSG00000281306", "ENSG00000145220", "ENSG00000058085"], ["ENSG00000180667", "ENSG00000137309", "ENSG00000101871", "ENSG00000123933", "ENSG00000064726", "ENSG00000111832", "ENSG00000167470"], ["ENSG00000149792", "ENSG00000168763", "ENSG00000169989", "ENSG00000156509", "ENSG00000084754", "ENSG00000185158", "ENSG00000167536"], ["ENSG00000147883", "ENSG00000196550", "ENSG00000163374", "ENSG00000165304", 
"ENSG00000236843", "ENSG00000222009", "ENSG00000101871"], ["ENSG00000120075", "ENSG00000100399", "ENSG00000278437", "ENSG00000155719", "ENSG00000236177", "ENSG00000188026", "ENSG00000159450"], ["ENSG00000089009", "ENSG00000106633", "ENSG00000225526", "ENSG00000155714", "ENSG00000184787", "ENSG00000151150", "ENSG00000104112"], ["ENSG00000104808", "ENSG00000182013", "ENSG00000147883", "ENSG00000105401", "ENSG00000164756", "ENSG00000130758", "ENSG00000198792"], ["ENSG00000173193", "ENSG00000163378", "ENSG00000164542", "ENSG00000160710", "ENSG00000247595", "ENSG00000143641", "ENSG00000148225"], ["ENSG00000154262", "ENSG00000037749", "ENSG00000154263", "ENSG00000154265", "ENSG00000161021", "ENSG00000188596", "ENSG00000168297"], ["ENSG00000172785", "ENSG00000120498", "ENSG00000112695", "ENSG00000255767", "ENSG00000103037", "ENSG00000169989", "ENSG00000154269"], ["ENSG00000189283", "ENSG00000213401", "ENSG00000163467", "ENSG00000217442", "ENSG00000130751", "ENSG00000160712", "ENSG00000187024"], ["ENSG00000138942", "ENSG00000188419", "ENSG00000175550", "ENSG00000072501", "ENSG00000134183", "ENSG00000198246", "ENSG00000163463"], ["ENSG00000116478", "ENSG00000276126", "ENSG00000182010", "ENSG00000204118", "ENSG00000106404", "ENSG00000213401", "ENSG00000183248"], ["ENSG00000176928", "ENSG00000144724", "ENSG00000177045", "ENSG00000188620", "ENSG00000178927", "ENSG00000124215", "ENSG00000011478"], ["ENSG00000126882", "ENSG00000140526", "ENSG00000240021", "ENSG00000086015", "ENSG00000148229", "ENSG00000108576", "ENSG00000183354"], ["ENSG00000099864", "ENSG00000172167", "ENSG00000167476", "ENSG00000026652", "ENSG00000164418", "ENSG00000105855", "ENSG00000112531"], ["ENSG00000180660", "ENSG00000198794", "ENSG00000184012", "ENSG00000105402", "ENSG00000183578", "ENSG00000128849", "ENSG00000278545"]]
1 change: 1 addition & 0 deletions testdata/ref_axes-000.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"1": [-3.2053257675159794, 0.7336668967026231, 10.015249539735647, -5.0157843801320166, 0.1244238925363025, -4.566642689742843, -6.028487634563582, -2.6422967842808855, -2.239835807761678, 1.2655478068534982, -4.211017432635741, -0.06750486152444292, 1.7540481217371204, -0.44550924423364835, -4.626940669175253, -5.182963010663376, 1.3226698164424027, 4.922411509397451, -4.885202241021158, 0.10525237041097457, -1.5431771377769472, 4.934620252680376, 8.16253928472123, -5.07468280252522, 6.829680037240505, -0.3491153464575949, -0.9475711407150105, 5.279577667050634, -3.611202917336161, 1.8087327684635888, 8.585837685613258, 1.584075541276682, -0.2997117040966497, 0.980476986775055, 5.06611161477977, -1.4452838354211912, -2.7739358915838768, 1.171808594937817, 10.376468379412415, -4.563303351956499, -0.5674493289612625, -0.6419440162633384, -0.48251381266449095, 2.7894684583848774, 0.8671551269244898, -2.6772737717059147, -4.848581084979227, -1.248192596725568, -4.488373089657147]}

0 comments on commit 85de964

Please sign in to comment.