Skip to content

Commit

Permalink
refactoring: clarity
Browse files Browse the repository at this point in the history
  • Loading branch information
weiju committed Jun 28, 2024
1 parent 5768a6f commit 065b500
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 14 deletions.
25 changes: 12 additions & 13 deletions miner/miner.py
Original file line number Diff line number Diff line change
Expand Up @@ -716,7 +716,7 @@ def FrequencyMatrix(matrix,overExpThreshold = 1):

fm = np.zeros((len(index),len(index)))
for key in list(frequency_dictionary.keys()):
tmp = frequency_dictionary[key]
tmp = sorted(frequency_dictionary[key])
if len(tmp) == 0:
continue
count = Counter(tmp)
Expand Down Expand Up @@ -799,20 +799,17 @@ def unmix(df,iterations=25,returnAll=False):
sumDf1 = df.sum(axis=1)
maxSum = df.index[np.argmax(np.array(sumDf1))]
hits = np.where(df.loc[maxSum]>0)[0]
hitIndex = list(df.index[hits])
hitIndex = sorted(df.index[hits])
block = df.loc[hitIndex,hitIndex]
blockSum = block.sum(axis=1)
coreBlock = list(blockSum.index[np.where(blockSum>=np.median(blockSum))[0]])
remainder = list(set(df.index)-set(coreBlock))
coreBlock = sorted(blockSum.index[np.where(blockSum>=np.median(blockSum))[0]])
remainder = sorted(set(df.index)-set(coreBlock))
frequencyClusters.append(coreBlock)
if len(remainder)==0:
return frequencyClusters
if len(coreBlock)==1:
return frequencyClusters
df = df.loc[remainder,remainder]
if returnAll is True:
if len(remainder) == 0 or len(coreBlock) == 1:
return sorted(frequencyClusters)
if returnAll:
frequencyClusters.append(remainder)
return frequencyClusters
return sorted(frequencyClusters)

def remix(df,frequencyClusters):
finalClusters = []
Expand Down Expand Up @@ -984,6 +981,8 @@ def recursive_alignment(geneset,expressionData,minNumberGenes=6,
return reconstructedList


NUM_PCA_COMPONENTS = 10

def cluster(expressionData, minNumberGenes=6, minNumberOverExpSamples=4, maxSamplesExcluded=0.50, svd_solver="arpack",
random_state=12, overExpressionThreshold=80, pct_threshold=80):
df = expressionData.copy()
Expand All @@ -996,7 +995,7 @@ def cluster(expressionData, minNumberGenes=6, minNumberOverExpSamples=4, maxSamp

startTimer = time.time()
trial = -1
pca = PCA(10, random_state=random_state)
pca = PCA(NUM_PCA_COMPONENTS, random_state=random_state)

for step in range(maxStep):
trial += 1
Expand All @@ -1010,7 +1009,7 @@ def cluster(expressionData, minNumberGenes=6, minNumberOverExpSamples=4, maxSamp
principalDf = pd.DataFrame(principalComponents)
principalDf.index = df.columns

for i in range(10):
for i in range(NUM_PCA_COMPONENTS):
pearson = pearson_array(np.array(df), np.array(principalDf[i]))
if len(pearson) == 0:
continue
Expand Down
2 changes: 1 addition & 1 deletion testdata/init_clusters-001.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
[["ENSG00000247596", "ENSG00000142920", "ENSG00000076928", "ENSG00000008441", "ENSG00000165409", "ENSG00000159374", "ENSG00000114268", "ENSG00000055070", "ENSG00000196923", "ENSG00000143333"], ["ENSG00000086015", "ENSG00000089009", "ENSG00000104112", "ENSG00000137309", "ENSG00000149187", "ENSG00000155506", "ENSG00000164756", "ENSG00000166855", "ENSG00000172671", "ENSG00000176920"], ["ENSG00000006756", "ENSG00000019995", "ENSG00000040487", "ENSG00000092148", "ENSG00000101605", "ENSG00000166484", "ENSG00000175556", "ENSG00000179820", "ENSG00000198799"], ["ENSG00000187961", "ENSG00000012171", "ENSG00000138111", "ENSG00000061656", "ENSG00000198556", "ENSG00000099866", "ENSG00000111644", "ENSG00000006282", "ENSG00000122912"], ["ENSG00000029363", "ENSG00000067596", "ENSG00000083635", "ENSG00000114503", "ENSG00000126883", "ENSG00000134184", "ENSG00000163171", "ENSG00000167004", "ENSG00000175467"], ["ENSG00000162598", "ENSG00000243958", "ENSG00000130513", "ENSG00000124588", "ENSG00000281618", "ENSG00000162594", "ENSG00000179826", "ENSG00000138463", "ENSG00000164751"], ["ENSG00000108578", "ENSG00000108639", "ENSG00000135912", "ENSG00000140521", "ENSG00000145220", "ENSG00000154065", "ENSG00000172828", "ENSG00000187840", "ENSG00000206113"], ["ENSG00000103642", "ENSG00000118640", "ENSG00000134028", "ENSG00000135631", "ENSG00000139428", "ENSG00000141068", "ENSG00000152056", "ENSG00000171045", "ENSG00000204923"], ["ENSG00000100393", "ENSG00000130518", "ENSG00000134186", "ENSG00000138468", "ENSG00000147316", "ENSG00000178922", "ENSG00000197776", "ENSG00000263874", "ENSG00000278062"], ["ENSG00000100077", "ENSG00000100399", "ENSG00000155719", "ENSG00000159450", "ENSG00000169439", "ENSG00000187244", "ENSG00000188026", "ENSG00000236177", "ENSG00000278437"], ["ENSG00000103037", "ENSG00000050130", "ENSG00000160716", "ENSG00000117266", "ENSG00000069712", "ENSG00000169989", "ENSG00000168152", "ENSG00000172785", "ENSG00000255767"], ["ENSG00000012174", "ENSG00000072506", "ENSG00000117262", "ENSG00000129562", "ENSG00000153029", "ENSG00000163170", "ENSG00000170522", "ENSG00000175137"], ["ENSG00000073578", "ENSG00000125046", "ENSG00000140522", "ENSG00000151292", "ENSG00000153790", "ENSG00000162148", "ENSG00000164654", "ENSG00000177042"], ["ENSG00000077458", "ENSG00000156313", "ENSG00000056586", "ENSG00000009830", "ENSG00000133131", "ENSG00000165309", "ENSG00000176927", "ENSG00000135913"], ["ENSG00000012174", "ENSG00000101935", "ENSG00000165704", "ENSG00000176399", "ENSG00000109572", "ENSG00000283620", "ENSG00000162599", "ENSG00000137145"], ["ENSG00000124214", "ENSG00000141068", "ENSG00000114030", "ENSG00000105856", "ENSG00000107651", "ENSG00000113389", "ENSG00000262795", "ENSG00000073578"], ["ENSG00000084754", "ENSG00000106404", "ENSG00000119760", "ENSG00000167536", "ENSG00000168763", "ENSG00000169989", "ENSG00000185158", "ENSG00000204118"], ["ENSG00000104804", "ENSG00000122728", "ENSG00000124587", "ENSG00000164597", "ENSG00000166529", "ENSG00000167100", "ENSG00000174788", "ENSG00000182013"], ["ENSG00000049192", "ENSG00000132744", "ENSG00000135205", "ENSG00000151150", "ENSG00000167800", "ENSG00000184254", "ENSG00000217442", "ENSG00000235194"], ["ENSG00000124214", "ENSG00000188021", "ENSG00000109220", "ENSG00000155330", "ENSG00000111647", "ENSG00000130517", "ENSG00000100483"], ["ENSG00000134278", "ENSG00000135632", "ENSG00000149716", "ENSG00000154889", "ENSG00000204923", "ENSG00000250565", "ENSG00000266173"], ["ENSG00000088205", "ENSG00000133138", "ENSG00000076321", "ENSG00000179826", "ENSG00000267228", "ENSG00000135775", "ENSG00000164414"], ["ENSG00000101935", "ENSG00000281306", "ENSG00000145220", "ENSG00000198554", "ENSG00000058085", "ENSG00000174780", "ENSG00000166851"], ["ENSG00000067840", "ENSG00000135636", "ENSG00000163464", "ENSG00000164047", "ENSG00000166523", "ENSG00000166527", "ENSG00000171049"], ["ENSG00000138468", "ENSG00000151923", "ENSG00000156502", "ENSG00000167103", "ENSG00000183571", "ENSG00000184702", "ENSG00000188596"], ["ENSG00000011478", "ENSG00000037042", "ENSG00000106638", "ENSG00000177045", "ENSG00000177951", "ENSG00000186088", "ENSG00000198551"], ["ENSG00000005471", "ENSG00000020426", "ENSG00000105851", "ENSG00000135919", "ENSG00000163376", "ENSG00000164758", "ENSG00000181218"], ["ENSG00000011478", "ENSG00000076924", "ENSG00000105401", "ENSG00000135919", "ENSG00000163466", "ENSG00000177045", "ENSG00000198792"], ["ENSG00000072210", "ENSG00000154265", "ENSG00000169989", "ENSG00000180881", "ENSG00000185158", "ENSG00000261603", "ENSG00000278789"], ["ENSG00000037749", "ENSG00000154262", "ENSG00000154263", "ENSG00000154265", "ENSG00000161021", "ENSG00000168763", "ENSG00000214694"], ["ENSG00000144724", "ENSG00000149792", "ENSG00000152670", "ENSG00000167807", "ENSG00000176396", "ENSG00000178927", "ENSG00000240747"], ["ENSG00000143337", "ENSG00000143641", "ENSG00000154269", "ENSG00000160712", "ENSG00000163374", "ENSG00000173193", "ENSG00000185155"], ["ENSG00000078487", "ENSG00000084652", "ENSG00000164411", "ENSG00000183576", "ENSG00000184708", "ENSG00000198793", "ENSG00000250021"], ["ENSG00000072501", "ENSG00000100395", "ENSG00000138942", "ENSG00000163468", "ENSG00000175550", "ENSG00000183579", "ENSG00000198246"], ["ENSG00000021300", "ENSG00000106633", "ENSG00000107831", "ENSG00000113384", "ENSG00000113645", "ENSG00000132749", "ENSG00000174099"], ["ENSG00000002586", "ENSG00000034693", "ENSG00000111834", "ENSG00000114698", "ENSG00000116473", "ENSG00000120519", "ENSG00000188419"], ["ENSG00000268500", "ENSG00000101938", "ENSG00000162591", "ENSG00000137409", "ENSG00000184014", "ENSG00000196616", "ENSG00000197506"]]
[["ENSG00000002586", "ENSG00000005471", "ENSG00000029534", "ENSG00000120519", "ENSG00000142765", "ENSG00000164047", "ENSG00000171049", "ENSG00000178922", "ENSG00000263874", "ENSG00000278062"], ["ENSG00000106244", "ENSG00000113384", "ENSG00000128944", "ENSG00000130751", "ENSG00000142252", "ENSG00000163467", "ENSG00000163468", "ENSG00000165643", "ENSG00000166855", "ENSG00000177504"], ["ENSG00000006756", "ENSG00000019995", "ENSG00000040487", "ENSG00000092148", "ENSG00000101605", "ENSG00000166484", "ENSG00000175556", "ENSG00000179820", "ENSG00000198799"], ["ENSG00000029363", "ENSG00000067596", "ENSG00000083635", "ENSG00000114503", "ENSG00000126883", "ENSG00000134184", "ENSG00000163171", "ENSG00000167004", "ENSG00000175467"], ["ENSG00000042753", "ENSG00000116863", "ENSG00000130755", "ENSG00000132744", "ENSG00000135636", "ENSG00000155719", "ENSG00000187244", "ENSG00000236177", "ENSG00000278437"], ["ENSG00000103642", "ENSG00000118640", "ENSG00000134028", "ENSG00000135631", "ENSG00000139428", "ENSG00000141068", "ENSG00000152056", "ENSG00000171045", "ENSG00000204923"], ["ENSG00000108578", "ENSG00000108639", "ENSG00000135912", "ENSG00000140521", "ENSG00000145220", "ENSG00000154065", "ENSG00000172828", "ENSG00000187840", "ENSG00000206113"], ["ENSG00000124588", "ENSG00000130513", "ENSG00000138463", "ENSG00000162594", "ENSG00000162598", "ENSG00000164751", "ENSG00000179826", "ENSG00000243958", "ENSG00000281618"], ["ENSG00000005471", "ENSG00000061656", "ENSG00000076924", "ENSG00000143337", "ENSG00000146592", "ENSG00000157224", "ENSG00000160710", "ENSG00000162843", "ENSG00000172671"], ["ENSG00000037280", "ENSG00000106400", "ENSG00000112695", "ENSG00000152969", "ENSG00000165389", "ENSG00000168938", "ENSG00000169432", "ENSG00000184785", "ENSG00000188419"], ["ENSG00000069509", "ENSG00000072501", "ENSG00000096080", "ENSG00000100395", "ENSG00000111834", "ENSG00000138942", "ENSG00000183246", "ENSG00000188419", "ENSG00000250021"], ["ENSG00000009830", "ENSG00000056586", "ENSG00000077458", "ENSG00000133131", "ENSG00000135913", "ENSG00000156313", "ENSG00000165309", "ENSG00000176927"], ["ENSG00000012174", "ENSG00000072506", "ENSG00000117262", "ENSG00000129562", "ENSG00000153029", "ENSG00000163170", "ENSG00000170522", "ENSG00000175137"], ["ENSG00000012174", "ENSG00000101935", "ENSG00000109572", "ENSG00000137145", "ENSG00000162599", "ENSG00000165704", "ENSG00000176399", "ENSG00000283620"], ["ENSG00000073578", "ENSG00000105856", "ENSG00000107651", "ENSG00000113389", "ENSG00000114030", "ENSG00000124214", "ENSG00000141068", "ENSG00000262795"], ["ENSG00000073578", "ENSG00000125046", "ENSG00000140522", "ENSG00000151292", "ENSG00000153790", "ENSG00000162148", "ENSG00000164654", "ENSG00000177042"], ["ENSG00000037042", "ENSG00000127774", "ENSG00000160124", "ENSG00000161970", "ENSG00000168291", "ENSG00000176928", "ENSG00000180592", "ENSG00000186088"], ["ENSG00000066855", "ENSG00000138115", "ENSG00000165685", "ENSG00000172164", "ENSG00000180881", "ENSG00000188596", "ENSG00000198793", "ENSG00000236236"], ["ENSG00000100395", "ENSG00000135776", "ENSG00000163374", "ENSG00000168314", "ENSG00000183864", "ENSG00000196550", "ENSG00000222009", "ENSG00000236843"], ["ENSG00000012171", "ENSG00000026652", "ENSG00000036054", "ENSG00000106636", "ENSG00000111644", "ENSG00000138111", "ENSG00000165643", "ENSG00000197506"], ["ENSG00000021300", "ENSG00000100399", "ENSG00000132749", "ENSG00000154262", "ENSG00000154263", "ENSG00000160712", "ENSG00000184254", "ENSG00000188026"], ["ENSG00000036054", "ENSG00000099864", "ENSG00000100532", "ENSG00000112531", "ENSG00000153107", "ENSG00000163376", "ENSG00000164418", "ENSG00000183571"], ["ENSG00000008441", "ENSG00000105146", "ENSG00000108576", "ENSG00000126882", "ENSG00000135587", "ENSG00000180660", "ENSG00000198242", "ENSG00000247596"], ["ENSG00000058085", "ENSG00000101935", "ENSG00000145220", "ENSG00000166851", "ENSG00000174780", "ENSG00000198554", "ENSG00000281306"], ["ENSG00000067840", "ENSG00000075223", "ENSG00000162591", "ENSG00000166523", "ENSG00000166527", "ENSG00000169744", "ENSG00000275122"], ["ENSG00000076321", "ENSG00000088205", "ENSG00000133138", "ENSG00000135775", "ENSG00000164414", "ENSG00000179826", "ENSG00000267228"], ["ENSG00000100483", "ENSG00000109220", "ENSG00000111647", "ENSG00000124214", "ENSG00000130517", "ENSG00000155330", "ENSG00000188021"], ["ENSG00000134278", "ENSG00000135632", "ENSG00000149716", "ENSG00000154889", "ENSG00000204923", "ENSG00000250565", "ENSG00000266173"], ["ENSG00000037749", "ENSG00000147059", "ENSG00000158683", "ENSG00000159082", "ENSG00000161021", "ENSG00000183578", "ENSG00000247626"], ["ENSG00000101871", "ENSG00000111832", "ENSG00000119138", "ENSG00000123933", "ENSG00000164758", "ENSG00000167470", "ENSG00000180667"], ["ENSG00000130758", "ENSG00000136754", "ENSG00000143337", "ENSG00000143641", "ENSG00000148225", "ENSG00000182013", "ENSG00000198792"], ["ENSG00000143641", "ENSG00000160710", "ENSG00000169989", "ENSG00000180881", "ENSG00000185158", "ENSG00000261603", "ENSG00000275368"], ["ENSG00000019991", "ENSG00000086015", "ENSG00000100485", "ENSG00000111224", "ENSG00000135914", "ENSG00000182010", "ENSG00000240021"], ["ENSG00000076928", "ENSG00000077454", "ENSG00000095637", "ENSG00000100836", "ENSG00000134186", "ENSG00000167800", "ENSG00000168476"], ["ENSG00000138468", "ENSG00000140527", "ENSG00000149243", "ENSG00000167103", "ENSG00000169203", "ENSG00000183354", "ENSG00000184012"], ["ENSG00000154269", "ENSG00000164591", "ENSG00000166529", "ENSG00000167536", "ENSG00000169435", "ENSG00000181856", "ENSG00000253148"], ["ENSG00000011478", "ENSG00000011677", "ENSG00000078487", "ENSG00000147889", "ENSG00000177045", "ENSG00000198168", "ENSG00000255767"], ["ENSG00000112699", "ENSG00000114698", "ENSG00000154265", "ENSG00000163378", "ENSG00000175073", "ENSG00000204116", "ENSG00000225526"], ["ENSG00000113387", "ENSG00000114268", "ENSG00000163462", "ENSG00000163463", "ENSG00000165304", "ENSG00000175294", "ENSG00000176393"]]

0 comments on commit 065b500

Please sign in to comment.