-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmicrograph_operations.py
443 lines (312 loc) · 21.6 KB
/
micrograph_operations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
import numpy as np
from numpy import (array, dot, arccos, clip)
from numpy.linalg import norm
from scipy.spatial import distance
from sklearn.cluster import AgglomerativeClustering
#######################################################################################################################################################
def cluster_beads (All_Coors, CutOff, debug) :
### 'All_Coors' is a list of listed x-y coordinates, e.g. [[0.0, 0.1], [1.0, 1.2]]
### 'CutOff' is the threshold length used for single-linkage clustering
clustering = AgglomerativeClustering(n_clusters=None, linkage='single', distance_threshold=CutOff, compute_full_tree=True).fit(All_Coors)
#print("There are", len(All_Coors), "data points in", len(np.unique(clustering.labels_)), "clusters!\n")
Clusters_Coors = {}
Clusters_IDs = {}
for i in range(len(clustering.labels_)) :
lab = clustering.labels_[i]
if lab in Clusters_Coors.keys() :
Clusters_Coors[lab].append(All_Coors[i])
Clusters_IDs[lab].append(i)
else :
Clusters_Coors[lab] = [All_Coors[i]]
Clusters_IDs[lab] = [i]
return [Clusters_IDs, Clusters_Coors]
#######################################################################################################################################################
#######################################################################################################################################################
def get_distance_score_matrix (All_Coors, Scoring_Func, debug) :
### 'All_Coors' is a list of listed x-y coordinates, e.g. [[0.0, 0.1], [1.0, 1.2]]; most likely a subset of the overall system, i.e., for instance, a cluster of data points
###
###
Distance_Matrix = distance.cdist(All_Coors, All_Coors, 'euclidean')
Distance_Score_Matrix = np.zeros(Distance_Matrix.shape)
for i in range(len(Distance_Matrix)) :
for j in range(len(Distance_Matrix[i])) :
if i != j :
Distance_Score_Matrix[i][j] = Scoring_Func(Distance_Matrix[i][j])
else :
Distance_Score_Matrix[i][j] = 0
return Distance_Score_Matrix
#######################################################################################################################################################
#######################################################################################################################################################
def get_angle_score_matrix (All_Coors, Scoring_Func, debug) :
### 'All_Coors' is a list of listed x-y coordinates, e.g. [[0.0, 0.1], [1.0, 1.2]]; most likely a subset of the overall system, i.e., for instance, a cluster of data points
###
Angle_Matrix = np.zeros((len(All_Coors), len(All_Coors), len(All_Coors)))
for i in range(0, len(Angle_Matrix)) :
for j in range(0, len(Angle_Matrix)) :
for k in range(0, len(Angle_Matrix)) :
if (i == j) or (j == k) or (i == k):
Angle_Matrix[i][j][k] = -1 ##We need to get rid of the zero set initially because that is actually a high-scoring angle
else :
Bond1 = np.array(All_Coors[j]) - np.array(All_Coors[i])
Bond2 = np.array(All_Coors[k]) - np.array(All_Coors[j])
cosine = dot(Bond1,Bond2)/norm(Bond1)/norm(Bond2)
angle = arccos(clip(cosine, -1, 1))
Angle_Matrix[i][j][k] = angle/np.pi * 180
Angle_Score_Matrix = np.zeros(Angle_Matrix.shape)
for i in range(len(Angle_Matrix)) :
for j in range(len(Angle_Matrix[i])) :
for k in range(len(Angle_Matrix[j])) :
if Angle_Matrix[i][j][k] >= 0 : ##Zero score for undefined angles
Angle_Score_Matrix[i][j][k] = Scoring_Func(Angle_Matrix[i][j][k])
return Angle_Score_Matrix
#######################################################################################################################################################
#######################################################################################################################################################
def get_initial_dimers(Distance_Score_Matrix, Scoring_Func, AvgPixels_IDs, Clusters_IDs, lab, debug) :
###
###
Dimer_IDs = []
Dimer_Scores = []
[r, c] = np.triu_indices(len(Distance_Score_Matrix), 1) ##Indices for the upper triangle (i.e., offset by 1 from the diagonal) of a square matrix of given length
Sorted_Distance_Scores = sorted(Distance_Score_Matrix[r,c], reverse=True) ##Sorted values of that given matrix section (off-diagonal, thus non-zero distances with itself, but note that the distance scores may well be zero)
scorenum = 0
while scorenum < len(Sorted_Distance_Scores) : ##Using a while instead of for loop because there may be multiple pairs with the same score (so we increment the running parameter for every stored pair)
if debug: print("Scorenum is", scorenum)
distscore = Sorted_Distance_Scores[int(scorenum)] ##Making the parameter an integer because it becomes a (whole-numbered!) float below
if debug: print("Currdistscore is", distscore)
if distscore > 0 : ##Consider only scores that are not beyond the min/max region
Distance_Score_rowIDs = np.where(Distance_Score_Matrix == distscore)[0] ##We sorted the distance scores only by the upper triangle of the distance matrix, but we are checking against the whole matrix ...
Distance_Score_colIDs = np.where(Distance_Score_Matrix == distscore)[1] ##... so, for each score, there will be both (i,j) and (j,i) pairs (and, of course, i!=j because we had disregarded the diagonal)
for idnum in range(len(Distance_Score_rowIDs)) : ##This is needed if the same score occurs more than once, but also, in particular, each dimer is saved in both "directions", i.e., both (i,j) and (j,i) are continued here, as either could have a better trimer score
beadscore1 = Scoring_Func(AvgPixels_IDs[Clusters_IDs[lab][Distance_Score_rowIDs[idnum]]])
beadscore2 = Scoring_Func(AvgPixels_IDs[Clusters_IDs[lab][Distance_Score_colIDs[idnum]]])
if (beadscore1 > 0) and (beadscore2 > 0) : ##Consider only pixel-intensity scores that are not beyond the min/max region
Dimer_IDs.append([Distance_Score_rowIDs[idnum], Distance_Score_colIDs[idnum]])
Dimer_Scores.append(np.log(distscore) + np.log(beadscore1) + np.log(beadscore2)) ##The dimer score consists of the distance score plus the two individual bead scores.
if debug: print("CurrDimerID", Dimer_IDs[-1])
if debug: print("CurrDimerScore", Dimer_Scores[-1])
scorenum += 0.5 ##Only adding one half because there will be two "idnum" elements, i.e., (i,j) and (j,i), for each "distscore"
else :
scorenum += 1
return [Dimer_IDs, Dimer_Scores] # Dimers are automatically sorted by score because the array is filled based on 'Sorted_Distance_Scores'
#######################################################################################################################################################
#######################################################################################################################################################
def extend_oligomers (Oligomer_IDs_ByLength, Oligomer_Scores_ByLength, oligolen, Distance_Score_Matrix, Angle_Score_Matrix, Scoring_Func, AvgPixels_IDs, Clusters_IDs, lab, debug) :
###
###
Curr_Oligomer_IDs = []
Curr_Oligomer_Scores = []
Prev_Oligomer_Num = len(Oligomer_IDs_ByLength[oligolen-1])
for oligonum in range(Prev_Oligomer_Num) :
oligomer = Oligomer_IDs_ByLength[oligolen-1][oligonum]
oligoscore = Oligomer_Scores_ByLength[oligolen-1][oligonum]
bead_pre = oligomer[-1]
bead_prepre = oligomer[-2]
if debug: print("Checking Oligomer", oligomer, "with oligoscore", oligoscore)
Sorted_Distance_Scores = sorted(Distance_Score_Matrix[bead_pre], reverse=True)
if debug: print("Its sorted distance scores to new beads are", Sorted_Distance_Scores)
scorenum = 0
while scorenum < len(Sorted_Distance_Scores) : ##Similar approach to 'get_initial_dimers'
distscore = Sorted_Distance_Scores[scorenum]
if debug: print("ScoreNum is", scorenum, "and the corresponding distance score is", distscore)
if distscore > 0 :
Distance_Score_colIDs = np.where(Distance_Score_Matrix[bead_pre] == distscore)[0]
if debug: print("It occurs with these potential new beads", Distance_Score_colIDs)
for idnum in range(len(Distance_Score_colIDs)) :
if Distance_Score_colIDs[idnum] in oligomer: ##It might be one of the previous beads, in particular 'bead_prepre'
if debug: print(Distance_Score_colIDs[idnum], "already in", oligomer)
scorenum += 1
continue
else :
beadscore = Scoring_Func(AvgPixels_IDs[Clusters_IDs[lab][Distance_Score_colIDs[idnum]]]) ##Cluster_IDs and lab defined in 'main'
anglescore = Angle_Score_Matrix[bead_prepre][bead_pre][Distance_Score_colIDs[idnum]]
if debug: print("Addding bead", Distance_Score_colIDs[idnum], "would give the new distance score", distscore, "and angle score", anglescore, "and bead score", beadscore)
if (beadscore > 0) and (anglescore > 0) : ##Consider only scores that are not beyond the min/max angle and pixel-intensity region
new_oligomer = oligomer + [Distance_Score_colIDs[idnum]]
Curr_Oligomer_IDs.append(new_oligomer)
Curr_Oligomer_Scores.append(oligoscore + np.log(distscore) + np.log(anglescore) + np.log(beadscore))
if debug: print("new oligomer is", new_oligomer, "with log-score", Curr_Oligomer_Scores[-1], "\n")
scorenum += 1
else :
scorenum += 1 ##A distance score of 0.0 will occur this time because of distances to itself (diagonal values were excluded before)
return [Curr_Oligomer_IDs, Curr_Oligomer_Scores]
#######################################################################################################################################################
#######################################################################################################################################################
def sort_oligomers_by_score (Oligomer_IDs, Oligomer_Scores, order, debug) :
###
###
##Sorting by increasing score
Oligomer_IDs_SortedByScore = [ Oligomer_IDs for Oligomer_Scores,Oligomer_IDs in sorted(zip(Oligomer_Scores,Oligomer_IDs)) ]
Oligomer_Scores.sort()
Oligomer_Scores_SortedByScore = Oligomer_Scores ##Just to have the names of 'IDs' and 'Scores' consistent
if order == 'decr' :
##Sorting by decreasing score
Oligomer_IDs_SortedByScore.reverse()
Oligomer_Scores_SortedByScore.reverse()
return [Oligomer_IDs_SortedByScore, Oligomer_Scores_SortedByScore]
#######################################################################################################################################################
#######################################################################################################################################################
def discard_oligomers_by_normalscore_threshold (Oligomer_IDs, Oligomer_Scores, score_threshold, debug) :
###
###
NormalScore_CutOff = np.log(score_threshold) ##probability average per bond and angle used as threshold here
Remaining_Oligomer_IDs = []
Remaining_Oligomer_Scores = []
for i in range(len(Oligomer_IDs)) :
oligomer = Oligomer_IDs[i]
bondnums = len(oligomer) - 1
anglenums = len(oligomer) - 2
normalscore = Oligomer_Scores[i]/float(bondnums+anglenums) ##We normalize the oligomer scores for cross-oligolen comparison within a cluster (e.g. when removing some because of overlapping)
if normalscore > NormalScore_CutOff :
Remaining_Oligomer_IDs.append(oligomer)
Remaining_Oligomer_Scores.append(normalscore)
if debug: print("These are found in the cluster:\nIDs:", Remaining_Oligomer_IDs, "\nScores:", Remaining_Oligomer_Scores, "\n")
return [Remaining_Oligomer_IDs, Remaining_Oligomer_Scores]
#######################################################################################################################################################
#######################################################################################################################################################
def sort_oligomers_by_length (Oligomer_IDs, Oligomer_Scores, order, debug) :
###
###
##Sorting by increasing length (if more than one has the same length and the items were sorted by increasing score, then they are still sorted by increasing score)
Oligomer_Lengths = []
for i in Oligomer_IDs :
Oligomer_Lengths.append(len(i))
Oligomer_Scores_SortedByLen = [ Oligomer_Scores for Oligomer_Lengths,Oligomer_Scores in sorted(zip(Oligomer_Lengths,Oligomer_Scores)) ] ##This sorts by the length (the 1st element in the zipped list); when the lengths are identical, then it sorts by the score value (the 2nd element in the zipped list)
Oligomer_IDs.sort(key=len) ##Sorting by length
Oligomer_IDs_SortedByLen = Oligomer_IDs ##Just to have the names of 'IDs' and 'Scores' consistent
if order == 'decr' :
##Sorting by decreasing length
Oligomer_IDs_SortedByLen.reverse()
Oligomer_Scores_SortedByLen.reverse()
return [Oligomer_IDs_SortedByLen, Oligomer_Scores_SortedByLen]
#######################################################################################################################################################
#######################################################################################################################################################
def remove_oligomer_overlaps (Oligomer_IDs, Oligomer_Scores) :
###
###
NonOverlap_Oligomer_IDs = []
NonOverlap_Oligomer_Score = []
NonOverlap_Oligomer_IDs.append(Oligomer_IDs[0])
NonOverlap_Oligomer_Score.append(Oligomer_Scores[0])
for i in range(len(Oligomer_IDs)) :
oligomer = Oligomer_IDs[i]
score = Oligomer_Scores[i]
overlap = 0
for saved_oligomer in NonOverlap_Oligomer_IDs :
if any(bead in saved_oligomer for bead in oligomer) :
overlap += 1
if not overlap :
NonOverlap_Oligomer_IDs.append(oligomer)
NonOverlap_Oligomer_Score.append(score)
return [NonOverlap_Oligomer_IDs, NonOverlap_Oligomer_Score]
#######################################################################################################################################################
#######################################################################################################################################################
def assign_oligomers (All_Coors, Oligomer_Coors_ByLength, MinDist) :
### 'AllCoors' is a list of listed x-y coordinates, e.g. [[0.0, 0.1], [1.0, 1.2]]
### 'Oligomer_Coors_ByLength' is a dictionary where the key is the oligomer length and the corresponding value is a list of oligomers (of that length), which itself are lists of listed x-y coordinantes, e.g. [[[0,1], [1,2], [2,3]], [[1,0], [1,1]]]
### 'MinDist' is the minimum distance that an oligomer is allowed to have to another bead or particle in order to be counted as 'cleared'
Singleton_Coors = [] # The list of singleton coordinates, that are NOT subject to any distance criterion
Cleared_Singleton_Coors = []
UnCleared_Singleton_Coors = []
Cleared_Oligomer_Coors_ByLength = {} # The dictionary of cleared oligomers (with their length being the key value)
UnCleared_Oligomer_Coors_ByLength = {} # The dictionary of oligomers that are not cleared, i.e., they are in proximity to another bead by less than 'MinDist'
UnCleared_Oligomer_Coors = [] # The list of coordinates that are from uncleared oligomers (required to get the locked oligomers easily)
Locked_Oligomer_Coors_ByLength = {} # The dictionary of oligomers that are not cleared because they are in proximity to another oligomer by less than 'MinDist'
Blocked_Oligomer_Coors_ByLength = {} # The dictionary of oligomers that are not cleared because they are in proximity to a singleton by less than 'MinDist'
Possible_Oligomer_Coors_ByLength = {} # The dictionary of putative oligomers, i.e., they are either 'Cleared' or (currently) 'Blocked' by a singleton
Excluded_Coors = [] # The list of all uncleared beads, i.e., uncleared oligomers AND singletons
####### IDENTIFY SINGLETONS #########
print("Assigning all singletons")
for bead in All_Coors :
single = 1
for oligolen in Oligomer_Coors_ByLength.keys() :
for oligomer in Oligomer_Coors_ByLength[oligolen] :
if bead in oligomer :
single = 0
continue
if single == 0 :
continue
if single :
Singleton_Coors.append(bead)
# if len(Singleton_Coors)%200 == 0 : print("found another 200 singletons", len(Singleton_Coors))
######################################
###### IDENTIFY UNCLEARED OLIGOMERS #######
print("Assigning uncleared oligomers")
for oligolen in Oligomer_Coors_ByLength.keys() :
UnCleared_Oligomer_Coors_ByLength[oligolen] = []
for oligomer in Oligomer_Coors_ByLength[oligolen] :
for bead1 in oligomer :
for bead2 in All_Coors :
if bead2 in oligomer :
continue
else :
bondvec = np.array(bead1) - np.array(bead2)
if (norm(bondvec) < MinDist) and (oligomer not in UnCleared_Oligomer_Coors_ByLength[oligolen]) :
UnCleared_Oligomer_Coors_ByLength[oligolen].append(oligomer)
for oligolen in UnCleared_Oligomer_Coors_ByLength.keys() :
for oligomer in UnCleared_Oligomer_Coors_ByLength[oligolen] :
for bead in oligomer :
UnCleared_Oligomer_Coors.append(bead)
#################################################
###### IDENTIFY CLEARED OLIGOMERS #######
print("Assigning cleared oligomers")
for oligolen in Oligomer_Coors_ByLength.keys() :
Cleared_Oligomer_Coors_ByLength[oligolen] = []
for oligomer in Oligomer_Coors_ByLength[oligolen] :
if oligomer not in UnCleared_Oligomer_Coors_ByLength[oligolen] :
Cleared_Oligomer_Coors_ByLength[oligolen].append(oligomer)
###########################################
###### IDENTIFY LOCKED OLIGOMERS #######
print("Assigning locked oligomers")
for oligolen in UnCleared_Oligomer_Coors_ByLength.keys() :
Locked_Oligomer_Coors_ByLength[oligolen] = []
for oligomer in UnCleared_Oligomer_Coors_ByLength[oligolen] :
for bead1 in oligomer :
for bead2 in UnCleared_Oligomer_Coors :
if bead2 in oligomer:
continue
else :
bondvec = np.array(bead1) - np.array(bead2)
if (norm(bondvec) < MinDist) and (oligomer not in Locked_Oligomer_Coors_ByLength[oligolen]) :
Locked_Oligomer_Coors_ByLength[oligolen].append(oligomer)
########################################
###### IDENTIFY BLOCKED OLIGOMERS #######
print("Assigning blocked (and thus the remaining 'possible') oligomers")
for oligolen in UnCleared_Oligomer_Coors_ByLength.keys() :
Blocked_Oligomer_Coors_ByLength[oligolen] = []
Possible_Oligomer_Coors_ByLength[oligolen] = []
for oligomer in UnCleared_Oligomer_Coors_ByLength[oligolen] :
if oligomer not in Locked_Oligomer_Coors_ByLength[oligolen] :
Blocked_Oligomer_Coors_ByLength[oligolen].append(oligomer)
for oligolen in Blocked_Oligomer_Coors_ByLength.keys() :
for oligomer in Blocked_Oligomer_Coors_ByLength[oligolen] :
Possible_Oligomer_Coors_ByLength[oligolen].append(oligomer)
for oligomer in Cleared_Oligomer_Coors_ByLength[oligolen] :
Possible_Oligomer_Coors_ByLength[oligolen].append(oligomer)
##########################################
###### IDENTIFY UNCLEARED SINGLETONS #######
print("Assigning uncleared singletons (and thus all excluded beads)")
for beadID1 in range(len(Singleton_Coors)-1) :
# if beadID1%200 == 0: print("Checking bead", beadID1, "/", len(Singleton_Coors))
bead1 = Singleton_Coors[beadID1]
for beadID2 in range(beadID1+1, len(Singleton_Coors)) :
bead2 = Singleton_Coors[beadID2]
bondvec = np.array(bead1) - np.array(bead2)
if (norm(bondvec) < MinDist) :
if bead1 not in UnCleared_Singleton_Coors : # We don't wannt to check this earlier and 'continue' b/c we might leave out a bead that should be counnted
UnCleared_Singleton_Coors.append(bead1)
if bead2 not in UnCleared_Singleton_Coors :
UnCleared_Singleton_Coors.append(bead2)
for bead1 in Singleton_Coors :
for bead2 in UnCleared_Oligomer_Coors :
bondvec = np.array(bead1) - np.array(bead2)
if (norm(bondvec) < MinDist) and (bead1 not in UnCleared_Singleton_Coors) :
UnCleared_Singleton_Coors.append(bead1)
continue
if bead1 not in UnCleared_Singleton_Coors :
Cleared_Singleton_Coors.append(bead1)
for bead in UnCleared_Singleton_Coors :
Excluded_Coors.append(bead)
for bead in UnCleared_Oligomer_Coors :
Excluded_Coors.append(bead)
###############################################
return [Singleton_Coors, Cleared_Singleton_Coors, UnCleared_Singleton_Coors, Cleared_Oligomer_Coors_ByLength, UnCleared_Oligomer_Coors_ByLength, Blocked_Oligomer_Coors_ByLength, Possible_Oligomer_Coors_ByLength, Locked_Oligomer_Coors_ByLength, Excluded_Coors]