From d2ab3264ca3e432db44c43a34f0641d90f465c0b Mon Sep 17 00:00:00 2001 From: Joseph Kuo Date: Tue, 20 Feb 2024 15:14:18 +0100 Subject: [PATCH] remove intersect_posi --- genomkit/regions/gregions.py | 43 ---------------------------- tests/evaluate/evaluate_intersect.py | 6 ---- 2 files changed, 49 deletions(-) diff --git a/genomkit/regions/gregions.py b/genomkit/regions/gregions.py index c700a53..d313946 100644 --- a/genomkit/regions/gregions.py +++ b/genomkit/regions/gregions.py @@ -598,49 +598,6 @@ def make_array_same_length(array_1, array_2): name="")) return res - def intersect_posi(self, target, strandness: bool = False): - def generate_position_array(position_set, merged_list): - boolean_array = [False] * len(merged_list) - # Set elements to True according to pos_1 - for i, element in enumerate(merged_list): - if element in position_set: - boolean_array[i] = True - return np.array(boolean_array) - - def find_intersects(orientation): - pos_1 = self.get_positions_by_seq(sequence=seq, - orientation=orientation) - pos_2 = target.get_positions_by_seq(sequence=seq, - orientation=orientation) - merged_pos = list(pos_1.union(pos_2)) - array_1 = generate_position_array(pos_1, merged_pos) - array_2 = generate_position_array(pos_2, merged_pos) - result_array = array_1 & array_2 - # Find the indices where the value changes - indices = np.where(np.diff(result_array))[0] + 1 - # Group consecutive indices into tuples - ranges = [(merged_pos[indices[i - 1]], - merged_pos[indices[i]]) - for i in range(1, len(indices), 2)] - return ranges - - res = GRegions() - list_seq_self = self.get_sequences(unique=True) - list_seq_target = target.get_sequences(unique=True) - common_seq = [seq for seq in list_seq_self if seq in list_seq_target] - for seq in common_seq: - if strandness: - # positive - ranges_pos = find_intersects(orientation="+") - ranges_neg = find_intersects(orientation="-") - ranges = ranges_pos + ranges_neg - else: - ranges = find_intersects(orientation=None) - for pair in ranges: - res.add(GRegion(sequence=seq, start=pair[0], end=pair[1], - name="")) - return res - def overlap_count(self, target): intersect = self.intersect_python(target, mode="ORIGINAL") return len(intersect) diff --git a/tests/evaluate/evaluate_intersect.py b/tests/evaluate/evaluate_intersect.py index 434ca3c..5b33422 100644 --- a/tests/evaluate/evaluate_intersect.py +++ b/tests/evaluate/evaluate_intersect.py @@ -30,17 +30,11 @@ def time_intersect_python(): def time_intersect_array(): intersect = peaks.intersect_array(genes) -@profile -def time_intersect_posi(): - intersect = peaks.intersect_posi(genes) - repeat_num = 2 execution_time = timeit.timeit(time_intersect_python, number=repeat_num) print('[{:<20}]'.format('intersect_python'), '{:<5.2f}'.format(execution_time), "seconds") execution_time = timeit.timeit(time_intersect_array, number=repeat_num) print('[{:<20}]'.format('intersect_array'), '{:<5.2f}'.format(execution_time), "seconds") -execution_time = timeit.timeit(time_intersect_posi, number=repeat_num) -print('[{:<20}]'.format('intersect_posi'), '{:<5.2f}'.format(execution_time), "seconds") # print(len(intersect)) # intersect.write("intersect_python.bed") # print(len(intersect))