From 61440fdaedc2d8658712ae7273481a13fae65d0f Mon Sep 17 00:00:00 2001 From: dengzq1234 Date: Mon, 16 Dec 2024 11:37:13 +0100 Subject: [PATCH 1/4] correct bugs of 0s in summary --- treeprofiler/tree_annotate.py | 67 +++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 30 deletions(-) diff --git a/treeprofiler/tree_annotate.py b/treeprofiler/tree_annotate.py index 970a0e1..9d14127 100644 --- a/treeprofiler/tree_annotate.py +++ b/treeprofiler/tree_annotate.py @@ -1348,7 +1348,7 @@ def process_node(node_data): internal_props_num = merge_num_annotations(node_leaves, num_prop, column2method) if internal_props_num: internal_props.update(internal_props_num) - + # Generate consensus sequence consensus_seq = None if alignment and name2seq is not None: # Check alignment and name2seq together @@ -1450,39 +1450,46 @@ def merge_num_annotations(nodes, target_props, column2method): if target_prop != 'dist' and target_prop != 'support': prop_array = np.array(utils.children_prop_array(nodes, target_prop),dtype=np.float64) prop_array = prop_array[~np.isnan(prop_array)] # remove nan data - - - if prop_array.any(): - n, (smin, smax), sm, sv, ss, sk = stats.describe(prop_array) - if num_stat == 'all': - internal_props[utils.add_suffix(target_prop, 'avg')] = sm - internal_props[utils.add_suffix(target_prop, 'sum')] = np.sum(prop_array) - internal_props[utils.add_suffix(target_prop, 'max')] = smax - internal_props[utils.add_suffix(target_prop, 'min')] = smin - if math.isnan(sv) == False: - internal_props[utils.add_suffix(target_prop, 'std')] = sv - else: - internal_props[utils.add_suffix(target_prop, 'std')] = 0 - - elif num_stat == 'avg': - internal_props[utils.add_suffix(target_prop, 'avg')] = sm - elif num_stat == 'sum': - internal_props[utils.add_suffix(target_prop, 'sum')] = np.sum(prop_array) - elif num_stat == 'max': - internal_props[utils.add_suffix(target_prop, 'max')] = smax - elif num_stat == 'min': - internal_props[utils.add_suffix(target_prop, 'min')] = smin 
- elif num_stat == 'std': - if math.isnan(sv) == False: - internal_props[utils.add_suffix(target_prop, 'std')] = sv - else: - internal_props[utils.add_suffix(target_prop, 'std')] = 0 + if prop_array is None or all(v is None for v in prop_array): + # n, (smin, smax), sm, sv, ss, sk = None, (None, None), None, None, None, None + continue + elif np.all(np.array(prop_array) == 0): + # If prop_array is full of 0 + n, (smin, smax), sm, sv, ss, sk = 0, (0, 0), 0, 0, 0, 0 + elif np.any(prop_array): # Check if any element is non-zero/non-None + n, (smin, smax), sm, sv, ss, sk = stats.describe(prop_array) + else: + # For all other cases, fallback to a default + n, (smin, smax), sm, sv, ss, sk = 0, (0, 0), 0, 0, 0, 0 + + if num_stat == 'all': + internal_props[utils.add_suffix(target_prop, 'avg')] = sm + internal_props[utils.add_suffix(target_prop, 'sum')] = np.sum(prop_array) + internal_props[utils.add_suffix(target_prop, 'max')] = smax + internal_props[utils.add_suffix(target_prop, 'min')] = smin + if math.isnan(sv) == False: + internal_props[utils.add_suffix(target_prop, 'std')] = sv else: - #print('Invalid stat method') - pass + internal_props[utils.add_suffix(target_prop, 'std')] = 0 + + elif num_stat == 'avg': + internal_props[utils.add_suffix(target_prop, 'avg')] = sm + elif num_stat == 'sum': + internal_props[utils.add_suffix(target_prop, 'sum')] = np.sum(prop_array) + elif num_stat == 'max': + internal_props[utils.add_suffix(target_prop, 'max')] = smax + elif num_stat == 'min': + internal_props[utils.add_suffix(target_prop, 'min')] = smin + elif num_stat == 'std': + if math.isnan(sv) == False: + internal_props[utils.add_suffix(target_prop, 'std')] = sv + else: + internal_props[utils.add_suffix(target_prop, 'std')] = 0 else: + #print('Invalid stat method') pass + if internal_props: return internal_props From 5d6a64e75ef5071dbcd364a28c7f2fdbe76bdec6 Mon Sep 17 00:00:00 2001 From: dengzq1234 Date: Mon, 16 Dec 2024 12:29:55 +0100 Subject: [PATCH 2/4] correct layout 
calling None --- treeprofiler/layouts/staple_layouts.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/treeprofiler/layouts/staple_layouts.py b/treeprofiler/layouts/staple_layouts.py index c59aa5f..9addfee 100644 --- a/treeprofiler/layouts/staple_layouts.py +++ b/treeprofiler/layouts/staple_layouts.py @@ -302,6 +302,7 @@ def set_tree_style(self, tree, tree_style): ) def set_node_style(self, node): heatmap_num = node.props.get(self.prop) + if heatmap_num is not None and heatmap_num != 'NaN': heatmap_num = float(heatmap_num) if node.is_leaf: @@ -318,7 +319,7 @@ def set_node_style(self, node): color=gradient_color, padding_x=self.padding_x, padding_y=self.padding_y, tooltip=tooltip) node.add_face(identF, column = self.column, position = 'aligned') - elif node.is_leaf and node.props.get(self.internal_prop): + elif node.is_leaf and node.props.get(self.internal_prop) is not None: heatmap_num = node.props.get(self.internal_prop) heatmap_num = float(heatmap_num) # heatmap @@ -335,7 +336,7 @@ def set_node_style(self, node): color=gradient_color, padding_x=self.padding_x, padding_y=self.padding_y, tooltip=tooltip) node.add_face(identF, column = self.column, position = 'aligned', collapsed_only=True) - elif node.props.get(self.internal_prop): + elif node.props.get(self.internal_prop) is not None: heatmap_num = node.props.get(self.internal_prop) heatmap_num = float(heatmap_num) # heatmap From 8ab6b8453e6fff2ea389d6c7d58f8d851299461b Mon Sep 17 00:00:00 2001 From: dengzq1234 Date: Mon, 16 Dec 2024 15:47:00 +0100 Subject: [PATCH 3/4] ignore empty annotation --- treeprofiler/tree_annotate.py | 36 +++++++++++++++++------------------ treeprofiler/tree_plot.py | 4 ++-- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/treeprofiler/tree_annotate.py b/treeprofiler/tree_annotate.py index 9d14127..4f61098 100644 --- a/treeprofiler/tree_annotate.py +++ b/treeprofiler/tree_annotate.py @@ -971,15 +971,14 @@ def parse_csv(input_files, delimiter='\t', 
no_headers=False, duplicate=False): columns = defaultdict(list) prop2type = {} def update_metadata(reader, node_header): + # for tar.gz file for row in reader: nodename = row[node_header] del row[node_header] + + # remove missing value #row = {k: 'NaN' if (not v or v.lower() == 'none') else v for k, v in row.items() } ## replace empty to NaN - for k, v in row.items(): # replace missing value - if check_missing(v): - row[k] = 'NaN' - else: - row[k] = v + row = {k: v for k, v in row.items() if not check_missing(v)} if nodename in metadata.keys(): for prop, value in row.items(): @@ -1055,14 +1054,10 @@ def update_prop2type(node_props): nodename = row[node_header] del row[node_header] + # remove missing value #row = {k: 'NaN' if (not v or v.lower() == 'none') else v for k, v in row.items() } ## replace empty to NaN + row = {k: v for k, v in row.items() if not check_missing(v)} - for k, v in row.items(): # replace missing value - if check_missing(v): - row[k] = 'NaN' - else: - row[k] = v - if nodename in metadata.keys(): for prop, value in row.items(): if duplicate: @@ -1083,7 +1078,7 @@ def update_prop2type(node_props): columns[prop].append(value) # append the value into the appropriate list # based on column name k update_prop2type(node_props) - + return metadata, node_props, columns, prop2type def parse_tsv_to_array(input_files, delimiter='\t', no_headers=True): @@ -1259,11 +1254,13 @@ def load_metadata_to_tree(tree, metadata_dict, prop2type={}, taxon_column=None, try: flot_value = float(value) if math.isnan(flot_value): - target_node.add_prop(key, 'NaN') + #target_node.add_prop(key, 'NaN') + pass else: target_node.add_prop(key, flot_value) except (ValueError,TypeError): - target_node.add_prop(key, 'NaN') + #target_node.add_prop(key, 'NaN') + pass # categorical # list @@ -1292,12 +1289,11 @@ def load_metadata_to_tree(tree, metadata_dict, prop2type={}, taxon_column=None, try: flot_value = float(value) if math.isnan(flot_value): - target_node.add_prop(key, 'NaN') + 
pass else: target_node.add_prop(key, flot_value) except (ValueError,TypeError): - target_node.add_prop(key, 'NaN') - + pass # categorical # list elif key in prop2type and prop2type[key]==list: @@ -1719,8 +1715,10 @@ def parse_emapper_annotations(input_file, delimiter='\t', no_headers=False): nodename = row[node_header] del row[node_header] - for k, v in row.items(): # Replace missing value - row[k] = 'NaN' if check_missing(v) else v + # remove missing value + #row = {k: 'NaN' if (not v or v.lower() == 'none') else v for k, v in row.items() } ## replace empty to NaN + row = {k: v for k, v in row.items() if not check_missing(v)} + metadata[nodename] = dict(row) for k, v in row.items(): # Go over each column name and value columns[k].append(v) # Append the value into the appropriate list based on column name k diff --git a/treeprofiler/tree_plot.py b/treeprofiler/tree_plot.py index d518e55..d658e69 100644 --- a/treeprofiler/tree_plot.py +++ b/treeprofiler/tree_plot.py @@ -844,7 +844,7 @@ def run(args): #### Output ##### popup_prop_keys.extend(list(set(visualized_props))) - popup_prop_keys = tuple(popup_prop_keys) + popup_prop_keys = sorted(tuple(popup_prop_keys)) if args.out_colordict: wrtie_color(total_color_dict) @@ -853,7 +853,7 @@ def run(args): get_image(tree, layouts, args.port, os.path.abspath(file_path)) else: tree.explore(keep_server=True, compress=False, quiet=args.verbose, - layouts=layouts, port=args.port, include_props=sorted(popup_prop_keys), + layouts=layouts, port=args.port, include_props=popup_prop_keys, show_leaf_name=args.hide_leaf_name, show_branch_support=args.hide_branch_support, show_branch_length=args.hide_branch_distance) From 947032f89879cdc3245f85dddb017a7cfbab4741 Mon Sep 17 00:00:00 2001 From: dengzq1234 Date: Mon, 16 Dec 2024 16:05:51 +0100 Subject: [PATCH 4/4] update tooltip prop --- treeprofiler/tree_plot.py | 49 +++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git 
a/treeprofiler/tree_plot.py b/treeprofiler/tree_plot.py index d658e69..038cdc9 100644 --- a/treeprofiler/tree_plot.py +++ b/treeprofiler/tree_plot.py @@ -483,23 +483,31 @@ def run(args): if layout == 'heatmap-layout': heatmap_layouts, level = get_heatmap_layouts(tree, args.heatmap_layout, level, column_width=args.column_width, padding_x=args.padding_x, padding_y=args.padding_y, internal_rep=internal_num_rep, color_config=color_config, norm_method='min-max') layouts.extend(heatmap_layouts) - visualized_props.extend(args.heatmap_layout) - + for prop in args.heatmap_layout: + visualized_props.append(prop) + visualized_props.append(utils.add_suffix(prop, internal_num_rep)) + if layout == 'heatmap-mean-layout': heatmap_mean_layouts, level = get_heatmap_layouts(tree, args.heatmap_mean_layout, level, column_width=args.column_width, padding_x=args.padding_x, padding_y=args.padding_y, internal_rep=internal_num_rep, color_config=color_config, norm_method='mean') layouts.extend(heatmap_mean_layouts) - visualized_props.extend(args.heatmap_mean_layout) + for prop in args.heatmap_mean_layout: + visualized_props.append(prop) + visualized_props.append(utils.add_suffix(prop, internal_num_rep)) if layout == 'heatmap-zscore-layout': heatmap_zscore_layouts, level = get_heatmap_layouts(tree, args.heatmap_zscore_layout, level, column_width=args.column_width, padding_x=args.padding_x, padding_y=args.padding_y, internal_rep=internal_num_rep, color_config=color_config, norm_method='zscore') layouts.extend(heatmap_zscore_layouts) - visualized_props.extend(args.heatmap_zscore_layout) + for prop in args.heatmap_zscore_layout: + visualized_props.append(prop) + visualized_props.append(utils.add_suffix(prop, internal_num_rep)) if layout == 'label-layout': label_layouts, level, color_dict = get_label_layouts(tree, args.label_layout, level, prop2type=prop2type, column_width=args.column_width, padding_x=args.padding_x, padding_y=args.padding_y, color_config=color_config) 
layouts.extend(label_layouts) total_color_dict.append(color_dict) - visualized_props.extend(args.label_layout) + for prop in args.label_layout: + visualized_props.append(prop) + visualized_props.append(utils.add_suffix(prop, 'counter')) if layout == 'colorbranch-layout': categorical_props = [prop for prop in args.colorbranch_layout if prop2type.get(prop) in [str, list, bool, None]] @@ -507,8 +515,9 @@ def run(args): colorbranch_layouts, level, color_dict = get_colorbranch_layouts(tree, categorical_props, level, prop2type=prop2type, column_width=args.column_width, padding_x=args.padding_x, padding_y=args.padding_y, color_config=color_config) layouts.extend(colorbranch_layouts) total_color_dict.append(color_dict) - visualized_props.extend(categorical_props) - #visualized_props.extend([utils.add_suffix(prop, 'counter') for prop in args.piechart_layout]) + for prop in categorical_props: + visualized_props.append(prop) + visualized_props.append(utils.add_suffix(prop, 'counter')) numerical_props = [prop for prop in args.colorbranch_layout if prop2type.get(prop) in [float, int]] if numerical_props: @@ -516,7 +525,9 @@ def run(args): prop2type, padding_x=args.padding_x, padding_y=args.padding_y, internal_rep=internal_num_rep, color_config=color_config) layouts.extend(branchscore_layouts) - visualized_props.extend(numerical_props) + for prop in numerical_props: + visualized_props.append(prop) + visualized_props.append(utils.add_suffix(prop, internal_num_rep)) if layout == 'bubble-layout': categorical_props = [prop for prop in args.bubble_layout if prop2type.get(prop) in [str, list, bool, None]] @@ -527,7 +538,10 @@ def run(args): color_config=color_config) layouts.extend(bubble_layouts) total_color_dict.append(color_dict) - visualized_props.extend(categorical_props) + for prop in categorical_props: + visualized_props.append(prop) + visualized_props.append(utils.add_suffix(prop, 'counter')) + #visualized_props.extend(categorical_props) 
#visualized_props.extend([utils.add_suffix(prop, 'counter') for prop in args.piechart_layout numerical_props = [prop for prop in args.bubble_layout if prop2type.get(prop) in [float, int]] @@ -538,7 +552,10 @@ def run(args): internal_rep=internal_num_rep, bubble_range=args.bubble_range, color_config=color_config) layouts.extend(bubble_layouts) - visualized_props.extend(numerical_props) + #visualized_props.extend(numerical_props) + for prop in numerical_props: + visualized_props.append(prop) + visualized_props.append(utils.add_suffix(prop, internal_num_rep)) if layout == "piechart-layout": piechart_layouts = get_piechart_layouts(tree, args.piechart_layout, @@ -574,18 +591,21 @@ def run(args): layouts.extend(binary_layouts) total_color_dict.append(color_dict) visualized_props.extend(args.binary_layout) + visualized_props.extend([utils.add_suffix(prop, 'counter') for prop in args.binary_layout]) if layout == 'binary-aggregate-layout': binary_aggregate_layouts, level, color_dict = get_binary_layouts(tree, args.binary_aggregate_layout, level, prop2type=prop2type, column_width=args.column_width, reverse=False, padding_x=args.padding_x, padding_y=args.padding_y, color_config=color_config, same_color=False, aggregate=True) layouts.extend(binary_aggregate_layouts) total_color_dict.append(color_dict) visualized_props.extend(args.binary_aggregate_layout) + visualized_props.extend([utils.add_suffix(prop, 'counter') for prop in args.binary_aggregate_layout]) if layout == 'binary-unicolor-layout': binary2_layouts, level, color_dict = get_binary_layouts(tree, args.binary_unicolor_layout, level, prop2type=prop2type, column_width=args.column_width, reverse=False, padding_x=args.padding_x, padding_y=args.padding_y, color_config=color_config, same_color=True, aggregate=False) layouts.extend(binary2_layouts) total_color_dict.append(color_dict) visualized_props.extend(args.binary_unicolor_layout) + visualized_props.extend([utils.add_suffix(prop, 'counter') for prop in 
args.binary_aggregate_layout]) if layout == 'binary-unicolor-aggregate-layout': binary2_aggregate_layouts, level, color_dict = get_binary_layouts(tree, args.binary_unicolor_aggregate_layout, level, prop2type=prop2type, column_width=args.column_width, reverse=False, padding_x=args.padding_x, padding_y=args.padding_y, color_config=color_config, same_color=True, aggregate=True) @@ -600,12 +620,17 @@ def run(args): barplot_colorby=args.barplot_colorby, max_range=args.barplot_range) layouts.extend(barplot_layouts) total_color_dict.append(color_dict) - visualized_props.extend(args.barplot_layout) + for prop in args.barplot_layout: + visualized_props.append(prop) + visualized_props.append(utils.add_suffix(prop, internal_num_rep)) + if layout == "branchscore-layout": branchscore_layouts = get_branchscore_layouts(tree, args.branchscore_layout, prop2type, padding_x=args.padding_x, padding_y=args.padding_y, internal_rep='avg') layouts.extend(branchscore_layouts) - visualized_props.extend(args.branchscore_layout) + for prop in args.branchscore_layout: + visualized_props.append(prop) + visualized_props.append(utils.add_suffix(prop, internal_num_rep)) if layout == 'alignment-layout': lengh = len(max(utils.tree_prop_array(tree, 'alignment'),key=len))