From cead8a5817d48e15f9c1e482f1a2963f4be2bbb7 Mon Sep 17 00:00:00 2001 From: Deng Luo Date: Sun, 3 Nov 2024 22:27:04 +0300 Subject: [PATCH 01/79] cmd: dfit, dfit multi, hello world framework --- bundle_info.xml | 6 +++ src/__init__.py | 30 +++++++++++++ src/cmd.py | 112 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 148 insertions(+) create mode 100644 src/cmd.py diff --git a/bundle_info.xml b/bundle_info.xml index d80160b..7c22169 100644 --- a/bundle_info.xml +++ b/bundle_info.xml @@ -48,6 +48,12 @@ will be displayed without the ChimeraX- prefix. ChimeraX :: Tool :: DiffFit :: Volume Data :: Differentiable Fit in Map + + ChimeraX :: Command :: dfit :: General :: + Fit a single structure into a volume map + ChimeraX :: Command :: dfit disk :: General :: + Fit structures from disk into a volume map + \ No newline at end of file diff --git a/src/__init__.py b/src/__init__.py index 91bf11c..5de72c1 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -38,5 +38,35 @@ def get_class(class_name): return tool.DiffFitTool raise ValueError("Unknown class name '%s'" % class_name) + @staticmethod + def register_command(bi, ci, logger): + # bi is an instance of chimerax.core.toolshed.BundleInfo + # ci is an instance of chimerax.core.toolshed.CommandInfo + # logger is an instance of chimerax.core.logger.Logger + + # We check the name of the command, which should match + # one of the ones listed in bundle_info.xml + # (without the leading and trailing whitespace), + # and import the function to call and its argument + # description from the ``cmd`` module. + # If the description does not contain a synopsis, we + # add the one in ``ci``, which comes from bundle_info.xml. + from . import cmd + if ci.name == "dfit": + func = cmd.dfit + desc = cmd.dfit_desc + elif ci.name == "dfit disk": + func = cmd.dfit_disk + desc = cmd.dfit_disk_desc + else: + raise ValueError("trying to register unknown command: %s" % ci.name) + if desc.synopsis is None: + desc.synopsis = ci.synopsis + + # We then register the function as the command callback + # with the chimerax.core.commands module. + from chimerax.core.commands import register + register(ci.name, desc, func) + # Create the ``bundle_api`` object that ChimeraX expects. 
bundle_api = _MyAPI() \ No newline at end of file diff --git a/src/cmd.py b/src/cmd.py new file mode 100644 index 0000000..4d55a60 --- /dev/null +++ b/src/cmd.py @@ -0,0 +1,112 @@ +# vim: set expandtab shiftwidth=4 softtabstop=4: + +from chimerax.core.commands import CmdDesc # Command description +from chimerax.atomic import AtomsArg # Collection of atoms argument +from chimerax.core.commands import BoolArg # Boolean argument +from chimerax.core.commands import ColorArg # Color argument +from chimerax.core.commands import IntArg # Integer argument +from chimerax.core.commands import EmptyArg # (see below) +from chimerax.core.commands import Or, Bounded # Argument modifiers + + +# ========================================================================== +# Functions and descriptions for registering using ChimeraX bundle API +# ========================================================================== + + +def dfit(session, atoms, weighted=False, transformed=True): + """Report center of mass of given atoms.""" + + # ``session`` - ``chimerax.core.session.Session`` instance + # ``atoms`` - ``chimerax.atomic.Atoms`` instance or None + # ``weighted`` - boolean, whether to include atomic mass in calculation + # ``transformed`` - boolean, use scene rather than original coordinates + + atoms, coords, cofm = _get_cofm(session, atoms, transformed, weighted) + session.logger.info("%s center of mass: %s" % + ("weighted" if weighted else "unweighted", cofm)) + + +dfit_desc = CmdDesc(required=[("atoms", Or(AtomsArg, EmptyArg))], + optional=[("weighted", BoolArg), + ("transformed", BoolArg)]) + + + +def dfit_disk(session, atoms, color, weighted=False, transformed=True, count=1): + """Highlight the atoms nearest the center of mass of given atoms.""" + + # ``session`` - ``chimerax.core.session.Session`` instance + # ``atoms`` - ``chimerax.atomic.Atoms`` instance or None + # ``color`` - ``chimerax.core.colors.Color` instance + # ``weighted`` - boolean, whether to include atomic mass in calculation + # ``transformed`` - boolean, use scene rather than original coordinates + + # Compute the center of mass first + atoms, coords, cofm = _get_cofm(session, atoms, transformed, weighted) + + # Compute the distance of each atom from the cofm + # using the NumPy vector norm function + from numpy.linalg import norm + distances = norm(coords - cofm, axis=1) + + # Sort the array and get the "count" indices to the closest atoms + if count > len(atoms): + count = len(atoms) + from numpy import argsort + atom_indices = argsort(distances)[:count] + + # Create a collection of atoms from the indices + chosen = atoms[atom_indices] + + # Update their "colors". Assigning a single value to an + # array means assign the same value for all elements. 
+ chosen.colors = color.uint8x4() + + +dfit_disk_desc = CmdDesc(required=[("atoms", Or(AtomsArg, EmptyArg)), + ("color", ColorArg)], + optional=[("weighted", BoolArg), + ("transformed", BoolArg), + ("count", Bounded(IntArg, 1, 50))]) + + +# ========================================================================== +# Functions intended only for internal use by bundle +# ========================================================================== + + +def _get_cofm(session, atoms, transformed, weighted): + # ``session`` - ``chimerax.core.session.Session`` instance + # ``atoms`` - ``chimerax.atomic.Atoms`` instance + # ``transformed`` - boolean, use scene rather than original coordinates + # ``weighted`` - boolean, whether to include atomic mass in calculation + + # If user did not specify the list of atoms, use all atoms + if atoms is None: + from chimerax.core.commands import all_objects + atoms = all_objects(session).atoms + + # We can use either transformed or untransformed coordinates. + # Transformed coordinates are "scene coordinates", which + # takes into account translation and rotation of individual + # models. Untransformed coordinates are the coordinates + # read from the data files. + if transformed: + coords = atoms.scene_coords + else: + coords = atoms.coords + + # ``coords`` is a ``numpy`` float array of shape (N, 3) + # If we want weighted center, we have to multiply coordinates + # by the atomic mass + if not weighted: + cofm = coords.mean(axis=0) + else: + m = atoms.elements.masses + c = coords * m[:,None] + cofm = c.sum(axis=0) / m.sum() + + # To get the average coordinates, we use ``numpy.mean`` + # print("DEBUG: center of mass:", cofm) + return atoms, coords, cofm \ No newline at end of file From 396e54935c2276171c70886386a889e3f774fe51 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Sun, 3 Nov 2024 23:56:38 +0300 Subject: [PATCH 02/79] Example commands --- src/cmd.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/cmd.py b/src/cmd.py index 4d55a60..401532a 100644 --- a/src/cmd.py +++ b/src/cmd.py @@ -31,6 +31,16 @@ def dfit(session, atoms, weighted=False, transformed=True): optional=[("weighted", BoolArg), ("transformed", BoolArg)]) +# Example commands +# dfit #1 in #2 +# dfit #1 in #2 level 0.7 sim_res 5.0 num_p 10 num_r 100 smooth_by smooth_loops kernel_sizes gaussian_mode fit_atom_mode out_dir device +# +# dfit multi str_dir sim_dir in map level 0.7 num_p 10 num_r 100 +# smooth_by smooth_loops smooth_weights kernel_sizes gaussian_mode fit_atom_mode out_dir device +# negative_space +# learning_rate +# n_iters + def dfit_disk(session, atoms, color, weighted=False, transformed=True, count=1): From c390cc13d161b26ce282c5d0965874c242941363 Mon Sep 17 00:00:00 2001 From: Deng Luo Date: Mon, 4 Nov 2024 11:34:45 +0300 Subject: [PATCH 03/79] log fit_atom_mode --- src/tool.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/tool.py b/src/tool.py index 05d982f..5b0189d 100644 --- a/src/tool.py +++ b/src/tool.py @@ -1476,6 +1476,7 @@ def single_fit_button_clicked(self): f"Smooth loops: {self._single_fit_gaussian_loops.value()}\n" f"Kernel sizes: {self.smooth_kernel_sizes.text()}\n" f"Gaussian mode: {self.Gaussian_mode}\n" + f"Fit atom mode: {self.fit_atom_mode}\n" f"-------\n") self.disable_spheres_clicked() @@ -1680,6 +1681,7 @@ def run_button_clicked(self): f"# positions: {self.settings.N_shifts}\n" f"# rotations: {self.settings.N_quaternions}\n" f"Gaussian mode: {self.Gaussian_mode}\n" + f"Fit atom mode: {self.fit_atom_mode}\n" f"Conv. 
loops: {self.settings.conv_loops}\n" f"Conv. kernel sizes: {self.settings.conv_kernel_sizes}\n" f"Conv. weights: {self.settings.conv_weights}\n" From 675ed885bde7ac7bc9c9d4b154bd8b0391155d13 Mon Sep 17 00:00:00 2001 From: Deng Luo Date: Mon, 4 Nov 2024 13:23:50 +0300 Subject: [PATCH 04/79] dfit cmd desc and func def header --- src/cmd.py | 102 +++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 76 insertions(+), 26 deletions(-) diff --git a/src/cmd.py b/src/cmd.py index 401532a..3e839b3 100644 --- a/src/cmd.py +++ b/src/cmd.py @@ -1,45 +1,95 @@ # vim: set expandtab shiftwidth=4 softtabstop=4: from chimerax.core.commands import CmdDesc # Command description -from chimerax.atomic import AtomsArg # Collection of atoms argument -from chimerax.core.commands import BoolArg # Boolean argument -from chimerax.core.commands import ColorArg # Color argument -from chimerax.core.commands import IntArg # Integer argument +from chimerax.atomic import StructureArg, AtomsArg # Collection of atoms argument +from chimerax.core.commands import BoolArg, ColorArg, IntArg, FloatArg, StringArg, SaveFolderNameArg from chimerax.core.commands import EmptyArg # (see below) from chimerax.core.commands import Or, Bounded # Argument modifiers +from chimerax.map.mapargs import MapArg +import os +from datetime import datetime # ========================================================================== # Functions and descriptions for registering using ChimeraX bundle API # ========================================================================== -def dfit(session, atoms, weighted=False, transformed=True): - """Report center of mass of given atoms.""" - - # ``session`` - ``chimerax.core.session.Session`` instance - # ``atoms`` - ``chimerax.atomic.Atoms`` instance or None - # ``weighted`` - boolean, whether to include atomic mass in calculation - # ``transformed`` - boolean, use scene rather than original coordinates - - atoms, coords, cofm = _get_cofm(session, atoms, transformed, weighted) - session.logger.info("%s center of mass: %s" % - ("weighted" if weighted else "unweighted", cofm)) - - -dfit_desc = CmdDesc(required=[("atoms", Or(AtomsArg, EmptyArg))], - optional=[("weighted", BoolArg), - ("transformed", BoolArg)]) +def dfit(session, mol, in_map, + level=None, + sim_res=5.0, + num_positions=10, + num_rotations=100, + smooth_by="PyTorch iterative Gaussian", + smooth_loops=3, + kernel_sizes="[5, 5, 5]", + Gaussian_mode="Gaussian with negative (shrink)", + fit_atom_mode="Backbone", + out_dir="DiffFit_out/interactive", + device=None): + """Fit a single structure into a volume map""" + + _save_results = True + _out_dir_exist_ok = True + _out_dir = out_dir + + use_level = in_map.maximum_surface_level + if level is not None: + use_level = level + + if _save_results: + + os.makedirs(_out_dir, exist_ok=_out_dir_exist_ok) + with open(f"{_out_dir}/log.log", "a") as log_file: + log_file.write(f"=======\n" + f"Wall clock time: {datetime.now()}\n" + f"-------\n" + f"Interactive mode\n" + f"Target Volume: {in_map.path}\n" + f"Structure: {mol.filename}\n" + f"Target Surface Threshold: {use_level}\n" + f"-------\n" + f"Sim-map resolution: {sim_res}\n" + f"# positions: {num_positions}\n" + f"# rotations: {num_rotations}\n" + f"Smooth by: \"{smooth_by}\"\n" + f"Smooth loops: {smooth_loops}\n" + f"Kernel sizes: \"{kernel_sizes}\"\n" + f"Gaussian mode: \"{Gaussian_mode}\"\n" + f"Fit atom mode: \"{fit_atom_mode}\"\n" + f"-------\n") + + +dfit_desc = CmdDesc(required=[("mol", StructureArg)], + keyword=[("in_map", MapArg), 
+ ("level", FloatArg), + ("sim_res", FloatArg), + ("num_positions", IntArg), + ("num_rotations", IntArg), + ("smooth_by", StringArg), + ("smooth_loops", StringArg), + ("kernel_sizes", StringArg), + ("Gaussian_mode", StringArg), + ("fit_atom_mode", StringArg), + ("out_dir", SaveFolderNameArg), + ("device", StringArg)], + required_arguments=['in_map']) # Example commands # dfit #1 in #2 -# dfit #1 in #2 level 0.7 sim_res 5.0 num_p 10 num_r 100 smooth_by smooth_loops kernel_sizes gaussian_mode fit_atom_mode out_dir device +# dfit #1 in #2 +# level 0.7 sim_res 5.0 +# num_p 10 num_r 100 +# smooth_by smooth_loops kernel_sizes gaussian_mode +# fit_atom_mode out_dir device # -# dfit multi str_dir sim_dir in map level 0.7 num_p 10 num_r 100 -# smooth_by smooth_loops smooth_weights kernel_sizes gaussian_mode fit_atom_mode out_dir device -# negative_space -# learning_rate -# n_iters +# dfit multi str_dir sim_dir in map +# level 0.7 num_p 10 num_r 100 +# smooth_by smooth_loops smooth_weights kernel_sizes gaussian_mode +# fit_atom_mode out_dir device +# negative_space +# learning_rate +# n_iters From 17056e12a866741f68c365e54e55901a609c6df3 Mon Sep 17 00:00:00 2001 From: Deng Luo Date: Mon, 4 Nov 2024 13:27:18 +0300 Subject: [PATCH 05/79] Add quotes to log for strings --- src/tool.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/tool.py b/src/tool.py index 5b0189d..df4fc93 100644 --- a/src/tool.py +++ b/src/tool.py @@ -1472,11 +1472,11 @@ def single_fit_button_clicked(self): f"Sim-map resolution: {self._single_fit_res.value()}\n" f"# positions: {self._single_fit_n_shifts.value()}\n" f"# rotations: {self._single_fit_n_quaternions.value()}\n" - f"Smooth by: {self._smooth_by.currentText()}\n" + f"Smooth by: \"{self._smooth_by.currentText()}\"\n" f"Smooth loops: {self._single_fit_gaussian_loops.value()}\n" - f"Kernel sizes: {self.smooth_kernel_sizes.text()}\n" - f"Gaussian mode: {self.Gaussian_mode}\n" - f"Fit atom mode: {self.fit_atom_mode}\n" + f"Kernel sizes: \"{self.smooth_kernel_sizes.text()}\"\n" + f"Gaussian mode: \"{self.Gaussian_mode}\"\n" + f"Fit atom mode: \"{self.fit_atom_mode}\"\n" f"-------\n") self.disable_spheres_clicked() @@ -1680,11 +1680,11 @@ def run_button_clicked(self): f"-------\n" f"# positions: {self.settings.N_shifts}\n" f"# rotations: {self.settings.N_quaternions}\n" - f"Gaussian mode: {self.Gaussian_mode}\n" - f"Fit atom mode: {self.fit_atom_mode}\n" + f"Gaussian mode: \"{self.Gaussian_mode}\"\n" + f"Fit atom mode: \"{self.fit_atom_mode}\"\n" f"Conv. loops: {self.settings.conv_loops}\n" - f"Conv. kernel sizes: {self.settings.conv_kernel_sizes}\n" - f"Conv. weights: {self.settings.conv_weights}\n" + f"Conv. kernel sizes: \"{self.settings.conv_kernel_sizes}\"\n" + f"Conv. 
weights: \"{self.settings.conv_weights}\"\n" f"-------\n") From 0c2bc9470eb8dc62a0d4d69ab7226df510a49b8b Mon Sep 17 00:00:00 2001 From: Deng Luo Date: Mon, 4 Nov 2024 14:30:46 +0300 Subject: [PATCH 06/79] dfit cmd working now --- src/cmd.py | 159 ++++++++++++++++++++++++++++++++++++++++++++++++++-- src/tool.py | 2 +- 2 files changed, 156 insertions(+), 5 deletions(-) diff --git a/src/cmd.py b/src/cmd.py index 3e839b3..06a9358 100644 --- a/src/cmd.py +++ b/src/cmd.py @@ -8,8 +8,15 @@ from chimerax.map.mapargs import MapArg import os +import numpy as np +import torch from datetime import datetime +from .DiffAtomComp import diff_atom_comp, cluster_and_sort_sqd_fast, diff_fit, conv_volume, numpy2tensor, \ + linear_norm_tensor +import ast +from chimerax.core.commands import run + # ========================================================================== # Functions and descriptions for registering using ChimeraX bundle API # ========================================================================== @@ -33,9 +40,15 @@ def dfit(session, mol, in_map, _out_dir_exist_ok = True _out_dir = out_dir - use_level = in_map.maximum_surface_level + _use_level = in_map.maximum_surface_level if level is not None: - use_level = level + _use_level = level + + _use_device = "cpu" + if torch.cuda.is_available(): + _use_device = "cuda:0" + if device is not None: + _use_device = device if _save_results: @@ -47,7 +60,7 @@ def dfit(session, mol, in_map, f"Interactive mode\n" f"Target Volume: {in_map.path}\n" f"Structure: {mol.filename}\n" - f"Target Surface Threshold: {use_level}\n" + f"Target Surface Threshold: {_use_level}\n" f"-------\n" f"Sim-map resolution: {sim_res}\n" f"# positions: {num_positions}\n" @@ -57,8 +70,87 @@ def dfit(session, mol, in_map, f"Kernel sizes: \"{kernel_sizes}\"\n" f"Gaussian mode: \"{Gaussian_mode}\"\n" f"Fit atom mode: \"{fit_atom_mode}\"\n" + f"Device: \"{_use_device}\"\n" f"-------\n") + single_fit_timer_start = datetime.now() + + # Prepare mol and vol + vol_matrix = in_map.full_matrix() + + # Copy vol and make it clean after thresholding + vol_copy = in_map.writable_copy() + vol_copy_matrix = vol_copy.data.matrix() + vol_copy_matrix[vol_copy_matrix < _use_level] = 0 + vol_copy.data.values_changed() + + # Smooth the volume + volume_conv_list = _create_volume_conv_list(session, + vol_copy, + smooth_by, smooth_loops, kernel_sizes, Gaussian_mode, + _use_device) + vol_copy.delete() + + # Apply the user's transformation and center mol + from chimerax.geometry import Place + mol.atoms.transform(mol.position) + mol_center = mol.atoms.coords.mean(axis=0) + transform = Place(origin=-mol_center) + mol.atoms.transform(transform) + mol.position = Place() + + # Simulate a map for the mol + from chimerax.map.molmap import molecule_map + mol_vol = molecule_map(session, mol.atoms, sim_res, + grid_spacing=in_map.data.step[0]) + + input_coords = None + if fit_atom_mode == "Backbone": + backbone_atoms = ['N', 'CA', 'C', 'O'] + is_backbone = np.isin(mol.atoms.names, backbone_atoms) + + input_coords = mol.atoms.scene_coords[is_backbone] + elif fit_atom_mode == "All": + input_coords = mol.atoms.scene_coords + + # Fit + timer_start = datetime.now() + + if _save_results: + with open(f"{_out_dir}/log.log", "a") as log_file: + log_file.write(f"DiffFit optimization starts: {timer_start}\n") + + (_, + _, + mol_paths, + mol_centers, + fit_result) = diff_fit( + volume_conv_list, + in_map.path, + _use_level, + in_map.data.step, + in_map.data.origin, + 10, + [input_coords], + mol.filename, + 
[(mol_vol.full_matrix(), mol_vol.data.step, mol_vol.data.origin)], + N_shifts=num_rotations, + N_quaternions=num_rotations, + save_results=_save_results, + out_dir=_out_dir, + out_dir_exist_ok=_out_dir_exist_ok, + device=_use_device + ) + timer_stop = datetime.now() + print(f"\nDiffFit optimization time elapsed: {timer_stop - timer_start}\n\n") + + if _save_results: + with open(f"{_out_dir}/log.log", "a") as log_file: + log_file.write(f"-------\n" + f"DiffFit optimization time elapsed: {timer_stop - timer_start}\n") + + mol_vol.delete() + dfit_desc = CmdDesc(required=[("mol", StructureArg)], keyword=[("in_map", MapArg), @@ -169,4 +261,63 @@ def _get_cofm(session, atoms, transformed, weighted): # To get the average coordinates, we use ``numpy.mean`` # print("DEBUG: center of mass:", cofm) - return atoms, coords, cofm \ No newline at end of file + return atoms, coords, cofm + + +def _create_volume_conv_list(session, vol, smooth_by, smooth_loops, smooth_kernel_sizes, Gaussian_mode, device, negative_space_value=-0.5): + # From here on, there are three strategies for utilizing gaussian smooth + # 1. with increasing sDev on the same input volume + # 2. with the same sDev iteratively + # Combine 1 & 2 + # Need to do experiment to see which one is better + + volume_conv_list = [None] * (smooth_loops + 1) + volume_conv_list[0] = vol.full_matrix() + + if smooth_by == "PyTorch iterative Gaussian": + volume_conv_list[0], _ = numpy2tensor(volume_conv_list[0], device) + volume_conv_list[0] = linear_norm_tensor(volume_conv_list[0]) + volume_conv_list = conv_volume(volume_conv_list[0], + device, + smooth_loops, + ast.literal_eval(smooth_kernel_sizes), + negative_space_value=negative_space_value, + kernel_type="Gaussian", + mode=Gaussian_mode) + volume_conv_list = [v.squeeze().detach().cpu().numpy() for v in volume_conv_list] + elif smooth_by == "ChimeraX incremental Gaussian": + for conv_idx in range(1, smooth_loops + 1): + vol_gaussian = run(session, f"volume gaussian #{vol.id[0]} sDev {conv_idx}") + + vol_device, _ = numpy2tensor(vol_gaussian.full_matrix(), device) + vol_device = linear_norm_tensor(vol_device) + + eligible_volume_tensor = vol_device > 0.0 + vol_device[~eligible_volume_tensor] = negative_space_value + + volume_conv_list[conv_idx] = vol_device.squeeze().detach().cpu().numpy() + + vol_gaussian.delete() + elif smooth_by == "ChimeraX iterative Gaussian": + kernel_sizes = ast.literal_eval(smooth_kernel_sizes) + vol_current = vol + for conv_idx in range(1, smooth_loops + 1): + vol_gaussian = run(session, f"volume gaussian #{vol_current.id[0]} sDev {kernel_sizes[conv_idx - 1]}") + + if conv_idx > 1: + vol_current.delete() + + vol_device, _ = numpy2tensor(vol_gaussian.full_matrix(), device) + vol_device = linear_norm_tensor(vol_device) + + eligible_volume_tensor = vol_device > 0.0 + vol_device[~eligible_volume_tensor] = negative_space_value + + volume_conv_list[conv_idx] = vol_device.squeeze().detach().cpu().numpy() + + vol_current = vol_gaussian + + vol_current.delete() + + + return volume_conv_list \ No newline at end of file diff --git a/src/tool.py b/src/tool.py index df4fc93..cd89ad8 100644 --- a/src/tool.py +++ b/src/tool.py @@ -1483,7 +1483,7 @@ def single_fit_button_clicked(self): single_fit_timer_start = datetime.now() - # Prepare mol anv vol + # Prepare mol and vol mol = self._object_menu.value self.fit_mol_list = [mol] From 593ae9dd381446bd6944be50a6c013cd071941db Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Mon, 4 Nov 2024 17:41:57 +0300 Subject: [PATCH 07/79] clear dfit cmd bug: 
num_positions --- src/cmd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cmd.py b/src/cmd.py index 06a9358..cf6e4f1 100644 --- a/src/cmd.py +++ b/src/cmd.py @@ -134,7 +134,7 @@ def dfit(session, mol, in_map, [input_coords], mol.filename, [(mol_vol.full_matrix(), mol_vol.data.step, mol_vol.data.origin)], - N_shifts=num_rotations, + N_shifts=num_positions, N_quaternions=num_rotations, save_results=_save_results, out_dir=_out_dir, From 69ca27d67b20fcc9670d2277aaec7c334d1e837f Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Mon, 4 Nov 2024 22:35:06 +0300 Subject: [PATCH 08/79] dfit cmd done --- src/cmd.py | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/src/cmd.py b/src/cmd.py index cf6e4f1..20c3423 100644 --- a/src/cmd.py +++ b/src/cmd.py @@ -73,6 +73,15 @@ def dfit(session, mol, in_map, f"Device: \"{_use_device}\"\n" f"-------\n") + from chimerax.core import tools + from .tool import DiffFitTool + df = tools.get_singleton(session, DiffFitTool, 'DiffFit', create=True) + + df.disable_spheres_clicked() + df.fit_mol_list = [mol] + df.fit_vol = in_map + df.mol = mol + single_fit_timer_start = datetime.now() # Prepare mol and vol @@ -122,9 +131,9 @@ def dfit(session, mol, in_map, (_, _, - mol_paths, - mol_centers, - fit_result) = diff_fit( + df.mol_paths, + df.mol_centers, + df.fit_result) = diff_fit( volume_conv_list, in_map.path, _use_level, @@ -151,6 +160,27 @@ def dfit(session, mol, in_map, mol_vol.delete() + df._view_input_mode.setCurrentText("interactive") + df._view_input_mode_changed() + df.interactive_fit_result_ready = True + df.show_results(df.fit_result, df.mol_centers, df.mol_paths) + + df.tab_widget.setCurrentWidget(df.tab_view_group) + + df.select_table_item(0) + + timer_stop = datetime.now() + print(f"\nDiffFit total time elapsed: {timer_stop - single_fit_timer_start}\n\n") + + if _save_results: + metric_json = df.return_cluster_metric_json(0) + with open(f"{_out_dir}/log.log", "a") as log_file: + log_file.write(f"DiffFit total time elapsed: {timer_stop - single_fit_timer_start}\n" + f"-------\n" + f"DiffFit top fit metric:\n" + f"{metric_json}\n" + f"=======\n\n") + dfit_desc = CmdDesc(required=[("mol", StructureArg)], keyword=[("in_map", MapArg), From 9f55a63580ac969fb3e491036c54257380f0fd67 Mon Sep 17 00:00:00 2001 From: Deng Luo Date: Tue, 5 Nov 2024 12:06:08 +0300 Subject: [PATCH 09/79] min_cluster_size not in use comment --- src/DiffAtomComp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DiffAtomComp.py b/src/DiffAtomComp.py index cb37fb0..596c376 100644 --- a/src/DiffAtomComp.py +++ b/src/DiffAtomComp.py @@ -889,7 +889,7 @@ def diff_fit(volume_list: list, def diff_atom_comp(target_vol_path: str, target_surface_threshold: float, - min_cluster_size: float, + min_cluster_size: float, # not in use, use 100 as a placeholder structures_dir: str, structures_sim_map_dir: str, fit_atom_mode:str = "Backbone", From 52d1bb9d3db8c310ce56a5e87de10fcafb0dd80a Mon Sep 17 00:00:00 2001 From: Deng Luo Date: Tue, 5 Nov 2024 12:06:26 +0300 Subject: [PATCH 10/79] dfit disk cmd header --- bundle_info.xml | 2 +- src/cmd.py | 123 +++++++++++++++++++----------------------------- 2 files changed, 50 insertions(+), 75 deletions(-) diff --git a/bundle_info.xml b/bundle_info.xml index 7c22169..ae0208d 100644 --- a/bundle_info.xml +++ b/bundle_info.xml @@ -52,7 +52,7 @@ will be displayed without the ChimeraX- prefix. 
ChimeraX :: Command :: dfit :: General :: Fit a single structure into a volume map ChimeraX :: Command :: dfit disk :: General :: - Fit structures from disk into a volume map + Fit a list of structures from disk into a volume map diff --git a/src/cmd.py b/src/cmd.py index 20c3423..550afce 100644 --- a/src/cmd.py +++ b/src/cmd.py @@ -2,7 +2,9 @@ from chimerax.core.commands import CmdDesc # Command description from chimerax.atomic import StructureArg, AtomsArg # Collection of atoms argument -from chimerax.core.commands import BoolArg, ColorArg, IntArg, FloatArg, StringArg, SaveFolderNameArg +from chimerax.core.commands import (BoolArg, ColorArg, IntArg, FloatArg, StringArg, + SaveFolderNameArg, OpenFileNameArg, SaveFileNameArg, + OpenFolderNameArg, SaveFolderNameArg) from chimerax.core.commands import EmptyArg # (see below) from chimerax.core.commands import Or, Bounded # Argument modifiers from chimerax.map.mapargs import MapArg @@ -195,7 +197,7 @@ def dfit(session, mol, in_map, ("fit_atom_mode", StringArg), ("out_dir", SaveFolderNameArg), ("device", StringArg)], - required_arguments=['in_map']) + required_arguments=["in_map"]) # Example commands # dfit #1 in #2 @@ -204,53 +206,62 @@ def dfit(session, mol, in_map, # num_p 10 num_r 100 # smooth_by smooth_loops kernel_sizes gaussian_mode # fit_atom_mode out_dir device -# -# dfit multi str_dir sim_dir in map -# level 0.7 num_p 10 num_r 100 -# smooth_by smooth_loops smooth_weights kernel_sizes gaussian_mode -# fit_atom_mode out_dir device -# negative_space -# learning_rate -# n_iters +def dfit_disk(session, str_dir, sim_dir, in_map, level, -def dfit_disk(session, atoms, color, weighted=False, transformed=True, count=1): - """Highlight the atoms nearest the center of mass of given atoms.""" + num_positions=10, + num_rotations=100, - # ``session`` - ``chimerax.core.session.Session`` instance - # ``atoms`` - ``chimerax.atomic.Atoms`` instance or None - # ``color`` - ``chimerax.core.colors.Color` instance - # ``weighted`` - boolean, whether to include atomic mass in calculation - # ``transformed`` - boolean, use scene rather than original coordinates + smooth_loops=3, + kernel_sizes="[5, 5, 5]", + smooth_weights="[1.0, 1.0, 1.0]", - # Compute the center of mass first - atoms, coords, cofm = _get_cofm(session, atoms, transformed, weighted) + Gaussian_mode="Gaussian with negative (shrink)", + fit_atom_mode="Backbone", + negative_space=-0.5, - # Compute the distance of each atom from the cofm - # using the NumPy vector norm function - from numpy.linalg import norm - distances = norm(coords - cofm, axis=1) + learning_rate=0.01, + n_iters=201, - # Sort the array and get the "count" indices to the closest atoms - if count > len(atoms): - count = len(atoms) - from numpy import argsort - atom_indices = argsort(distances)[:count] + out_dir="DiffFit_out/disk", + device=None): + """Fit a list of structures from disk into a volume map""" - # Create a collection of atoms from the indices - chosen = atoms[atom_indices] - # Update their "colors". Assigning a single value to an - # array means assign the same value for all elements. 
- chosen.colors = color.uint8x4() +dfit_disk_desc = CmdDesc(keyword=[("str_dir", OpenFolderNameArg), + ("sim_dir", OpenFolderNameArg), + ("in_map", OpenFileNameArg), + ("level", FloatArg), + ("num_positions", IntArg), + ("num_rotations", IntArg), -dfit_disk_desc = CmdDesc(required=[("atoms", Or(AtomsArg, EmptyArg)), - ("color", ColorArg)], - optional=[("weighted", BoolArg), - ("transformed", BoolArg), - ("count", Bounded(IntArg, 1, 50))]) + ("smooth_loops", StringArg), + ("kernel_sizes", StringArg), + ("smooth_weights", StringArg), + + ("Gaussian_mode", StringArg), + ("fit_atom_mode", StringArg), + ("negative_space", FloatArg), + + ("learning_rate", FloatArg), + ("n_iters", IntArg), + + ("out_dir", SaveFolderNameArg), + ("device", StringArg)], + required_arguments=["str_dir", + "sim_dir", + "in_map", + "level"]) + +# dfit disk str_dir dir sim_dir dir in map level 0.7 +# num_p 10 num_r 100 +# smooth_by smooth_loops smooth_weights kernel_sizes gaussian_mode +# fit_atom_mode out_dir device +# negative_space +# learning_rate +# n_iters # ========================================================================== @@ -258,42 +269,6 @@ def dfit_disk(session, atoms, color, weighted=False, transformed=True, count=1): # ========================================================================== -def _get_cofm(session, atoms, transformed, weighted): - # ``session`` - ``chimerax.core.session.Session`` instance - # ``atoms`` - ``chimerax.atomic.Atoms`` instance - # ``transformed`` - boolean, use scene rather than original coordinates - # ``weighted`` - boolean, whether to include atomic mass in calculation - - # If user did not specify the list of atoms, use all atoms - if atoms is None: - from chimerax.core.commands import all_objects - atoms = all_objects(session).atoms - - # We can use either transformed or untransformed coordinates. - # Transformed coordinates are "scene coordinates", which - # takes into account translation and rotation of individual - # models. Untransformed coordinates are the coordinates - # read from the data files. - if transformed: - coords = atoms.scene_coords - else: - coords = atoms.coords - - # ``coords`` is a ``numpy`` float array of shape (N, 3) - # If we want weighted center, we have to multiply coordinates - # by the atomic mass - if not weighted: - cofm = coords.mean(axis=0) - else: - m = atoms.elements.masses - c = coords * m[:,None] - cofm = c.sum(axis=0) / m.sum() - - # To get the average coordinates, we use ``numpy.mean`` - # print("DEBUG: center of mass:", cofm) - return atoms, coords, cofm - - def _create_volume_conv_list(session, vol, smooth_by, smooth_loops, smooth_kernel_sizes, Gaussian_mode, device, negative_space_value=-0.5): # From here on, there are three strategies for utilizing gaussian smooth # 1. with increasing sDev on the same input volume From 8425c064d792e6808144c268906b29b2b5229131 Mon Sep 17 00:00:00 2001 From: Deng Luo Date: Tue, 5 Nov 2024 17:59:51 +0300 Subject: [PATCH 11/79] Update Reproduce.md --- Reproduce.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Reproduce.md b/Reproduce.md index 2de9157..92d5bff 100644 --- a/Reproduce.md +++ b/Reproduce.md @@ -13,7 +13,9 @@ This guide is to reproduce Fig. 4 of our paper. 1. Download [DiffFit-0.6.2-py3-none-any.whl](https://github.com/nanovis/DiffFit/releases/download/v0.6.2/DiffFit-0.6.2-py3-none-any.whl) 2. Follow the [installation guide](https://github.com/nanovis/DiffFit?tab=readme-ov-file#install) -3. 
Download the [repository](https://github.com/nanovis/DiffFit/archive/refs/heads/main.zip) and unzip it to Desktop +3. Download the [repository](https://github.com/nanovis/DiffFit/archive/refs/heads/main.zip) + and unzip it to your working directory + (which is your Desktop if you are on Windows or your home folder if you are on Linux). 4. Open ChimeraX, launch DiffFit, go to the `Settings` tab, change `Fit atoms:` to `All atoms`. 5. Go to the `Disk` tab, set the parameters as below: 1. Target Volume: `DiffFit-main\dev_data\input\domain_fit_demo_3domains\density2.mrc` From 90de92bb71400e6980abf040a66d39acaeb6a35e Mon Sep 17 00:00:00 2001 From: Deng Luo Date: Tue, 5 Nov 2024 18:07:21 +0300 Subject: [PATCH 12/79] dfit disk cmd log --- src/cmd.py | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/src/cmd.py b/src/cmd.py index 550afce..7b29899 100644 --- a/src/cmd.py +++ b/src/cmd.py @@ -228,6 +228,58 @@ def dfit_disk(session, str_dir, sim_dir, in_map, level, device=None): """Fit a list of structures from disk into a volume map""" + from chimerax.core import tools + from .tool import DiffFitTool + df = tools.get_singleton(session, DiffFitTool, 'DiffFit', create=True) + + if df.interactive_fit_result_ready: + df.session.logger.error("You have run the fitting in Interactive mode. " + "Please run the following command: \n\n" + "close session\n\n" + "and then launch DiffFit again to run the fitting in Disk mode.") + return + + _save_results = True + _out_dir_exist_ok = True + _out_dir = out_dir + + _use_device = "cpu" + if torch.cuda.is_available(): + _use_device = "cuda:0" + if device is not None: + _use_device = device + + os.makedirs(_out_dir, exist_ok=_out_dir_exist_ok) + with open(f"{_out_dir}/log.log", "a") as log_file: + log_file.write(f"=======\n" + f"Wall clock time: {datetime.now()}\n" + f"-------\n" + f"Disk mode\n" + f"Structures Folder: {str_dir}\n" + f"Sim-map Folder: {sim_dir}\n" + f"Target Volume: {in_map}\n" + f"Target Surface Threshold: {level}\n" + f"-------\n" + + f"# positions: {num_positions}\n" + f"# rotations: {num_rotations}\n" + + f"Conv. loops: {smooth_loops}\n" + f"Conv. kernel sizes: \"{kernel_sizes}\"\n" + f"Conv. 
weights: \"{smooth_weights}\"\n" + + f"Gaussian mode: \"{Gaussian_mode}\"\n" + f"Fit atom mode: \"{fit_atom_mode}\"\n" + f"Negative space: \"{negative_space}\"\n" + + f"Learning rate: \"{learning_rate}\"\n" + f"# iters: \"{n_iters}\"\n" + + f"Out dir: \"{_out_dir}\"\n" + f"Device: \"{_use_device}\"\n" + f"-------\n") + + dfit_disk_desc = CmdDesc(keyword=[("str_dir", OpenFolderNameArg), ("sim_dir", OpenFolderNameArg), From 9f531a4f6d07b3cbd218ba38e42f825f41cc534f Mon Sep 17 00:00:00 2001 From: Deng Luo Date: Tue, 5 Nov 2024 18:18:05 +0300 Subject: [PATCH 13/79] dfit disk cmd done --- src/cmd.py | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/src/cmd.py b/src/cmd.py index 7b29899..3bc77b7 100644 --- a/src/cmd.py +++ b/src/cmd.py @@ -279,6 +279,70 @@ def dfit_disk(session, str_dir, sim_dir, in_map, level, f"Device: \"{_use_device}\"\n" f"-------\n") + df.disable_spheres_clicked() + disk_fit_timer_start = datetime.now() + + print("Running the computation...") + + with open(f"{_out_dir}/log.log", "a") as log_file: + log_file.write(f"DiffFit optimization starts: {disk_fit_timer_start}\n") + + timer_start = datetime.now() + (target_vol_path, + target_surface_threshold, + mol_paths, + mol_centers, + e_sqd_log) = diff_atom_comp( + target_vol_path=in_map, + target_surface_threshold=level, + min_cluster_size=100, + structures_dir=str_dir, + structures_sim_map_dir=sim_dir, + fit_atom_mode=fit_atom_mode, + Gaussian_mode=Gaussian_mode, + N_shifts=num_positions, + N_quaternions=num_rotations, + negative_space_value=negative_space, + learning_rate=learning_rate, + n_iters=n_iters, + out_dir=_out_dir, + out_dir_exist_ok=_out_dir_exist_ok, + conv_loops=smooth_loops, + conv_kernel_sizes=ast.literal_eval(kernel_sizes), + conv_weights=ast.literal_eval(smooth_weights), + device=_use_device + ) + + timer_stop = datetime.now() + print(f"\nDiffFit optimization time elapsed: {timer_stop - timer_start}\n\n") + + with open(f"{_out_dir}/log.log", "a") as log_file: + log_file.write(f"-------\n" + f"DiffFit optimization time elapsed: {timer_stop - timer_start}\n") + + # copy the directories + df.target_vol.setText(in_map) + df.dataset_folder.setText(_out_dir) + + # output is tensor, convert to numpy + df.show_results(e_sqd_log.detach().cpu().numpy(), + mol_centers, + mol_paths, + target_vol_path, + target_surface_threshold) + df.tab_widget.setCurrentWidget(df.tab_view_group) + df.select_table_item(0) + + timer_stop = datetime.now() + print(f"\nDiffFit total time elapsed: {timer_stop - disk_fit_timer_start}\n\n") + + metric_json = df.return_cluster_metric_json(0) + with open(f"{_out_dir}/log.log", "a") as log_file: + log_file.write(f"DiffFit total time elapsed: {timer_stop - disk_fit_timer_start}\n" + f"-------\n" + f"DiffFit top fit metric:\n" + f"{metric_json}\n" + f"=======\n\n") dfit_disk_desc = CmdDesc(keyword=[("str_dir", OpenFolderNameArg), From 3a10569c66598678b12197f62b12341b49054edf Mon Sep 17 00:00:00 2001 From: Deng Luo Date: Wed, 6 Nov 2024 13:44:36 +0300 Subject: [PATCH 14/79] dfit path cmd done --- bundle_info.xml | 2 ++ src/__init__.py | 3 +++ src/cmd.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+) diff --git a/bundle_info.xml b/bundle_info.xml index ae0208d..1d2db1d 100644 --- a/bundle_info.xml +++ b/bundle_info.xml @@ -53,6 +53,8 @@ will be displayed without the ChimeraX- prefix. 
Fit a single structure into a volume map ChimeraX :: Command :: dfit disk :: General :: Fit a list of structures from disk into a volume map + ChimeraX :: Command :: dfit path :: General :: + Manipulate the paths in DiffFit results diff --git a/src/__init__.py b/src/__init__.py index 5de72c1..fe9c8ef 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -58,6 +58,9 @@ def register_command(bi, ci, logger): elif ci.name == "dfit disk": func = cmd.dfit_disk desc = cmd.dfit_disk_desc + elif ci.name == "dfit path": + func = cmd.dfit_path + desc = cmd.dfit_path_desc else: raise ValueError("trying to register unknown command: %s" % ci.name) if desc.synopsis is None: diff --git a/src/cmd.py b/src/cmd.py index 3bc77b7..f779626 100644 --- a/src/cmd.py +++ b/src/cmd.py @@ -380,6 +380,50 @@ def dfit_disk(session, str_dir, sim_dir, in_map, level, # n_iters +def dfit_path(session, mode, res_dir, old_path=None, new_path=None): + """Change the volume map's path in DiffFit results""" + import numpy as np + fit_res = np.load(f"{res_dir}/fit_res.npz") + target_vol_path = fit_res['target_vol_path'] + target_surface_threshold = fit_res['target_surface_threshold'] + mol_paths = fit_res['mol_paths'] + mol_centers = fit_res['mol_centers'] + opt_res = fit_res['opt_res'] + + if mode == "show": + print(f"Vol path: {target_vol_path}") + print(f"Mol paths: {mol_paths}") + + elif mode == "vol": + if new_path is None: + session.logger.error("You must specify a new path.") + return + + target_vol_path = new_path + print(f"New Vol path: {new_path}") + + elif mode == "mol": + if old_path is None or new_path is None: + session.logger.error("You must specify both old and new paths.") + return + + # Replace the old path with the new path in mol_paths array + mol_paths = np.array([path.replace(old_path, new_path) if old_path in path else path for path in mol_paths]) + print(f"Replaced \"{old_path}\" with \"{new_path}\" in the molecules' path.") + + np.savez(f"{res_dir}/fit_res.npz", + target_vol_path=target_vol_path, + target_surface_threshold=target_surface_threshold, + mol_paths=mol_paths, + mol_centers=mol_centers, + opt_res=opt_res) + + +dfit_path_desc = CmdDesc(required=[("mode", StringArg), + ("res_dir", OpenFolderNameArg)], + keyword=[("old_path", StringArg), + ("new_path", StringArg)]) + # ========================================================================== # Functions intended only for internal use by bundle # ========================================================================== From 4777ee534c983fda9d99684b5603c4296c052704 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Thu, 7 Nov 2024 11:15:52 +0300 Subject: [PATCH 15/79] Safe guard dfit disk cmd for nogui --- src/cmd.py | 55 +++++++++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/src/cmd.py b/src/cmd.py index f779626..35d03d2 100644 --- a/src/cmd.py +++ b/src/cmd.py @@ -232,12 +232,13 @@ def dfit_disk(session, str_dir, sim_dir, in_map, level, from .tool import DiffFitTool df = tools.get_singleton(session, DiffFitTool, 'DiffFit', create=True) - if df.interactive_fit_result_ready: - df.session.logger.error("You have run the fitting in Interactive mode. " - "Please run the following command: \n\n" - "close session\n\n" - "and then launch DiffFit again to run the fitting in Disk mode.") - return + if df is not None: + if df.interactive_fit_result_ready: + df.session.logger.error("You have run the fitting in Interactive mode. 
" + "Please run the following command: \n\n" + "close session\n\n" + "and then launch DiffFit again to run the fitting in Disk mode.") + return _save_results = True _out_dir_exist_ok = True @@ -279,7 +280,9 @@ def dfit_disk(session, str_dir, sim_dir, in_map, level, f"Device: \"{_use_device}\"\n" f"-------\n") - df.disable_spheres_clicked() + if df is not None: + df.disable_spheres_clicked() + disk_fit_timer_start = datetime.now() print("Running the computation...") @@ -320,29 +323,31 @@ def dfit_disk(session, str_dir, sim_dir, in_map, level, log_file.write(f"-------\n" f"DiffFit optimization time elapsed: {timer_stop - timer_start}\n") - # copy the directories - df.target_vol.setText(in_map) - df.dataset_folder.setText(_out_dir) + if df is not None: + # copy the directories + df.target_vol.setText(in_map) + df.dataset_folder.setText(_out_dir) - # output is tensor, convert to numpy - df.show_results(e_sqd_log.detach().cpu().numpy(), - mol_centers, - mol_paths, - target_vol_path, - target_surface_threshold) - df.tab_widget.setCurrentWidget(df.tab_view_group) - df.select_table_item(0) + # output is tensor, convert to numpy + df.show_results(e_sqd_log.detach().cpu().numpy(), + mol_centers, + mol_paths, + target_vol_path, + target_surface_threshold) + df.tab_widget.setCurrentWidget(df.tab_view_group) + df.select_table_item(0) timer_stop = datetime.now() print(f"\nDiffFit total time elapsed: {timer_stop - disk_fit_timer_start}\n\n") - metric_json = df.return_cluster_metric_json(0) - with open(f"{_out_dir}/log.log", "a") as log_file: - log_file.write(f"DiffFit total time elapsed: {timer_stop - disk_fit_timer_start}\n" - f"-------\n" - f"DiffFit top fit metric:\n" - f"{metric_json}\n" - f"=======\n\n") + if df is not None: + metric_json = df.return_cluster_metric_json(0) + with open(f"{_out_dir}/log.log", "a") as log_file: + log_file.write(f"DiffFit total time elapsed: {timer_stop - disk_fit_timer_start}\n" + f"-------\n" + f"DiffFit top fit metric:\n" + f"{metric_json}\n" + f"=======\n\n") dfit_disk_desc = CmdDesc(keyword=[("str_dir", OpenFolderNameArg), From 1e588e3cb3589a69b2c69a06af13c3184f2a370c Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Thu, 7 Nov 2024 12:09:49 +0300 Subject: [PATCH 16/79] Use Linux style slash in paths, it's Win compatible The Win double backslash is not Linux compatible --- src/tool.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/tool.py b/src/tool.py index cd89ad8..af6d81c 100644 --- a/src/tool.py +++ b/src/tool.py @@ -95,17 +95,17 @@ def interp_backbone_for_mol(mol): class DiffFitSettings: def __init__(self): # viewing - self.view_output_directory: str = "D:\\GIT\\DiffFit\\dev_data\\output" - self.view_target_vol_path: str = "D:\\GIT\\DiffFit\\dev_data\\input\\domain_fit_demo_3domains\\density2.mrc" - self.view_structures_directory: str = "D:\\GIT\\DiffFit\dev_data\input\domain_fit_demo_3domains\subunits_cif" + self.view_output_directory: str = "D:/GIT/DiffFit/dev_data/output" + self.view_target_vol_path: str = "D:/GIT/DiffFit/dev_data/input/domain_fit_demo_3domains/density2.mrc" + self.view_structures_directory: str = "D:/GIT/DiffFit/dev_data/input/domain_fit_demo_3domains/subunits_cif" # computing - self.input_directory: str = "D:\\GIT\\DiffFit\\dev_data\\input\\domain_fit_demo_3domains" - self.target_vol_path: str = "D:\\GIT\\DiffFit\\dev_data\\input\\domain_fit_demo_3domains\\density2.mrc" - self.structures_directory: str = "D:\\GIT\\DiffFit\dev_data\input\domain_fit_demo_3domains\subunits_cif" - 
self.structures_sim_map_dir: str = "D:\\GIT\\DiffFit\dev_data\input\domain_fit_demo_3domains\subunits_mrc" + self.input_directory: str = "D:/GIT/DiffFit/dev_data/input/domain_fit_demo_3domains" + self.target_vol_path: str = "D:/GIT/DiffFit/dev_data/input/domain_fit_demo_3domains/density2.mrc" + self.structures_directory: str = "D:/GIT/DiffFit/dev_data/input/domain_fit_demo_3domains/subunits_cif" + self.structures_sim_map_dir: str = "D:/GIT/DiffFit/dev_data/input/domain_fit_demo_3domains/subunits_mrc" - self.output_directory: str = "D:\\GIT\\DiffFit\\dev_data\\output" + self.output_directory: str = "D:/GIT/DiffFit/dev_data/output" self.target_surface_threshold: float = 2.0 self.min_cluster_size: float = 100 @@ -1778,7 +1778,7 @@ def load_button_clicked(self): return print("loading data...") - fit_res = np.load("{0}\\fit_res.npz".format(datasetoutput)) + fit_res = np.load("{0}/fit_res.npz".format(datasetoutput)) target_vol_path = fit_res['target_vol_path'] target_surface_threshold = fit_res['target_surface_threshold'] mol_paths = fit_res['mol_paths'] From f2805ee87b998339e8494eac512ebf9a875505f3 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Thu, 7 Nov 2024 16:31:43 +0300 Subject: [PATCH 17/79] remove QFileDialog.Options bug --- src/tool.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/tool.py b/src/tool.py index af6d81c..e740ae3 100644 --- a/src/tool.py +++ b/src/tool.py @@ -1322,18 +1322,14 @@ def select_clicked(self, text, target, save = False, pattern = "dir"): ext = "" if save: - options = QFileDialog.Options() - options |= QFileDialog.DontUseNativeDialog - fileName, ext = QFileDialog.getSaveFileName(target, text, "", pattern, options = options) + fileName, ext = QFileDialog.getSaveFileName(target, text, "", pattern) ext = ext[-4:] ext = ext[:3] else: if pattern == "dir": fileName = QFileDialog.getExistingDirectory(target, text) elif len(pattern) > 0 : - options = QFileDialog.Options() - options |= QFileDialog.DontUseNativeDialog - fileName, ext = QFileDialog.getOpenFileName(target, text, "", pattern, options = options) + fileName, ext = QFileDialog.getOpenFileName(target, text, "", pattern) ext = ext[-4:] ext = ext[:3] From b6b3ffff1e1d8de4643753e9643284b46aabf68d Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Thu, 7 Nov 2024 17:19:41 +0300 Subject: [PATCH 18/79] mask default to float --- src/DiffAtomComp.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/DiffAtomComp.py b/src/DiffAtomComp.py index 596c376..3b0c218 100644 --- a/src/DiffAtomComp.py +++ b/src/DiffAtomComp.py @@ -658,13 +658,13 @@ def rotate_centers(mol_centers, e_quaternions): def calculate_metrics(render, elements_sim_density): # Mask to filter elements in render that are greater than zero - mask = render > 0 + mask = (render > 0.0).float() # Apply the mask to the render and elements_sim_density tensors render_filtered = render * mask elements_sim_density_filtered = elements_sim_density * mask - mask_sum = mask.float().sum(dim=-1, keepdim=True) - in_contour_percentage = mask.float().mean(dim=-1) + mask_sum = mask.sum(dim=-1, keepdim=True) + in_contour_percentage = mask.mean(dim=-1) # Calculation of correlation # First, normalize the inputs to have zero mean and unit variance, as Pearson's correlation requires From 7d2596ead2a0567fcc969fc2b3a31e1e16df4432 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Thu, 7 Nov 2024 17:27:44 +0300 Subject: [PATCH 19/79] sort all os.listdir --- src/DiffAtomComp.py | 6 +++--- src/convert2mrc_npy.py | 2 +- src/parse_log.py | 8 
++++---- src/run.py | 2 +- src/tool.py | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/DiffAtomComp.py b/src/DiffAtomComp.py index 3b0c218..c9e603a 100644 --- a/src/DiffAtomComp.py +++ b/src/DiffAtomComp.py @@ -366,7 +366,7 @@ def mrc_to_npy(mrc_filename): def mrc_folder_to_npy_list(mrc_folder): sim_map_list = [] - for file_name in os.listdir(mrc_folder): + for file_name in sorted(os.listdir(mrc_folder)): full_path = os.path.join(mrc_folder, file_name) # Check if the current path is a file and not a directory if os.path.isfile(full_path): @@ -630,7 +630,7 @@ def center_atom_coords_list(atom_coords_list, mol_centers): def read_all_files_to_atom_coords_list(structures_dir, fit_atom_mode="Backbone"): atom_coords_list = [] # List all files in the given directory - for file_name in os.listdir(structures_dir): + for file_name in sorted(os.listdir(structures_dir)): full_path = os.path.join(structures_dir, file_name) # Check if the current path is a file and not a directory if os.path.isfile(full_path): @@ -1063,7 +1063,7 @@ def diff_atom_comp(target_vol_path: str, e_sqd_log[:, :, :, :, 3:7] /= q_norms mol_paths = [] - for file_name in os.listdir(structures_dir): + for file_name in sorted(os.listdir(structures_dir)): full_path = os.path.join(structures_dir, file_name) mol_paths.append(full_path) diff --git a/src/convert2mrc_npy.py b/src/convert2mrc_npy.py index b448fd1..452cdbe 100644 --- a/src/convert2mrc_npy.py +++ b/src/convert2mrc_npy.py @@ -21,7 +21,7 @@ resolution = float(sys.argv[4]) # Resolution for simulated MRC files gridSpacing = float(sys.argv[5]) # gridSpacing for simulated MRC files -for file_name in os.listdir(structures_dir): +for file_name in sorted(os.listdir(structures_dir)): print(f"\n======= Processing {file_name} =======") full_path = os.path.join(structures_dir, file_name) # Check if the current path is a file and not a directory diff --git a/src/parse_log.py b/src/parse_log.py index 855b025..5fed89b 100644 --- a/src/parse_log.py +++ b/src/parse_log.py @@ -49,7 +49,7 @@ def animate_MQS(e_sqd_log, mol_folder, MQS, session, clean_scene=True): for structure in structures: structure.delete() - mol_files = os.listdir(mol_folder) + mol_files = sorted(os.listdir(mol_folder)) mol_path = os.path.join(mol_folder, mol_files[MQS[0]]) mol = run(session, f"open {mol_path}")[0] @@ -69,7 +69,7 @@ def animate_MQS_2(e_sqd_log, mol_folder, MQS, session, clean_scene=True): for structure in structures: structure.delete() - mol_files = os.listdir(mol_folder) + mol_files = sorted(os.listdir(mol_folder)) mol_path = os.path.join(mol_folder, mol_files[MQS[0]]) mol = run(session, f"open {mol_path}")[0] @@ -110,7 +110,7 @@ def look_at_MQS_idx(e_sqd_log, mol_folder, MQS, session, clean_scene=True): for structure in structures: structure.delete() - mol_files = os.listdir(mol_folder) + mol_files = sorted(os.listdir(mol_folder)) look_at_mol_idx, transformation = get_transformation_at_MQS(e_sqd_log, MQS) @@ -131,7 +131,7 @@ def look_at_cluster(e_sqd_clusters_ordered, mol_folder, cluster_idx, session, cl for structure in structures: structure.delete() - mol_files = os.listdir(mol_folder) + mol_files = sorted(os.listdir(mol_folder)) # mol_files[idx] pairs with e_sqd_log[idx] look_at_mol_idx, transformation = get_transformation_at_idx(e_sqd_clusters_ordered, cluster_idx) diff --git a/src/run.py b/src/run.py index 7654a9e..74d2ead 100644 --- a/src/run.py +++ b/src/run.py @@ -14,7 +14,7 @@ os.makedirs(out_base_dir, exist_ok=True) # Loop through all files in the segmented_maps directory 
-for map_file in os.listdir(segmented_maps_dir): +for map_file in sorted(os.listdir(segmented_maps_dir)): if map_file.endswith(".mrc"): # Construct the full path to the map file target_vol = os.path.join(segmented_maps_dir, map_file) diff --git a/src/tool.py b/src/tool.py index e740ae3..038ec1d 100644 --- a/src/tool.py +++ b/src/tool.py @@ -1610,7 +1610,7 @@ def sim_button_clicked(self): os.makedirs(output_dir) sim_structures_dir = self.sim_dir.text() - for file_name in os.listdir(sim_structures_dir): + for file_name in sorted(os.listdir(sim_structures_dir)): file_path = os.path.join(sim_structures_dir, file_name) structure = run(self.session, f'open {file_path}')[0] structure_basename = file_name.split('.')[0] From 9feece3ca5e1dab7140afdcd45373ae1e09bd9f2 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Thu, 7 Nov 2024 22:21:37 +0300 Subject: [PATCH 20/79] rename cmd to dfit cmd to avoid import conflict --- src/__init__.py | 14 +++++++------- src/{cmd.py => dfit_cmd.py} | 0 2 files changed, 7 insertions(+), 7 deletions(-) rename src/{cmd.py => dfit_cmd.py} (100%) diff --git a/src/__init__.py b/src/__init__.py index fe9c8ef..03253b2 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -51,16 +51,16 @@ def register_command(bi, ci, logger): # description from the ``cmd`` module. # If the description does not contain a synopsis, we # add the one in ``ci``, which comes from bundle_info.xml. - from . import cmd + from . import dfit_cmd if ci.name == "dfit": - func = cmd.dfit - desc = cmd.dfit_desc + func = dfit_cmd.dfit + desc = dfit_cmd.dfit_desc elif ci.name == "dfit disk": - func = cmd.dfit_disk - desc = cmd.dfit_disk_desc + func = dfit_cmd.dfit_disk + desc = dfit_cmd.dfit_disk_desc elif ci.name == "dfit path": - func = cmd.dfit_path - desc = cmd.dfit_path_desc + func = dfit_cmd.dfit_path + desc = dfit_cmd.dfit_path_desc else: raise ValueError("trying to register unknown command: %s" % ci.name) if desc.synopsis is None: diff --git a/src/cmd.py b/src/dfit_cmd.py similarity index 100% rename from src/cmd.py rename to src/dfit_cmd.py From 0a9f96add781d194cc703e63dd102f0b2a23b263 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Sun, 10 Nov 2024 14:44:40 +0300 Subject: [PATCH 21/79] Add average density in the table's metric --- src/DiffAtomComp.py | 23 ++++++++++++++++------- src/tablemodel.py | 5 +++-- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/src/DiffAtomComp.py b/src/DiffAtomComp.py index c9e603a..faa1284 100644 --- a/src/DiffAtomComp.py +++ b/src/DiffAtomComp.py @@ -691,7 +691,16 @@ def calculate_metrics(render, elements_sim_density): correlation = overlap / (render_norm * elements_sim_density_norm) - return torch.nan_to_num(torch.stack((overlap_mean, correlation, cam, in_contour_percentage), dim=-1)) + average_density_inside = render_filtered.sum(dim=-1) / mask.sum(dim=-1) + average_density_all = render.mean(dim=-1) + + return torch.nan_to_num(torch.stack(( + overlap_mean, + correlation, + cam, + in_contour_percentage, + average_density_inside, + average_density_all), dim=-1)) def diff_fit(volume_list: list, @@ -786,7 +795,7 @@ def diff_fit(volume_list: list, # Training loop log_every = 10 - e_sqd_log = torch.zeros([num_molecules, N_quaternions, N_shifts, int(n_iters / 10) + 2, 12], device=device) + e_sqd_log = torch.zeros([num_molecules, N_quaternions, N_shifts, int(n_iters / 10) + 2, 14], device=device) # [x, y, z, w, -x, -y, -z, occupied_density_sum] with torch.no_grad(): @@ -807,7 +816,7 @@ def diff_fit(volume_list: list, first_layer_positive_density_sum = 
torch.zeros([num_molecules, N_quaternions, N_shifts], device=device) occupied_density_sum = torch.zeros([num_molecules, N_quaternions, N_shifts], device=device) - metrics_table = torch.zeros([num_molecules, N_quaternions, N_shifts, 4], device=device) + metrics_table = torch.zeros([num_molecules, N_quaternions, N_shifts, 6], device=device) for mol_idx in range(num_molecules): grid = transform_coords(atom_coords_list[mol_idx], @@ -841,7 +850,7 @@ def diff_fit(volume_list: list, e_sqd_log[:, :, :, log_idx, 0:3] = e_shifts e_sqd_log[:, :, :, log_idx, 3:7] = e_quaternions e_sqd_log[:, :, :, log_idx, 7] = first_layer_positive_density_sum - e_sqd_log[:, :, :, log_idx, 8:12] = metrics_table + e_sqd_log[:, :, :, log_idx, 8:14] = metrics_table if save_results: with open(f"{out_dir}/log.log", "a") as log_file: @@ -987,7 +996,7 @@ def diff_atom_comp(target_vol_path: str, # Training loop log_every = 10 - e_sqd_log = torch.zeros([num_molecules, N_quaternions, N_shifts, int(n_iters / 10) + 2, 12], device=device) + e_sqd_log = torch.zeros([num_molecules, N_quaternions, N_shifts, int(n_iters / 10) + 2, 14], device=device) # [x, y, z, w, -x, -y, -z, occupied_density_sum] with torch.no_grad(): @@ -1008,7 +1017,7 @@ def diff_atom_comp(target_vol_path: str, first_layer_positive_density_sum = torch.zeros([num_molecules, N_quaternions, N_shifts], device=device) occupied_density_sum = torch.zeros([num_molecules, N_quaternions, N_shifts], device=device) - metrics_table = torch.zeros([num_molecules, N_quaternions, N_shifts, 4], device=device) + metrics_table = torch.zeros([num_molecules, N_quaternions, N_shifts, 6], device=device) for mol_idx in range(num_molecules): grid = transform_coords(atom_coords_list[mol_idx], @@ -1042,7 +1051,7 @@ def diff_atom_comp(target_vol_path: str, e_sqd_log[:, :, :, log_idx, 0:3] = e_shifts e_sqd_log[:, :, :, log_idx, 3:7] = e_quaternions e_sqd_log[:, :, :, log_idx, 7] = first_layer_positive_density_sum - e_sqd_log[:, :, :, log_idx, 8:12] = metrics_table + e_sqd_log[:, :, :, log_idx, 8:14] = metrics_table with open(f"{out_dir}/log.log", "a") as log_file: log_file.write(f"Epoch: {epoch + 1:05d}, " diff --git a/src/tablemodel.py b/src/tablemodel.py index dea14fb..088b362 100644 --- a/src/tablemodel.py +++ b/src/tablemodel.py @@ -13,7 +13,8 @@ def __init__(self, sqd_cluster_data, sqd_data, mol_paths, parent=None): self._header = ["Id", "Mol name", "Hits", - "Density", "Overlap", "Correlation", "Cam", "Inside"] + "Density (normalized)", "Overlap", "Correlation", "Cam", "Inside", + "Avg Density (in)", "Avg Density (all)"] # mapping of columns (from view to data) # self._mapping = [-1, -1, 10, 11, 12, 13] @@ -60,7 +61,7 @@ def data(self, index: QModelIndex, role=Qt.ItemDataRole): return str(f"{mol_idx}-{self._mol_names[mol_idx]}") elif column == 2: return int(self._sqd_cluster_data[index.row(), 3]) - elif 3 <= column <= 7: + elif 3 <= column <= 9: record_row = self._sqd_data[mol_idx, record_idx, iter_idx] return float(round(float(record_row[index.column() + 4]) * 10000)) / 10000.0 # for 4 decimals # return float(record_row[index.column() + 4]) From 5ddc95f449fc71e7208a1c35b2d6d1b614c0fe5e Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Mon, 11 Nov 2024 13:23:45 +0300 Subject: [PATCH 22/79] new metric DF CID --- src/DiffAtomComp.py | 27 ++++++++++++++++++++------- src/tablemodel.py | 5 +++-- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/src/DiffAtomComp.py b/src/DiffAtomComp.py index faa1284..6eeb6b9 100644 --- a/src/DiffAtomComp.py +++ b/src/DiffAtomComp.py @@ -694,13 
+694,26 @@ def calculate_metrics(render, elements_sim_density): average_density_inside = render_filtered.sum(dim=-1) / mask.sum(dim=-1) average_density_all = render.mean(dim=-1) + weight_c = 1.0/3.0 + weight_i = 1.0/3.0 + weight_d = 1.0/3.0 + + good_correlation = 0.85 + good_in = 0.3 + good_average_density_all = -0.1 + + df_cid = (weight_c * (correlation - good_correlation) / (1.0 - good_correlation) + + weight_i * (in_contour_percentage - good_in) / (1.0 - good_in) + + weight_d * (average_density_all - good_average_density_all) / (0.5 - good_average_density_all)) + return torch.nan_to_num(torch.stack(( overlap_mean, correlation, cam, in_contour_percentage, average_density_inside, - average_density_all), dim=-1)) + average_density_all, + df_cid), dim=-1)) def diff_fit(volume_list: list, @@ -795,7 +808,7 @@ def diff_fit(volume_list: list, # Training loop log_every = 10 - e_sqd_log = torch.zeros([num_molecules, N_quaternions, N_shifts, int(n_iters / 10) + 2, 14], device=device) + e_sqd_log = torch.zeros([num_molecules, N_quaternions, N_shifts, int(n_iters / 10) + 2, 15], device=device) # [x, y, z, w, -x, -y, -z, occupied_density_sum] with torch.no_grad(): @@ -816,7 +829,7 @@ def diff_fit(volume_list: list, first_layer_positive_density_sum = torch.zeros([num_molecules, N_quaternions, N_shifts], device=device) occupied_density_sum = torch.zeros([num_molecules, N_quaternions, N_shifts], device=device) - metrics_table = torch.zeros([num_molecules, N_quaternions, N_shifts, 6], device=device) + metrics_table = torch.zeros([num_molecules, N_quaternions, N_shifts, 7], device=device) for mol_idx in range(num_molecules): grid = transform_coords(atom_coords_list[mol_idx], @@ -850,7 +863,7 @@ def diff_fit(volume_list: list, e_sqd_log[:, :, :, log_idx, 0:3] = e_shifts e_sqd_log[:, :, :, log_idx, 3:7] = e_quaternions e_sqd_log[:, :, :, log_idx, 7] = first_layer_positive_density_sum - e_sqd_log[:, :, :, log_idx, 8:14] = metrics_table + e_sqd_log[:, :, :, log_idx, 8:15] = metrics_table if save_results: with open(f"{out_dir}/log.log", "a") as log_file: @@ -996,7 +1009,7 @@ def diff_atom_comp(target_vol_path: str, # Training loop log_every = 10 - e_sqd_log = torch.zeros([num_molecules, N_quaternions, N_shifts, int(n_iters / 10) + 2, 14], device=device) + e_sqd_log = torch.zeros([num_molecules, N_quaternions, N_shifts, int(n_iters / 10) + 2, 15], device=device) # [x, y, z, w, -x, -y, -z, occupied_density_sum] with torch.no_grad(): @@ -1017,7 +1030,7 @@ def diff_atom_comp(target_vol_path: str, first_layer_positive_density_sum = torch.zeros([num_molecules, N_quaternions, N_shifts], device=device) occupied_density_sum = torch.zeros([num_molecules, N_quaternions, N_shifts], device=device) - metrics_table = torch.zeros([num_molecules, N_quaternions, N_shifts, 6], device=device) + metrics_table = torch.zeros([num_molecules, N_quaternions, N_shifts, 7], device=device) for mol_idx in range(num_molecules): grid = transform_coords(atom_coords_list[mol_idx], @@ -1051,7 +1064,7 @@ def diff_atom_comp(target_vol_path: str, e_sqd_log[:, :, :, log_idx, 0:3] = e_shifts e_sqd_log[:, :, :, log_idx, 3:7] = e_quaternions e_sqd_log[:, :, :, log_idx, 7] = first_layer_positive_density_sum - e_sqd_log[:, :, :, log_idx, 8:14] = metrics_table + e_sqd_log[:, :, :, log_idx, 8:15] = metrics_table with open(f"{out_dir}/log.log", "a") as log_file: log_file.write(f"Epoch: {epoch + 1:05d}, " diff --git a/src/tablemodel.py b/src/tablemodel.py index 088b362..f3735f6 100644 --- a/src/tablemodel.py +++ b/src/tablemodel.py @@ -14,7 +14,8 @@ def 
__init__(self, sqd_cluster_data, sqd_data, mol_paths, parent=None): self._header = ["Id", "Mol name", "Hits", "Density (normalized)", "Overlap", "Correlation", "Cam", "Inside", - "Avg Density (in)", "Avg Density (all)"] + "Avg Density (in)", "Avg Density (all)", + "DF CID"] # mapping of columns (from view to data) # self._mapping = [-1, -1, 10, 11, 12, 13] @@ -61,7 +62,7 @@ def data(self, index: QModelIndex, role=Qt.ItemDataRole): return str(f"{mol_idx}-{self._mol_names[mol_idx]}") elif column == 2: return int(self._sqd_cluster_data[index.row(), 3]) - elif 3 <= column <= 9: + elif 3 <= column <= 10: record_row = self._sqd_data[mol_idx, record_idx, iter_idx] return float(round(float(record_row[index.column() + 4]) * 10000)) / 10000.0 # for 4 decimals # return float(record_row[index.column() + 4]) From 6b7eb36f5e99965734afd43b9356b7aeca2f751c Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Mon, 11 Nov 2024 17:33:04 +0300 Subject: [PATCH 23/79] DF CID threshold field in the View Tab --- src/DiffAtomComp.py | 10 ++++++++-- src/tool.py | 18 +++++++++++++++++- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/DiffAtomComp.py b/src/DiffAtomComp.py index 6eeb6b9..ed31457 100644 --- a/src/DiffAtomComp.py +++ b/src/DiffAtomComp.py @@ -96,7 +96,10 @@ def q2_unit_coord(Q): def cluster_and_sort_sqd_fast(e_sqd_log, mol_centers, shift_tolerance: float = 3.0, angle_tolerance: float = 6.0, - sort_column_idx: int = 7, in_contour_threshold = 0.5, correlation_threshold = 0.5): + sort_column_idx: int = 7, + in_contour_threshold: float = 0.5, + correlation_threshold: float = 0.5, + df_cid_threshold: float = 0.15): """ Cluster the fitting results in sqd table by thresholding on shift and quaternion Return the sorted cluster representatives @@ -141,19 +144,22 @@ def cluster_and_sort_sqd_fast(e_sqd_log, mol_centers, shift_tolerance: float = 3 fit_res_filtered_indices = [] in_contour_col_idx = 11 correlation_col_idx = 9 + df_cid_col_idx = 14 for mol_idx in range(N_mol): sqd_highest_corr_np_mol = sqd_highest_corr_np[mol_idx] # Fetch the columns of interest in_contour_percentage_column = sqd_highest_corr_np_mol[:, in_contour_col_idx] correlation_column = sqd_highest_corr_np_mol[:, correlation_col_idx] + df_cid_column = sqd_highest_corr_np_mol[:, df_cid_col_idx] # Create masks for the filtering conditions in_contour_mask = in_contour_percentage_column >= in_contour_threshold correlation_mask = correlation_column >= correlation_threshold + df_cid_mask = df_cid_column >= df_cid_threshold # Combine the masks to get a final filter - combined_mask = in_contour_mask & correlation_mask + combined_mask = in_contour_mask & correlation_mask & df_cid_mask # Apply the mask to filter the original array and also retrieve the indices filtered_indices = np.where(combined_mask) # Get the indices of the filtered rows diff --git a/src/tool.py b/src/tool.py index 038ec1d..d340478 100644 --- a/src/tool.py +++ b/src/tool.py @@ -125,6 +125,8 @@ def __init__(self): self.clustering_in_contour_threshold: float = 0.2 self.clustering_correlation_threshold: float = 0.5 + self.df_cid_threshold = 0.15 + class DiffFitTool(ToolInstance): @@ -280,6 +282,7 @@ def load_settings(self): self.dataset_folder.setText(self.settings.view_output_directory) # clustering + self.df_cid_threshold.setValue(self.settings.df_cid_threshold) self.clustering_in_contour_threshold.setValue(self.settings.clustering_in_contour_threshold) self.clustering_correlation_threshold.setValue(self.settings.clustering_correlation_threshold) 
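# ---------------------------------------------------------------------------
# Aside: a minimal standalone sketch of the DF CID composite score introduced in
# calculate_metrics() in this patch series. It blends the correlation, the
# in-contour fraction, and the mean map density at the atom positions, each
# rescaled so that an assumed "good" baseline maps to 0. The constants mirror
# the values hard-coded in the patch; the function name and the example inputs
# below are hypothetical, so treat this as an illustration rather than the
# plugin's API.
import numpy as np

def df_cid_score(correlation, in_contour, avg_density_all,
                 good_correlation=0.85, good_in=0.3, good_density=-0.1):
    # Equal weights (1/3 each), as in the patch; each term is
    # (value - good) / (best - good), with "best" taken as 1.0 for the
    # correlation and in-contour terms and 0.5 for the density term.
    w = 1.0 / 3.0
    return (w * (correlation - good_correlation) / (1.0 - good_correlation)
            + w * (in_contour - good_in) / (1.0 - good_in)
            + w * (avg_density_all - good_density) / (0.5 - good_density))

# Hypothetical usage, elementwise over two candidate fits:
# df_cid_score(np.array([0.92, 0.80]), np.array([0.95, 0.40]), np.array([0.30, 0.05]))
# -> approximately [0.687, 0.020]; a fit is kept when the score reaches the
#    "DF CID threshold" added to the View tab in this patch (default 0.15).
# ---------------------------------------------------------------------------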
self.clustering_angle_tolerance.setValue(self.settings.clustering_angle_tolerance) @@ -318,6 +321,7 @@ def store_settings(self): self.settings.view_target_vol_path = self.target_vol.text() # clustering + self.settings.df_cid_threshold = self.df_cid_threshold.value() self.settings.clustering_in_contour_threshold = self.clustering_in_contour_threshold.value() self.settings.clustering_correlation_threshold = self.clustering_correlation_threshold.value() self.settings.clustering_angle_tolerance = self.clustering_angle_tolerance.value() @@ -949,6 +953,17 @@ def build_view_ui(self, layout): layout.addWidget(self.dataset_folder_select, row, 2) row = row + 1 + df_cid_threshold_label = QLabel() + df_cid_threshold_label.setText("DF CID threshold:") + self.df_cid_threshold = QDoubleSpinBox() + self.df_cid_threshold.setMinimum(-1.0) + self.df_cid_threshold.setMaximum(1.0) + self.df_cid_threshold.setSingleStep(0.01) + self.df_cid_threshold.valueChanged.connect(lambda: self.store_settings()) + layout.addWidget(df_cid_threshold_label, row, 0) + layout.addWidget(self.df_cid_threshold, row, 1, 1, 2) + row = row + 1 + clustering_in_contour_threshold_label = QLabel() clustering_in_contour_threshold_label.setText("In contour threshold:") self.clustering_in_contour_threshold = QDoubleSpinBox() @@ -1368,7 +1383,8 @@ def show_results(self, e_sqd_log, mol_centers, mol_paths, target_vol_path=None, self.settings.clustering_shift_tolerance, self.settings.clustering_angle_tolerance, in_contour_threshold=self.settings.clustering_in_contour_threshold, - correlation_threshold=self.settings.clustering_correlation_threshold) + correlation_threshold=self.settings.clustering_correlation_threshold, + df_cid_threshold=self.settings.df_cid_threshold) if self.e_sqd_clusters_ordered is None: self.session.logger.error("No result under these thresholds. 
Please decrease \"In contour threshold\" or \"Correlation threshold\" or rerun the fitting!") From d54dcc1d72d6395c95a42237f0f9ae63798fb0ae Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Fri, 15 Nov 2024 14:57:25 +0300 Subject: [PATCH 24/79] removed double extension in save --- src/tool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tool.py b/src/tool.py index d340478..554ee72 100644 --- a/src/tool.py +++ b/src/tool.py @@ -1820,7 +1820,7 @@ def save_structure_button_clicked(self): def save_structure(self, targetpath, ext): if len(targetpath) > 0 and self.mol: - run(self.session, "save '{0}.{1}' models #{2}".format(targetpath, ext, self.mol.id[0])) + run(self.session, "save '{0}' models #{1}".format(targetpath, self.mol.id[0])) def save_working_volume(self, targetpath, ext): From f00a2169f7f9d0d360f8107f16aab3adf4279897 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Mon, 25 Nov 2024 11:56:05 +0300 Subject: [PATCH 25/79] removed double extension in save mrc --- src/tool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tool.py b/src/tool.py index 554ee72..c71d855 100644 --- a/src/tool.py +++ b/src/tool.py @@ -1825,7 +1825,7 @@ def save_structure(self, targetpath, ext): def save_working_volume(self, targetpath, ext): if len(targetpath) > 0 and self.vol: - run(self.session, "save '{0}.{1}' models #{2}".format(targetpath, ext, self.vol.id[0])) + run(self.session, "save '{0}' models #{1}".format(targetpath, self.vol.id[0])) def simulate_volume_clicked(self): res = self.simulate_volume_resolution.value() From b1164e7ae4f76b6c5a546aea6f48c93f526ca021 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Sat, 30 Nov 2024 19:20:22 +0300 Subject: [PATCH 26/79] def generate_q_shells --- src/dfit_cmd.py | 4 + src/q_shells.py | 192 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 196 insertions(+) create mode 100644 src/q_shells.py diff --git a/src/dfit_cmd.py b/src/dfit_cmd.py index 35d03d2..7f1e81d 100644 --- a/src/dfit_cmd.py +++ b/src/dfit_cmd.py @@ -124,6 +124,10 @@ def dfit(session, mol, in_map, elif fit_atom_mode == "All": input_coords = mol.atoms.scene_coords + + # ======= Generate q_shells + + # Fit timer_start = datetime.now() diff --git a/src/q_shells.py b/src/q_shells.py new file mode 100644 index 0000000..a385aef --- /dev/null +++ b/src/q_shells.py @@ -0,0 +1,192 @@ +def unit_sphere_vertices(num_vertices): + from chimerax.surface.shapes import sphere_geometry2 + sphere_vertices = sphere_geometry2(2*num_vertices-4)[0] # 128 points evenly distributed around a unit sphere centred on (0,0,0) + return sphere_vertices + + +RANDOM_SEED=1985 + + +def generate_q_shells(mol, + points_per_shell=8, max_rad=2.0, step=0.1, num_test_points=128, + clustering_iterations=5, include_h=False, randomize_shell_points=True, random_seed=RANDOM_SEED): + ''' + Implementation of the map-model Q-score as described in Pintille et al. (2020): https://www.nature.com/articles/s41592-020-0731-1. + + If the model is well-fitted to the map, the Q-score is essentially an atom-by-atom estimate of "resolvability" + (i.e. how much the map tells us about the true position of the atom). If the model is *not* well-fitted, then + low Q-scores are good pointers to possible problem regions. In practice, of course, usually we have a mixture of + both situations. + + This version of the algorithm has a few minor modifications compared to the original implementation aimed at + improving overall speed. 
As a result the scores it returns are not identical to the original, typically differing by + +/- 0.04 in individual atom scores and +/- 0.02 in residue averages. This difference can be explained by the different + choice of test points, and reflects the underlying sampling uncertainty in the method. + + This implementation works as follows: + + - For each atom, define a set of shells in steps of `step` angstroms out to `max_rad` angstroms. + - For each shell, try to find at least `points_per_shell` probe points closer to the test atom than + any other atom: + + - For radii smaller than about half a bond length, first try a set of `points_per_shell` + points evenly spread around the spherical surface. + - For larger radii (or if this quick approach fails to find enough points on smaller radii), + start with `num_test_points` evenly spread on the sphere surface, remove points closer to other atoms + than the test atom. If more than `points_per_shell` points remain, perform up to `clustering_iterations` + rounds of k-means clustering to find `points_per_shell` clusters, and choose the point nearest to the + centroid of each cluster. If <= `points_per_shell` points remain, just do the best with what we have. + By default, the "seed" centroids for each cluster are chosen pseudo-randomly from the input points. + Using the same `random_seed` will give the same result each time; varying `random_seed` over multiple + runs can be used to give an idea of the underlying uncertainty in the algorithm. If `randomize_shell_points` + is False, the seed centroids will instead be the closest point (in spherical coordinates) to each of + `points_per_shell` evenly-spaced points on a unit sphere. While this may intuitively seem preferable, + in practice for tightly-packed atoms it leads to oversampling of the "junctions" with other atoms, and + undersampling of the unhindered space. 
+ + Returns: + + - a numpy array with q shells coordinates + - radii + ''' + from datetime import datetime + global_timer_start = datetime.now() + + from chimerax.geometry import find_close_points, find_closest_points, Places + from chimerax.atomic import Residues + import numpy as np + from math import floor + from chimerax.qscore import _kmeans + + pps_vertices = unit_sphere_vertices(points_per_shell) + ref_sphere_vertices = unit_sphere_vertices(num_test_points) + ref_sphere_vertices_large = unit_sphere_vertices(num_test_points * 4) + ref_sphere_vertices_huge = unit_sphere_vertices(num_test_points * 20) + + radii = np.arange(0, max_rad + step / 2, step) + + query_atoms = mol.atoms + + if not include_h: + query_atoms = query_atoms[query_atoms.element_names != 'H'] + + + query_coords = query_atoms.scene_coords + + num_shells = int(floor(max_rad / step)) + + query_atoms_center = [] + query_atoms_points = [] + not_full_shells = 0 + + q_scores = [] + for i, a in enumerate(query_atoms): + + not_full_flag = False + + a_coord = a.scene_coord + _, nearby_i = find_close_points([a_coord], query_coords, max_rad * 3) + nearby_a = query_atoms[nearby_i] + ai = nearby_a.index(a) + nearby_coords = nearby_a.scene_coords + shell_rad = step + local_d_vals = {} + + shell_points = [] + + j = 1 + while shell_rad < max_rad + step / 2: + local_pps = (pps_vertices * shell_rad) + a_coord + if shell_rad < 0.7: # about half a C-C bond length + # Try the quick way first (should succeed for almost all cases unless geometry is seriously wonky) + i1, i2, near1 = find_closest_points(local_pps, nearby_coords, shell_rad * 1.5) + closest = near1 + candidates = i1[closest == ai] + if len(candidates) == points_per_shell: + shell_rad += step + j += 1 + + shell_points.append(local_pps) + + continue + + local_sphere = (ref_sphere_vertices * shell_rad) + a_coord + i1, i2, near1 = find_closest_points(local_sphere, nearby_coords, shell_rad * 1.5) + closest = near1 + candidates = i1[closest == ai] + + if len(candidates) < points_per_shell: + + local_sphere = (ref_sphere_vertices_large * shell_rad) + a_coord + i1, i2, near1 = find_closest_points(local_sphere, nearby_coords, shell_rad * 1.5) + closest = near1 + candidates = i1[closest == ai] + + if len(candidates) < points_per_shell: + local_sphere = (ref_sphere_vertices_huge * shell_rad) + a_coord + i1, i2, near1 = find_closest_points(local_sphere, nearby_coords, shell_rad * 1.5) + closest = near1 + candidates = i1[closest == ai] + + if len(candidates) < points_per_shell: + not_full_shells += 1 + not_full_flag = True + + else: + points = local_sphere[candidates] + if not randomize_shell_points: + labels, closest = _kmeans.spherical_k_means_defined(points, a_coord, points_per_shell, + local_pps, clustering_iterations) + else: + labels, closest = _kmeans.spherical_k_means_random(points, a_coord, points_per_shell, + clustering_iterations, random_seed + j) + + points = points[closest] + + else: + points = local_sphere[candidates] + if not randomize_shell_points: + labels, closest = _kmeans.spherical_k_means_defined(points, a_coord, points_per_shell, + local_pps, clustering_iterations) + else: + labels, closest = _kmeans.spherical_k_means_random(points, a_coord, points_per_shell, + clustering_iterations, random_seed + j) + + points = points[closest] + + else: + points = local_sphere[candidates] + if not randomize_shell_points: + labels, closest = _kmeans.spherical_k_means_defined(points, a_coord, points_per_shell, + local_pps, clustering_iterations) + else: + labels, closest = 
_kmeans.spherical_k_means_random(points, a_coord, points_per_shell, + clustering_iterations, random_seed + j) + + points = points[closest] + + + shell_rad += step + j += 1 + + shell_points.append(points) + + if not_full_flag: + continue + + query_atoms_center.append(a_coord) + query_atoms_points.append(shell_points) + + if not_full_shells: + print(f"Not_full_shells: {not_full_shells}") + + query_atoms_points_array = np.stack( + [np.concatenate(atom_shell_points, axis=0) for atom_shell_points in query_atoms_points]) + query_atoms_center_np = np.stack(query_atoms_center) + query_atoms_center_repeated = np.repeat(query_atoms_center_np[:, np.newaxis, :], points_per_shell, axis=1) + q_shell_coords = np.concatenate([query_atoms_center_repeated, query_atoms_points_array], axis=1) + + print(f"Generate q shells timer: {datetime.now() - global_timer_start}") + + return q_shell_coords, radii + From 42e3a8fc921ee3dd676c49929d4dee2c053490c9 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Mon, 2 Dec 2024 13:56:41 +0300 Subject: [PATCH 27/79] q_scores_for_clusters --- src/q_shells.py | 107 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/src/q_shells.py b/src/q_shells.py index a385aef..c8f11f6 100644 --- a/src/q_shells.py +++ b/src/q_shells.py @@ -1,3 +1,7 @@ +import torch +import numpy as np +from .DiffAtomComp import quaternion_to_matrix_batch, normalize_coordinates_to_map_origin_torch + def unit_sphere_vertices(num_vertices): from chimerax.surface.shapes import sphere_geometry2 sphere_vertices = sphere_geometry2(2*num_vertices-4)[0] # 128 points evenly distributed around a unit sphere centred on (0,0,0) @@ -190,3 +194,106 @@ def generate_q_shells(mol, return q_shell_coords, radii + +def min_max_d(v): + m = v.data.full_matrix() + mean, sd, _ = v.mean_sd_rms() + max_d = min(mean + sd * 10, m.max()) + min_d = max(mean - sd, m.min()) + return min_d, max_d + + +def q_scores_for_clusters(centered_mol, volume, fit_res_clusters, fit_res_all, + ref_sigma=0.6, points_per_shell=8, device="cuda"): + + shifts = [] + quaternions = [] + + for cluster_idx in range(len(fit_res_clusters)): + mol_idx = int(fit_res_clusters[cluster_idx, 0]) + record_idx = int(fit_res_clusters[cluster_idx, 1]) + iter_idx = int(fit_res_clusters[cluster_idx, 2]) + + shift = fit_res_all[mol_idx, record_idx, iter_idx, :3] + quat = np.concatenate( + ([fit_res_all[mol_idx, record_idx, iter_idx, 6]], -fit_res_all[mol_idx, record_idx, iter_idx, 3:6]), + axis=0) + + shifts.append(shift) + quaternions.append(quat) + + shifts = np.stack(shifts) + quaternions = np.stack(quaternions) + shifts = torch.tensor(shifts, device=device).float() + quaternions = torch.tensor(quaternions, device=device).float() + quaternions_matrices = quaternion_to_matrix_batch(quaternions.unsqueeze(0)) + + + q_shell_coords, radii = generate_q_shells(centered_mol) + q_shell_coords = torch.tensor(q_shell_coords, device=device).float() + q_shell_coords = q_shell_coords.reshape([-1, 3]) + + vol_matrix = volume.full_matrix() + vol_origin_and_step = volume.data_origin_and_step() + target_origin = vol_origin_and_step[0] + target_steps = vol_origin_and_step[1] + target_no_negative = vol_matrix + + target = torch.tensor(target_no_negative, device=device).float() + target_dim = target.shape + + import operator + target_size = np.array(list(map(operator.mul, target_dim, target_steps))) + + target_size_x_y_z = [target_size[2], target_size[1], target_size[0]] + target_size_x_y_z_tensor = torch.tensor(target_size_x_y_z, device=device).float() + 
target_origin_tensor = torch.tensor(target_origin, device=device).float() + + target = target.unsqueeze(0).unsqueeze(0) + + + min_d, max_d = min_max_d(volume) + a = max_d - min_d + b = min_d + + num_shells = len(radii) + + q_reference_gaussian = a * np.exp(-0.5 * (radii / ref_sigma) ** 2) + b + q_ref = np.concatenate([[q_reference_gaussian[0]] * points_per_shell, + *[[q_reference_gaussian[j]] * points_per_shell for j in range(num_shells - 1)]]) + q_ref = torch.tensor(q_ref, device=device, dtype=torch.float32) + q_ref -= q_ref.mean() + + + q_scores = [] + for row in range(len(fit_res_clusters)): + transformed_coords = torch.matmul(q_shell_coords, quaternions_matrices[:, row:row + 1, :, :]) + + transformed_coords += shifts[row, :] + + q_shell_coords_normalized_to_target = normalize_coordinates_to_map_origin_torch(transformed_coords, + target_size_x_y_z_tensor, + target_origin_tensor) + + q_shell_coords_normalized_to_target = q_shell_coords_normalized_to_target.reshape([1, 1, -1, 168, 3]) + + q_measure = torch.nn.functional.grid_sample(target, q_shell_coords_normalized_to_target, 'bilinear', 'border', + align_corners=True) + + q_measure.squeeze_() + + q_measure -= q_measure.mean(dim=-1, keepdim=True) + + inner_product = torch.matmul(q_measure, q_ref) + q_measure_l2 = torch.norm(q_measure, p=2, dim=-1) + q_ref_l2 = torch.norm(q_ref, p=2, dim=-1) + q_score_torch = inner_product / (q_measure_l2 * q_ref_l2) + + q_scores.append(q_score_torch.mean()) + + q_scores_tensor = torch.stack(q_scores) + top_10_values, top_10_indices = torch.topk(q_scores_tensor, k=10) + + # Display the results + print("Top 10 values:", top_10_values) + print("Indices of top 10 values:", top_10_indices) \ No newline at end of file From 414d6f821bc8bccacd6a9f935bea2137cdba186b Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Mon, 2 Dec 2024 21:10:47 +0300 Subject: [PATCH 28/79] print top_10_indices + 1 --- src/q_shells.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/q_shells.py b/src/q_shells.py index c8f11f6..37ef814 100644 --- a/src/q_shells.py +++ b/src/q_shells.py @@ -296,4 +296,6 @@ def q_scores_for_clusters(centered_mol, volume, fit_res_clusters, fit_res_all, # Display the results print("Top 10 values:", top_10_values) - print("Indices of top 10 values:", top_10_indices) \ No newline at end of file + print("Indices of top 10 values + 1:", top_10_indices + 1) + + return top_10_values, top_10_indices \ No newline at end of file From e4314495459aaf6b846a3da5f27bddb2fee8b925 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Tue, 3 Dec 2024 09:41:46 +0300 Subject: [PATCH 29/79] handle nan in q_scores --- src/q_shells.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/q_shells.py b/src/q_shells.py index 37ef814..b41d4b8 100644 --- a/src/q_shells.py +++ b/src/q_shells.py @@ -289,7 +289,7 @@ def q_scores_for_clusters(centered_mol, volume, fit_res_clusters, fit_res_all, q_ref_l2 = torch.norm(q_ref, p=2, dim=-1) q_score_torch = inner_product / (q_measure_l2 * q_ref_l2) - q_scores.append(q_score_torch.mean()) + q_scores.append(q_score_torch[~torch.isnan(q_score_torch)].mean()) q_scores_tensor = torch.stack(q_scores) top_10_values, top_10_indices = torch.topk(q_scores_tensor, k=10) From e09196d4c57ab9fd0cbdc0be01305b0a67f1edad Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Tue, 3 Dec 2024 10:53:55 +0300 Subject: [PATCH 30/79] interactive fit q score done --- src/q_shells.py | 16 +++++++++------- src/tablemodel.py | 11 +++++++---- src/tool.py | 9 +++++++-- 3 files changed, 23 
insertions(+), 13 deletions(-) diff --git a/src/q_shells.py b/src/q_shells.py index b41d4b8..c93ad8f 100644 --- a/src/q_shells.py +++ b/src/q_shells.py @@ -292,10 +292,12 @@ def q_scores_for_clusters(centered_mol, volume, fit_res_clusters, fit_res_all, q_scores.append(q_score_torch[~torch.isnan(q_score_torch)].mean()) q_scores_tensor = torch.stack(q_scores) - top_10_values, top_10_indices = torch.topk(q_scores_tensor, k=10) - - # Display the results - print("Top 10 values:", top_10_values) - print("Indices of top 10 values + 1:", top_10_indices + 1) - - return top_10_values, top_10_indices \ No newline at end of file + # top_10_values, top_10_indices = torch.topk(q_scores_tensor, k=10) + # + # # Display the results + # print("Top 10 values:", top_10_values) + # print("Indices of top 10 values + 1:", top_10_indices + 1) + # + # return top_10_values, top_10_indices + + return q_scores_tensor.cpu().numpy() \ No newline at end of file diff --git a/src/tablemodel.py b/src/tablemodel.py index f3735f6..1389c85 100644 --- a/src/tablemodel.py +++ b/src/tablemodel.py @@ -3,16 +3,17 @@ class TableModel(QAbstractTableModel): """A model to interface a Qt view with pandas dataframe """ - def __init__(self, sqd_cluster_data, sqd_data, mol_paths, parent=None): + def __init__(self, sqd_cluster_data, sqd_data, mol_paths, q_scores_np, parent=None): QAbstractTableModel.__init__(self, parent) self._sqd_data = sqd_data self._sqd_cluster_data = sqd_cluster_data import os self._mol_names = [os.path.splitext(os.path.basename(path))[0] for path in mol_paths] + self._q_scores_np = q_scores_np - self._header = ["Id", "Mol name", "Hits", + self._header = ["Id", "Mol name", "Q-score", "Hits", "Density (normalized)", "Overlap", "Correlation", "Cam", "Inside", "Avg Density (in)", "Avg Density (all)", "DF CID"] @@ -61,10 +62,12 @@ def data(self, index: QModelIndex, role=Qt.ItemDataRole): elif column == 1: return str(f"{mol_idx}-{self._mol_names[mol_idx]}") elif column == 2: + return float(round(float(self._q_scores_np[index.row()]) * 10000)) / 10000.0 # for 4 decimals + elif column == 3: return int(self._sqd_cluster_data[index.row(), 3]) - elif 3 <= column <= 10: + elif 4 <= column <= 11: record_row = self._sqd_data[mol_idx, record_idx, iter_idx] - return float(round(float(record_row[index.column() + 4]) * 10000)) / 10000.0 # for 4 decimals + return float(round(float(record_row[column + 3]) * 10000)) / 10000.0 # for 4 decimals # return float(record_row[index.column() + 4]) return None diff --git a/src/tool.py b/src/tool.py index c71d855..dca45fa 100644 --- a/src/tool.py +++ b/src/tool.py @@ -1391,13 +1391,18 @@ def show_results(self, e_sqd_log, mol_centers, mol_paths, target_vol_path=None, self.proxyModel = None return - self.model = TableModel(self.e_sqd_clusters_ordered, self.e_sqd_log, mol_paths) + # Calculate Q-scores + from .q_shells import q_scores_for_clusters + q_scores_np = q_scores_for_clusters(self.mol, self.vol, self.e_sqd_clusters_ordered, self.e_sqd_log) + + + self.model = TableModel(self.e_sqd_clusters_ordered, self.e_sqd_log, mol_paths, q_scores_np) self.proxyModel = QSortFilterProxyModel() self.proxyModel.setSourceModel(self.model) self.view.setModel(self.proxyModel) self.view.setSortingEnabled(True) - self.view.sortByColumn(0, Qt.AscendingOrder) + self.view.sortByColumn(2, Qt.DescendingOrder) self.view.reset() self.view.show() From d21547df4ef1add7bac4e37bc661486f35f01414 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Tue, 3 Dec 2024 14:06:32 +0300 Subject: [PATCH 31/79] up and down key work for the 
result table --- src/tool.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/src/tool.py b/src/tool.py index dca45fa..df7fe4d 100644 --- a/src/tool.py +++ b/src/tool.py @@ -128,6 +128,26 @@ def __init__(self): self.df_cid_threshold = 0.15 +class DiffFitTableView(QTableView): + def __init__(self, parent=None): + super().__init__(parent) + self.parent = parent + + self.callback = None # Callback function to call on key press + + def keyPressEvent(self, event): + super().keyPressEvent(event) + if event.key() == Qt.Key.Key_Up or event.key() == Qt.Key.Key_Down: + # Trigger the callback function if registered + if self.callback: + current_index = self.currentIndex() + self.callback(current_index) + + def up_down_key_callback(self, callback): + """Register a callback function to be called on key press.""" + self.callback = callback + + class DiffFitTool(ToolInstance): # Inheriting from ToolInstance makes us known to the ChimeraX tool mangager, @@ -1025,12 +1045,13 @@ def build_view_ui(self, layout): #layout.addWidget(self.line_edit) # table view of all the results - view = QTableView() + view = DiffFitTableView() view.resize(800, 500) view.horizontalHeader().setStretchLastSection(True) view.setAlternatingRowColors(True) view.setSelectionBehavior(QTableView.SelectRows) - view.clicked.connect(self.table_row_clicked) + view.clicked.connect(self.table_row_clicked) + view.up_down_key_callback(self.table_row_clicked) layout.addWidget(view) self.view = view layout.addWidget(view, row, 0, 1, 3) From 2a4363f4c82839fa98c1cc4d7d665f3e83f24d95 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Tue, 3 Dec 2024 14:27:04 +0300 Subject: [PATCH 32/79] better programming habit for up down key callback --- src/tool.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/tool.py b/src/tool.py index df7fe4d..d4af63e 100644 --- a/src/tool.py +++ b/src/tool.py @@ -133,19 +133,19 @@ def __init__(self, parent=None): super().__init__(parent) self.parent = parent - self.callback = None # Callback function to call on key press + self.up_down_key_callback = None # Callback function to call on key press def keyPressEvent(self, event): super().keyPressEvent(event) if event.key() == Qt.Key.Key_Up or event.key() == Qt.Key.Key_Down: # Trigger the callback function if registered - if self.callback: + if self.up_down_key_callback: current_index = self.currentIndex() - self.callback(current_index) + self.up_down_key_callback(current_index) - def up_down_key_callback(self, callback): + def setUpDownKeyCallback(self, callback): """Register a callback function to be called on key press.""" - self.callback = callback + self.up_down_key_callback = callback class DiffFitTool(ToolInstance): @@ -1051,7 +1051,7 @@ def build_view_ui(self, layout): view.setAlternatingRowColors(True) view.setSelectionBehavior(QTableView.SelectRows) view.clicked.connect(self.table_row_clicked) - view.up_down_key_callback(self.table_row_clicked) + view.setUpDownKeyCallback(self.table_row_clicked) layout.addWidget(view) self.view = view layout.addWidget(view, row, 0, 1, 3) From bda63366010c0c81dd26f08a5ee3af37506b0a42 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Tue, 3 Dec 2024 17:53:03 +0300 Subject: [PATCH 33/79] removed unnecessary calculation from q score --- src/q_shells.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/q_shells.py b/src/q_shells.py index c93ad8f..28cb64d 100644 --- a/src/q_shells.py +++ b/src/q_shells.py @@ -59,7 +59,7 @@ def 
generate_q_shells(mol, from chimerax.geometry import find_close_points, find_closest_points, Places from chimerax.atomic import Residues import numpy as np - from math import floor + from chimerax.qscore import _kmeans pps_vertices = unit_sphere_vertices(points_per_shell) @@ -77,7 +77,6 @@ def generate_q_shells(mol, query_coords = query_atoms.scene_coords - num_shells = int(floor(max_rad / step)) query_atoms_center = [] query_atoms_points = [] From 1509d3ee334aeadab5102f3ae7870cb19e268b07 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Tue, 3 Dec 2024 21:13:13 +0300 Subject: [PATCH 34/79] ThreadPoolExecutor for generate_q_shells_parallel; but slower --- src/q_shells.py | 150 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 149 insertions(+), 1 deletion(-) diff --git a/src/q_shells.py b/src/q_shells.py index 28cb64d..23c0e8b 100644 --- a/src/q_shells.py +++ b/src/q_shells.py @@ -299,4 +299,152 @@ def q_scores_for_clusters(centered_mol, volume, fit_res_clusters, fit_res_all, # # return top_10_values, top_10_indices - return q_scores_tensor.cpu().numpy() \ No newline at end of file + return q_scores_tensor.cpu().numpy() + + +from concurrent.futures import ThreadPoolExecutor +def process_atom(atom_idx, query_atoms, query_coords, + pps_vertices, + ref_sphere_vertices, ref_sphere_vertices_large, ref_sphere_vertices_huge, + step, max_rad, points_per_shell, + clustering_iterations, + randomize_shell_points, random_seed): + from chimerax.geometry import find_close_points, find_closest_points + from chimerax.qscore import _kmeans + + not_full_flag = False + + a = query_atoms[atom_idx] + a_coord = query_coords[atom_idx] + _, nearby_i = find_close_points([a_coord], query_coords, max_rad * 3) + nearby_a = query_atoms[nearby_i] + ai = nearby_a.index(a) + nearby_coords = nearby_a.scene_coords + shell_rad = step + local_d_vals = {} + + shell_points = [] + + j = 1 + while shell_rad < max_rad + step / 2: + local_pps = (pps_vertices * shell_rad) + a_coord + if shell_rad < 0.7: # about half a C-C bond length + # Try the quick way first (should succeed for almost all cases unless geometry is seriously wonky) + i1, i2, near1 = find_closest_points(local_pps, nearby_coords, shell_rad * 1.5) + closest = near1 + candidates = i1[closest == ai] + if len(candidates) == points_per_shell: + shell_rad += step + j += 1 + + shell_points.append(local_pps) + + continue + + local_sphere = (ref_sphere_vertices * shell_rad) + a_coord + i1, i2, near1 = find_closest_points(local_sphere, nearby_coords, shell_rad * 1.5) + closest = near1 + candidates = i1[closest == ai] + + if len(candidates) < points_per_shell: + + local_sphere = (ref_sphere_vertices_large * shell_rad) + a_coord + i1, i2, near1 = find_closest_points(local_sphere, nearby_coords, shell_rad * 1.5) + closest = near1 + candidates = i1[closest == ai] + + if len(candidates) < points_per_shell: + local_sphere = (ref_sphere_vertices_huge * shell_rad) + a_coord + i1, i2, near1 = find_closest_points(local_sphere, nearby_coords, shell_rad * 1.5) + closest = near1 + candidates = i1[closest == ai] + + if len(candidates) < points_per_shell: + not_full_flag = True + + else: + points = local_sphere[candidates] + if not randomize_shell_points: + labels, closest = _kmeans.spherical_k_means_defined(points, a_coord, points_per_shell, + local_pps, clustering_iterations) + else: + labels, closest = _kmeans.spherical_k_means_random(points, a_coord, points_per_shell, + clustering_iterations, random_seed + j) + + points = points[closest] + + else: + points = 
local_sphere[candidates] + if not randomize_shell_points: + labels, closest = _kmeans.spherical_k_means_defined(points, a_coord, points_per_shell, + local_pps, clustering_iterations) + else: + labels, closest = _kmeans.spherical_k_means_random(points, a_coord, points_per_shell, + clustering_iterations, random_seed + j) + + points = points[closest] + + else: + points = local_sphere[candidates] + if not randomize_shell_points: + labels, closest = _kmeans.spherical_k_means_defined(points, a_coord, points_per_shell, + local_pps, clustering_iterations) + else: + labels, closest = _kmeans.spherical_k_means_random(points, a_coord, points_per_shell, + clustering_iterations, random_seed + j) + + points = points[closest] + + shell_rad += step + j += 1 + + shell_points.append(points) + + return None if not_full_flag else (a_coord, shell_points) + + +def generate_q_shells_parallel(mol, + points_per_shell=8, max_rad=2.0, step=0.1, + num_test_points=128, clustering_iterations=5, + include_h=False, randomize_shell_points=True, random_seed=RANDOM_SEED): + from datetime import datetime + + from datetime import datetime + global_timer_start = datetime.now() + + query_atoms = mol.atoms + if not include_h: + query_atoms = query_atoms[query_atoms.element_names != 'H'] + query_coords = query_atoms.scene_coords + + pps_vertices = unit_sphere_vertices(points_per_shell) + ref_sphere_vertices = unit_sphere_vertices(num_test_points) + ref_sphere_vertices_large = unit_sphere_vertices(num_test_points * 4) + ref_sphere_vertices_huge = unit_sphere_vertices(num_test_points * 20) + + # Multithreading + results = [] + with ThreadPoolExecutor() as executor: + futures = [ + executor.submit( + process_atom, atom_idx, query_atoms, query_coords, pps_vertices, ref_sphere_vertices, + ref_sphere_vertices_large, ref_sphere_vertices_huge, step, max_rad, points_per_shell, + clustering_iterations, randomize_shell_points, random_seed + ) + for atom_idx in range(len(query_atoms)) + ] + for future in futures: + result = future.result() + if result is not None: + results.append(result) + + query_atoms_center, query_atoms_points = zip(*results) + query_atoms_points_array = np.stack( + [np.concatenate(atom_shell_points, axis=0) for atom_shell_points in query_atoms_points]) + query_atoms_center_np = np.stack(query_atoms_center) + query_atoms_center_repeated = np.repeat(query_atoms_center_np[:, np.newaxis, :], points_per_shell, axis=1) + q_shell_coords = np.concatenate([query_atoms_center_repeated, query_atoms_points_array], axis=1) + + print(f"Generate q shells timer: {datetime.now() - global_timer_start}") + + return q_shell_coords, np.arange(0, max_rad + step / 2, step) From a4b255502d8186263d778ef1a4d620a46c6b1f8f Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Tue, 3 Dec 2024 23:10:26 +0300 Subject: [PATCH 35/79] generate_q_shells_simple --- src/q_shells.py | 54 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/src/q_shells.py b/src/q_shells.py index 23c0e8b..7917130 100644 --- a/src/q_shells.py +++ b/src/q_shells.py @@ -448,3 +448,57 @@ def generate_q_shells_parallel(mol, print(f"Generate q shells timer: {datetime.now() - global_timer_start}") return q_shell_coords, np.arange(0, max_rad + step / 2, step) + + +def generate_q_shells_simple(mol, + points_per_shell=8, max_rad=2.0, step=0.1, num_test_points=128, + clustering_iterations=5, include_h=False, randomize_shell_points=True, random_seed=RANDOM_SEED): + ''' + Returns: + + - a numpy array with q shells coordinates + - radii + ''' + from 
datetime import datetime + global_timer_start = datetime.now() + + from chimerax.geometry import find_close_points, find_closest_points, Places + import numpy as np + + + pps_vertices = unit_sphere_vertices(points_per_shell) + + query_atoms = mol.atoms + + if not include_h: + query_atoms = query_atoms[query_atoms.element_names != 'H'] + + query_coords = query_atoms.scene_coords + + query_atoms_center = [] + query_atoms_points = [] + + for i, a in enumerate(query_atoms): + + a_coord = a.scene_coord + shell_rad = step + shell_points = [] + + j = 1 + while shell_rad < max_rad + step / 2: + local_pps = (pps_vertices * shell_rad) + a_coord + shell_points.append(local_pps) + shell_rad += step + + query_atoms_center.append(a_coord) + query_atoms_points.append(shell_points) + + query_atoms_points_array = np.stack( + [np.concatenate(atom_shell_points, axis=0) for atom_shell_points in query_atoms_points]) + query_atoms_center_np = np.stack(query_atoms_center) + query_atoms_center_repeated = np.repeat(query_atoms_center_np[:, np.newaxis, :], points_per_shell, axis=1) + q_shell_coords = np.concatenate([query_atoms_center_repeated, query_atoms_points_array], axis=1) + + print(f"Generate q shells timer: {datetime.now() - global_timer_start}") + + return q_shell_coords, np.arange(0, max_rad + step / 2, step) \ No newline at end of file From bb3a0a45be9dccdf5f78280686a4f66c30975665 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Wed, 4 Dec 2024 14:52:46 +0300 Subject: [PATCH 36/79] Record clustering time --- src/DiffAtomComp.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/DiffAtomComp.py b/src/DiffAtomComp.py index ed31457..013036e 100644 --- a/src/DiffAtomComp.py +++ b/src/DiffAtomComp.py @@ -174,6 +174,9 @@ def cluster_and_sort_sqd_fast(e_sqd_log, mol_centers, shift_tolerance: float = 3 mol_shift = fit_res_filtered[mol_idx][:, :3] mol_q = fit_res_filtered[mol_idx][:, 3:7] + print(f"Clustering {len(mol_shift)} fits") + timer_start = datetime.now() + T = [] for i in range(len(mol_shift)): shift = mol_shift[i] @@ -187,6 +190,9 @@ def cluster_and_sort_sqd_fast(e_sqd_log, mol_centers, shift_tolerance: float = 3 transformation = Place(matrix=T_matrix) T.append(transformation) + print(f"Convert to matrix time: {datetime.now() - timer_start}") + timer_start = datetime.now() + b = bins.Binned_Transforms(angle_tolerance * pi / 180, shift_tolerance, mol_centers[mol_idx]) mol_transform_label = [] unique_id = 0 @@ -203,6 +209,8 @@ def cluster_and_sort_sqd_fast(e_sqd_log, mol_centers, shift_tolerance: float = 3 mol_transform_label.append(T_ID_dict[id(close[0])]) T_ID_dict[id(ptf)] = T_ID_dict[id(close[0])] + print(f"ChimeraX bin clustering: {datetime.now() - timer_start}") + unique_labels, indices, counts = np.unique(mol_transform_label, axis=0, return_inverse=True, return_counts=True) for cluster_idx in range(len(unique_labels)): From 1ba5bc3033a804d9d9df2cbff443abbdd7567b34 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Wed, 4 Dec 2024 14:53:32 +0300 Subject: [PATCH 37/79] safeguard q score when self.mol is None --- src/tablemodel.py | 4 +++- src/tool.py | 7 +++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/tablemodel.py b/src/tablemodel.py index 1389c85..b7c25dc 100644 --- a/src/tablemodel.py +++ b/src/tablemodel.py @@ -62,7 +62,9 @@ def data(self, index: QModelIndex, role=Qt.ItemDataRole): elif column == 1: return str(f"{mol_idx}-{self._mol_names[mol_idx]}") elif column == 2: - return float(round(float(self._q_scores_np[index.row()]) * 10000)) / 10000.0 # for 4 decimals + 
if self._q_scores_np: + return float(round(float(self._q_scores_np[index.row()]) * 10000)) / 10000.0 # for 4 decimals + return None elif column == 3: return int(self._sqd_cluster_data[index.row(), 3]) elif 4 <= column <= 11: diff --git a/src/tool.py b/src/tool.py index d4af63e..bfba451 100644 --- a/src/tool.py +++ b/src/tool.py @@ -211,6 +211,7 @@ def __init__(self, session, tool_name): self.spheres = None self.proxyModel = None + self.mol = None def _build_ui(self): @@ -1413,8 +1414,10 @@ def show_results(self, e_sqd_log, mol_centers, mol_paths, target_vol_path=None, return # Calculate Q-scores - from .q_shells import q_scores_for_clusters - q_scores_np = q_scores_for_clusters(self.mol, self.vol, self.e_sqd_clusters_ordered, self.e_sqd_log) + q_scores_np = None + if self.mol: + from .q_shells import q_scores_for_clusters + q_scores_np = q_scores_for_clusters(self.mol, self.vol, self.e_sqd_clusters_ordered, self.e_sqd_log) self.model = TableModel(self.e_sqd_clusters_ordered, self.e_sqd_log, mol_paths, q_scores_np) From 5647d5511c60b511a7481db91cbe0b7696d14660 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Wed, 4 Dec 2024 17:05:21 +0300 Subject: [PATCH 38/79] load only ['.mrc', '.map'] from the sim map folder --- src/DiffAtomComp.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/DiffAtomComp.py b/src/DiffAtomComp.py index 013036e..31ac721 100644 --- a/src/DiffAtomComp.py +++ b/src/DiffAtomComp.py @@ -384,8 +384,10 @@ def mrc_folder_to_npy_list(mrc_folder): full_path = os.path.join(mrc_folder, file_name) # Check if the current path is a file and not a directory if os.path.isfile(full_path): - data, steps, origin = mrc_to_npy(full_path) - sim_map_list.append((data, steps, origin)) + file_extension = os.path.splitext(full_path)[1].lower() + if file_extension in ['.mrc', '.map']: + data, steps, origin = mrc_to_npy(full_path) + sim_map_list.append((data, steps, origin)) return sim_map_list From e74acf8d97f5a4df3d97a91c2f449435e4f0e5eb Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Wed, 4 Dec 2024 17:18:19 +0300 Subject: [PATCH 39/79] load only ['.pdb', '.cif'] from the structure folder --- src/DiffAtomComp.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/DiffAtomComp.py b/src/DiffAtomComp.py index 31ac721..ee2d73b 100644 --- a/src/DiffAtomComp.py +++ b/src/DiffAtomComp.py @@ -458,8 +458,6 @@ def read_file_and_get_coordinates(file_path, fit_atom_mode="Backbone"): parser = MMCIFParser() elif file_extension == '.pdb': parser = PDBParser() - else: - raise ValueError("Unsupported file format. 
Please provide a .mmcif or .pdb file.") # Parse the structure structure_id = os.path.basename(file_path).split('.')[0] # Use file name as structure ID @@ -648,8 +646,9 @@ def read_all_files_to_atom_coords_list(structures_dir, fit_atom_mode="Backbone") # List all files in the given directory for file_name in sorted(os.listdir(structures_dir)): full_path = os.path.join(structures_dir, file_name) - # Check if the current path is a file and not a directory - if os.path.isfile(full_path): + + file_extension = os.path.splitext(full_path)[1].lower() + if file_extension in ['.pdb', '.cif']: # Read the atom coordinates from the file atom_coords = read_file_and_get_coordinates(full_path, fit_atom_mode) # Append the coordinates to the list @@ -1103,7 +1102,9 @@ def diff_atom_comp(target_vol_path: str, mol_paths = [] for file_name in sorted(os.listdir(structures_dir)): full_path = os.path.join(structures_dir, file_name) - mol_paths.append(full_path) + file_extension = os.path.splitext(full_path)[1].lower() + if file_extension in ['.pdb', '.cif']: + mol_paths.append(full_path) np.savez_compressed(f"{out_dir}/fit_res.npz", target_vol_path=target_vol_path, From b338dd5604463367f7fda1b128aa46530a26ca27 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Wed, 4 Dec 2024 17:34:37 +0300 Subject: [PATCH 40/79] load sim map from str folder and removed sim map folder --- src/DiffAtomComp.py | 6 +----- src/dfit_cmd.py | 6 +----- src/tool.py | 18 +----------------- 3 files changed, 3 insertions(+), 27 deletions(-) diff --git a/src/DiffAtomComp.py b/src/DiffAtomComp.py index ee2d73b..c784e65 100644 --- a/src/DiffAtomComp.py +++ b/src/DiffAtomComp.py @@ -928,7 +928,6 @@ def diff_atom_comp(target_vol_path: str, target_surface_threshold: float, min_cluster_size: float, # not in use, use 100 as a placeholder structures_dir: str, - structures_sim_map_dir: str, fit_atom_mode:str = "Backbone", Gaussian_mode:str = "Gaussian with negative (shrink)", N_shifts: int = 10, @@ -987,7 +986,7 @@ def diff_atom_comp(target_vol_path: str, num_molecules = len(atom_coords_list) # read simulated map - sim_map_list = mrc_folder_to_npy_list(structures_sim_map_dir) + sim_map_list = mrc_folder_to_npy_list(structures_dir) elements_sim_density_list = sample_sim_map(atom_coords_list, sim_map_list, num_molecules, device) # center the mol @@ -1153,8 +1152,6 @@ def parse_ints(arg): parser.add_argument('--structures_dir', type=str, help="directory containing the structures to be fit") - parser.add_argument('--structures_sim_map_dir', type=str, - help="directory containing the simulated map from the structures to be fit") parser.add_argument('--out_dir', type=str, default="out", help="Output directory") @@ -1181,7 +1178,6 @@ def parse_ints(arg): args.target_surface_threshold, args.min_cluster_size, args.structures_dir, - args.structures_sim_map_dir, out_dir=args.out_dir, out_dir_exist_ok=args.out_dir_exist_ok, N_shifts=args.N_shifts, diff --git a/src/dfit_cmd.py b/src/dfit_cmd.py index 7f1e81d..fab2371 100644 --- a/src/dfit_cmd.py +++ b/src/dfit_cmd.py @@ -212,7 +212,7 @@ def dfit(session, mol, in_map, # fit_atom_mode out_dir device -def dfit_disk(session, str_dir, sim_dir, in_map, level, +def dfit_disk(session, str_dir, in_map, level, num_positions=10, num_rotations=100, @@ -261,7 +261,6 @@ def dfit_disk(session, str_dir, sim_dir, in_map, level, f"-------\n" f"Disk mode\n" f"Structures Folder: {str_dir}\n" - f"Sim-map Folder: {sim_dir}\n" f"Target Volume: {in_map}\n" f"Target Surface Threshold: {level}\n" f"-------\n" @@ -304,7 +303,6 @@ def 
dfit_disk(session, str_dir, sim_dir, in_map, level, target_surface_threshold=level, min_cluster_size=100, structures_dir=str_dir, - structures_sim_map_dir=sim_dir, fit_atom_mode=fit_atom_mode, Gaussian_mode=Gaussian_mode, N_shifts=num_positions, @@ -355,7 +353,6 @@ def dfit_disk(session, str_dir, sim_dir, in_map, level, dfit_disk_desc = CmdDesc(keyword=[("str_dir", OpenFolderNameArg), - ("sim_dir", OpenFolderNameArg), ("in_map", OpenFileNameArg), ("level", FloatArg), @@ -376,7 +373,6 @@ def dfit_disk(session, str_dir, sim_dir, in_map, level, ("out_dir", SaveFolderNameArg), ("device", StringArg)], required_arguments=["str_dir", - "sim_dir", "in_map", "level"]) diff --git a/src/tool.py b/src/tool.py index bfba451..a4c62c9 100644 --- a/src/tool.py +++ b/src/tool.py @@ -103,7 +103,6 @@ def __init__(self): self.input_directory: str = "D:/GIT/DiffFit/dev_data/input/domain_fit_demo_3domains" self.target_vol_path: str = "D:/GIT/DiffFit/dev_data/input/domain_fit_demo_3domains/density2.mrc" self.structures_directory: str = "D:/GIT/DiffFit/dev_data/input/domain_fit_demo_3domains/subunits_cif" - self.structures_sim_map_dir: str = "D:/GIT/DiffFit/dev_data/input/domain_fit_demo_3domains/subunits_mrc" self.output_directory: str = "D:/GIT/DiffFit/dev_data/output" @@ -285,7 +284,6 @@ def load_settings(self): #compute self.target_vol_path.setText(self.settings.target_vol_path) self.structures_dir.setText(self.settings.structures_directory) - self.structures_sim_map_dir.setText(self.settings.structures_sim_map_dir) self.out_dir.setText(self.settings.output_directory) self.target_surface_threshold.setValue(self.settings.target_surface_threshold) self.min_cluster_size.setValue(self.settings.min_cluster_size) @@ -320,7 +318,6 @@ def store_settings(self): #compute self.settings.target_vol_path = self.target_vol_path.text() self.settings.structures_directory = self.structures_dir.text() - self.settings.structures_sim_map_dir = self.structures_sim_map_dir.text() self.settings.output_directory = self.out_dir.text() self.settings.target_surface_threshold = self.target_surface_threshold.value() self.settings.min_cluster_size = self.min_cluster_size.value() @@ -562,18 +559,7 @@ def build_compute_ui(self, layout): layout.addWidget(self.structures_dir, row, 1) layout.addWidget(structures_dir_select, row, 2) row = row + 1 - - structures_sim_map_dir_label = QLabel() - structures_sim_map_dir_label.setText("Structures Sim-map Folder:") - self.structures_sim_map_dir = QLineEdit() - self.structures_sim_map_dir.textChanged.connect(lambda: self.store_settings()) - structures_sim_map_dir_select = QPushButton("Select") - structures_sim_map_dir_select.clicked.connect(lambda: self.select_clicked("Structures Sim-map Folder", self.structures_sim_map_dir)) - layout.addWidget(structures_sim_map_dir_label, row, 0) - layout.addWidget(self.structures_sim_map_dir, row, 1) - layout.addWidget(structures_sim_map_dir_select, row, 2) - row = row + 1 - + out_dir_label = QLabel() out_dir_label.setText("Output Folder:") self.out_dir = QLineEdit() @@ -1716,7 +1702,6 @@ def run_button_clicked(self): f"Disk mode\n" f"Target Volume: {self.settings.target_vol_path}\n" f"Structures Folder: {self.settings.structures_directory}\n" - f"Sim-map Folder: {self.settings.structures_sim_map_dir}\n" f"Target Surface Threshold: {self.settings.target_surface_threshold}\n" f"-------\n" f"# positions: {self.settings.N_shifts}\n" @@ -1747,7 +1732,6 @@ def run_button_clicked(self): target_surface_threshold=self.settings.target_surface_threshold, 
min_cluster_size=self.settings.min_cluster_size, structures_dir=self.settings.structures_directory, - structures_sim_map_dir=self.settings.structures_sim_map_dir, fit_atom_mode=self.fit_atom_mode, Gaussian_mode=self.Gaussian_mode, N_shifts=self.settings.N_shifts, From 35eb77c24f3f337f214e53e299ef0e42fe376d69 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Wed, 4 Dec 2024 17:35:24 +0300 Subject: [PATCH 41/79] moved demo mrc to str folder --- .../{subunits_mrc => subunits_cif}/I7LV70_D1.mrc | Bin .../{subunits_mrc => subunits_cif}/I7M317_D1.mrc | Bin .../{subunits_mrc => subunits_cif}/I7MLV6_D3.mrc | Bin 3 files changed, 0 insertions(+), 0 deletions(-) rename dev_data/input/domain_fit_demo_3domains/{subunits_mrc => subunits_cif}/I7LV70_D1.mrc (100%) rename dev_data/input/domain_fit_demo_3domains/{subunits_mrc => subunits_cif}/I7M317_D1.mrc (100%) rename dev_data/input/domain_fit_demo_3domains/{subunits_mrc => subunits_cif}/I7MLV6_D3.mrc (100%) diff --git a/dev_data/input/domain_fit_demo_3domains/subunits_mrc/I7LV70_D1.mrc b/dev_data/input/domain_fit_demo_3domains/subunits_cif/I7LV70_D1.mrc similarity index 100% rename from dev_data/input/domain_fit_demo_3domains/subunits_mrc/I7LV70_D1.mrc rename to dev_data/input/domain_fit_demo_3domains/subunits_cif/I7LV70_D1.mrc diff --git a/dev_data/input/domain_fit_demo_3domains/subunits_mrc/I7M317_D1.mrc b/dev_data/input/domain_fit_demo_3domains/subunits_cif/I7M317_D1.mrc similarity index 100% rename from dev_data/input/domain_fit_demo_3domains/subunits_mrc/I7M317_D1.mrc rename to dev_data/input/domain_fit_demo_3domains/subunits_cif/I7M317_D1.mrc diff --git a/dev_data/input/domain_fit_demo_3domains/subunits_mrc/I7MLV6_D3.mrc b/dev_data/input/domain_fit_demo_3domains/subunits_cif/I7MLV6_D3.mrc similarity index 100% rename from dev_data/input/domain_fit_demo_3domains/subunits_mrc/I7MLV6_D3.mrc rename to dev_data/input/domain_fit_demo_3domains/subunits_cif/I7MLV6_D3.mrc From e8be3d4070432c7c647aaf1d1733478f60cf74b0 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Wed, 4 Dec 2024 21:41:53 +0300 Subject: [PATCH 42/79] q shells generator; removed sim map out folder --- .gitignore | 1 + src/tool.py | 114 +++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 88 insertions(+), 27 deletions(-) diff --git a/.gitignore b/.gitignore index 2134488..705cc79 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ dev_data/output_old/* !dev_data/output/dev_comp_domain_fit_3_domains_10s20q_new_q case_study_data run +/dev_data/input/domain_fit_demo_3domains/subunits_cif_test diff --git a/src/tool.py b/src/tool.py index a4c62c9..021f45a 100644 --- a/src/tool.py +++ b/src/tool.py @@ -815,23 +815,11 @@ def build_utilities_ui(self, layout): row = row + 1 - doc_label = QLabel("Simulate a map for each structure in the folder") + doc_label = QLabel("Simulate a map for each structure in a folder") doc_label.setWordWrap(True) layout.addWidget(doc_label, row, 0, 1, 3) row = row + 1 - sim_out_dir_label = QLabel() - sim_out_dir_label.setText("Output Folder:") - self.sim_out_dir = QLineEdit() - self.sim_out_dir.setText("sim_out") - sim_out_dir_select = QPushButton("Select") - sim_out_dir_select.clicked.connect( - lambda: self.select_clicked("Output folder for the simulated maps", self.sim_out_dir)) - layout.addWidget(sim_out_dir_label, row, 0) - layout.addWidget(self.sim_out_dir, row, 1) - layout.addWidget(sim_out_dir_select, row, 2) - row = row + 1 - sim_dir_label = QLabel() sim_dir_label.setText("Structures Folder:") self.sim_dir = QLineEdit() @@ -861,6 +849,37 @@ 
def build_utilities_ui(self, layout): layout.addWidget(button, row, 2) row = row + 1 + doc_label = QLabel("Generate q shells for each structure in a folder") + doc_label.setWordWrap(True) + layout.addWidget(doc_label, row, 0, 1, 3) + row = row + 1 + + q_shells_label = QLabel() + q_shells_label.setText("Structures Folder:") + self.q_shells_dir = QLineEdit() + self.q_shells_dir.setText("split_out") + q_shells_dir_select = QPushButton("Select") + q_shells_dir_select.clicked.connect( + lambda: self.select_clicked("Folder containing the structures", self.q_shells_dir)) + layout.addWidget(q_shells_label, row, 0) + layout.addWidget(self.q_shells_dir, row, 1) + layout.addWidget(q_shells_dir_select, row, 2) + row = row + 1 + + q_shells_mode_label = QLabel() + q_shells_mode_label.setText("Mode:") + button = QPushButton() + button.setText("Full (non-overlapping)") + button.clicked.connect(lambda: self.q_shell_button_clicked("Full")) + layout.addWidget(q_shells_mode_label, row, 0) + layout.addWidget(button, row, 1) + + button = QPushButton() + button.setText("Simple") + button.clicked.connect(lambda: self.q_shell_button_clicked("Simple")) + layout.addWidget(button, row, 2) + row = row + 1 + vertical_spacer = QSpacerItem(1, 1, QSizePolicy.Minimum, QSizePolicy.Expanding) layout.addItem(vertical_spacer, row + 1, 0) @@ -1635,23 +1654,64 @@ def dependency_install_button_clicked(self): run_logged_pip(cmd_list, self.session.logger) - def sim_button_clicked(self): - output_dir = self.sim_out_dir.text() - if not os.path.exists(output_dir): - os.makedirs(output_dir) + def q_shell_button_clicked(self, mode: str): + q_shell_generator = None + ext = "" + if mode == "Full": + from .q_shells import generate_q_shells + q_shell_generator = generate_q_shells + ext = "centered_q_shells.full.npz" + elif mode == "Simple": + from .q_shells import generate_q_shells_simple + q_shell_generator = generate_q_shells_simple + ext = "centered_q_shells.simple.npz" + + str_dir = self.q_shells_dir.text() + + for file_name in sorted(os.listdir(str_dir)): + file_path = os.path.join(str_dir, file_name) + file_extension = os.path.splitext(file_path)[1].lower() + if file_extension in ['.pdb', '.cif']: + print(f"Q shell generator: {q_shell_generator}: {file_path}") - sim_structures_dir = self.sim_dir.text() - for file_name in sorted(os.listdir(sim_structures_dir)): - file_path = os.path.join(sim_structures_dir, file_name) - structure = run(self.session, f'open {file_path}')[0] - structure_basename = file_name.split('.')[0] + mol = run(self.session, f'open {file_path}')[0] + mol_basename = file_name.split('.')[0] - mrc_filename = f"{structure_basename}.mrc" - mrc_filepath = os.path.join(output_dir, mrc_filename) + # center mol + from chimerax.geometry import Place + mol_center = mol.atoms.coords.mean(axis=0) + transform = Place(origin=-mol_center) + mol.atoms.transform(transform) + mol.position = Place() - vol = run(self.session, f'molmap #{structure.id[0]} {self.sim_resolution.value()} gridSpacing 1.0') - run(self.session, f"save {mrc_filepath} #{vol.id[0]}") - run(self.session, f"close #{vol.id[0]}") + q_shell_coords, radii = q_shell_generator(mol) + + q_shells_filepath = os.path.join(str_dir, f"{mol_basename}.{ext}") + np.savez_compressed(q_shells_filepath, + q_shell_coords=q_shell_coords, + radii=radii) + + run(self.session, f"close #{mol.id[0]}") + + + + def sim_button_clicked(self): + str_dir = self.sim_dir.text() + + for file_name in sorted(os.listdir(str_dir)): + file_path = os.path.join(str_dir, file_name) + file_extension = 
os.path.splitext(file_path)[1].lower() + if file_extension in ['.pdb', '.cif']: + structure = run(self.session, f'open {file_path}')[0] + structure_basename = file_name.split('.')[0] + + mrc_filename = f"{structure_basename}.mrc" + mrc_filepath = os.path.join(str_dir, mrc_filename) + + vol = run(self.session, f'molmap #{structure.id[0]} {self.sim_resolution.value()} gridSpacing 1.0') + run(self.session, f"save {mrc_filepath} #{vol.id[0]}") + run(self.session, f"close #{vol.id[0]}") + run(self.session, f"close #{structure.id[0]}") def split_button_clicked(self): From 03a4d38e7eaf35f6259c2215187943bf5f378532 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Thu, 5 Dec 2024 12:16:42 +0300 Subject: [PATCH 43/79] q scores for interactive and disk modes --- src/q_shells.py | 30 ++++++++++-------- src/tablemodel.py | 2 +- src/tool.py | 81 +++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 92 insertions(+), 21 deletions(-) diff --git a/src/q_shells.py b/src/q_shells.py index 7917130..81c5b6c 100644 --- a/src/q_shells.py +++ b/src/q_shells.py @@ -202,7 +202,7 @@ def min_max_d(v): return min_d, max_d -def q_scores_for_clusters(centered_mol, volume, fit_res_clusters, fit_res_all, +def q_scores_for_clusters(q_shell_coords_torch_list, q_shell_radii_np_list, volume, fit_res_clusters, fit_res_all, ref_sigma=0.6, points_per_shell=8, device="cuda"): shifts = [] @@ -228,10 +228,6 @@ def q_scores_for_clusters(centered_mol, volume, fit_res_clusters, fit_res_all, quaternions_matrices = quaternion_to_matrix_batch(quaternions.unsqueeze(0)) - q_shell_coords, radii = generate_q_shells(centered_mol) - q_shell_coords = torch.tensor(q_shell_coords, device=device).float() - q_shell_coords = q_shell_coords.reshape([-1, 3]) - vol_matrix = volume.full_matrix() vol_origin_and_step = volume.data_origin_and_step() target_origin = vol_origin_and_step[0] @@ -255,20 +251,26 @@ def q_scores_for_clusters(centered_mol, volume, fit_res_clusters, fit_res_all, a = max_d - min_d b = min_d - num_shells = len(radii) - q_reference_gaussian = a * np.exp(-0.5 * (radii / ref_sigma) ** 2) + b - q_ref = np.concatenate([[q_reference_gaussian[0]] * points_per_shell, - *[[q_reference_gaussian[j]] * points_per_shell for j in range(num_shells - 1)]]) - q_ref = torch.tensor(q_ref, device=device, dtype=torch.float32) - q_ref -= q_ref.mean() q_scores = [] - for row in range(len(fit_res_clusters)): - transformed_coords = torch.matmul(q_shell_coords, quaternions_matrices[:, row:row + 1, :, :]) + for cluster_idx in range(len(fit_res_clusters)): + mol_idx = int(fit_res_clusters[cluster_idx, 0]) + + # q ref + num_shells = len(q_shell_radii_np_list[mol_idx]) + q_reference_gaussian = a * np.exp(-0.5 * (q_shell_radii_np_list[mol_idx] / ref_sigma) ** 2) + b + q_ref = np.concatenate([[q_reference_gaussian[0]] * points_per_shell, + *[[q_reference_gaussian[j]] * points_per_shell for j in range(num_shells - 1)]]) + q_ref = torch.tensor(q_ref, device=device, dtype=torch.float32) + q_ref -= q_ref.mean() + + # q shells q scores + q_shell_coords = q_shell_coords_torch_list[mol_idx] + transformed_coords = torch.matmul(q_shell_coords, quaternions_matrices[:, cluster_idx:cluster_idx + 1, :, :]) - transformed_coords += shifts[row, :] + transformed_coords += shifts[cluster_idx, :] q_shell_coords_normalized_to_target = normalize_coordinates_to_map_origin_torch(transformed_coords, target_size_x_y_z_tensor, diff --git a/src/tablemodel.py b/src/tablemodel.py index b7c25dc..1ece1d2 100644 --- a/src/tablemodel.py +++ b/src/tablemodel.py @@ -62,7 +62,7 @@ def 
data(self, index: QModelIndex, role=Qt.ItemDataRole): elif column == 1: return str(f"{mol_idx}-{self._mol_names[mol_idx]}") elif column == 2: - if self._q_scores_np: + if self._q_scores_np is not None: return float(round(float(self._q_scores_np[index.row()]) * 10000)) / 10000.0 # for 4 decimals return None elif column == 3: diff --git a/src/tool.py b/src/tool.py index 021f45a..3b1ef49 100644 --- a/src/tool.py +++ b/src/tool.py @@ -45,7 +45,35 @@ import platform import ast from scipy.interpolate import interp1d - + + +def generate_q_shells_wrapper(q_shell_generator, mol_path, q_shells_ext, session): + print(f"Q shell generator: {q_shell_generator}: {mol_path}") + + mol_basename = mol_path.split('.')[0] + mol_folder = os.path.dirname(mol_path) + + mol = run(session, f'open {mol_path}')[0] + + # center mol + from chimerax.geometry import Place + mol_center = mol.atoms.coords.mean(axis=0) + transform = Place(origin=-mol_center) + mol.atoms.transform(transform) + mol.position = Place() + + q_shell_coords, radii = q_shell_generator(mol) + + q_shells_filepath = os.path.join(mol_folder, f"{mol_basename}.{q_shells_ext}") + np.savez_compressed(q_shells_filepath, + q_shell_coords=q_shell_coords, + radii=radii) # Not atomic warning: q_scores_points_per_shell not saved!!! + + run(session, f"close #{mol.id[0]}") + + return q_shell_coords, radii + + def create_row(parent_layout, left=0, top=0, right=0, bottom=0, spacing=5): row_frame = QFrame() @@ -1418,13 +1446,54 @@ def show_results(self, e_sqd_log, mol_centers, mol_paths, target_vol_path=None, self.proxyModel = None return - # Calculate Q-scores + # ======= Calculate Q-scores q_scores_np = None - if self.mol: - from .q_shells import q_scores_for_clusters - q_scores_np = q_scores_for_clusters(self.mol, self.vol, self.e_sqd_clusters_ordered, self.e_sqd_log) + q_shells_mode = "Full" + + q_scores_points_per_shell = 8 + q_scores_max_rad = 2.0 + q_scores_step = 0.1 + q_shell_radii = np.arange(0, q_scores_max_rad + q_scores_step / 2, q_scores_step) + + q_shell_coords_torch_list = [] + q_shell_radii_np_list = [] + + from .q_shells import generate_q_shells, generate_q_shells_simple, q_scores_for_clusters + + q_shell_generator = None + q_shells_ext = "" + if q_shells_mode == "Full": + q_shell_generator = generate_q_shells + q_shells_ext = "centered_q_shells.full.npz" + elif q_shells_mode == "Simple": + q_shell_generator = generate_q_shells_simple + q_shells_ext = "centered_q_shells.simple.npz" + + for mol_path in mol_paths: + mol_basename = mol_path.split('.')[0] + mol_folder = os.path.dirname(mol_path) + q_shells_filepath = os.path.join(mol_folder, f"{mol_basename}.{q_shells_ext}") + + if os.path.exists(q_shells_filepath): + q_shells_np = np.load(q_shells_filepath) + q_shell_coords = q_shells_np['q_shell_coords'] + q_shell_radii = q_shells_np['radii'] # Not atomic warning: q_scores_points_per_shell not saved!!! 
+ else: + q_shell_coords, q_shell_radii = generate_q_shells_wrapper(q_shell_generator, + mol_path, + q_shells_ext, + self.session) + + q_shell_coords = torch.tensor(q_shell_coords, device=self._device.currentText()).float() + q_shell_coords = q_shell_coords.reshape([-1, 3]) + + q_shell_coords_torch_list.append(q_shell_coords) + q_shell_radii_np_list.append(q_shell_radii) + + q_scores_np = q_scores_for_clusters(q_shell_coords_torch_list, q_shell_radii_np_list, self.vol, self.e_sqd_clusters_ordered, self.e_sqd_log) + # ======= Create fit results table self.model = TableModel(self.e_sqd_clusters_ordered, self.e_sqd_log, mol_paths, q_scores_np) self.proxyModel = QSortFilterProxyModel() self.proxyModel.setSourceModel(self.model) @@ -1689,7 +1758,7 @@ def q_shell_button_clicked(self, mode: str): q_shells_filepath = os.path.join(str_dir, f"{mol_basename}.{ext}") np.savez_compressed(q_shells_filepath, q_shell_coords=q_shell_coords, - radii=radii) + radii=radii) # Not atomic warning: q_scores_points_per_shell not saved!!! run(self.session, f"close #{mol.id[0]}") From 4b16c6a9b79c7ef7ec1d7e280d71b29d5237b168 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Thu, 5 Dec 2024 13:21:27 +0300 Subject: [PATCH 44/79] pass device param to q_scores_for_clusters --- src/tool.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/tool.py b/src/tool.py index 3b1ef49..2787014 100644 --- a/src/tool.py +++ b/src/tool.py @@ -1490,7 +1490,12 @@ def show_results(self, e_sqd_log, mol_centers, mol_paths, target_vol_path=None, q_shell_coords_torch_list.append(q_shell_coords) q_shell_radii_np_list.append(q_shell_radii) - q_scores_np = q_scores_for_clusters(q_shell_coords_torch_list, q_shell_radii_np_list, self.vol, self.e_sqd_clusters_ordered, self.e_sqd_log) + q_scores_np = q_scores_for_clusters(q_shell_coords_torch_list, + q_shell_radii_np_list, + self.vol, + self.e_sqd_clusters_ordered, + self.e_sqd_log, + device=self._device.currentText()) # ======= Create fit results table From d78fdee1d035f6d2250462a9d4fb13f709da3e13 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Thu, 5 Dec 2024 13:42:36 +0300 Subject: [PATCH 45/79] removed syntax warning for "is" and "is not" --- src/tool.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tool.py b/src/tool.py index 2787014..b103e4c 100644 --- a/src/tool.py +++ b/src/tool.py @@ -1705,17 +1705,17 @@ def single_fit_button_clicked(self): def dependency_install_button_clicked(self): - if self.dependency_name.text() is "": + if self.dependency_name.text() == "": self.session.logger.error("You have to specify a package name.") return package_name = self.dependency_name.text() - if self.dependency_version.text() is not "": + if self.dependency_version.text() != "": package_name += f"=={self.dependency_version.text()}" cmd_list = ["install", package_name] - if self.dependency_index_url.text() is not "": + if self.dependency_index_url.text() != "": cmd_list.extend(["--index-url", self.dependency_index_url.text()]) cmd_list.extend([ From 24ad41240c94ca3da161861cf81b3f54f294b035 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Thu, 5 Dec 2024 17:50:14 +0300 Subject: [PATCH 46/79] clustering and q scores log --- src/DiffAtomComp.py | 29 +++++++++++++++++++---- src/q_shells.py | 15 ++++++------ src/tool.py | 56 +++++++++++++++++++++++++++++++++++++-------- 3 files changed, 80 insertions(+), 20 deletions(-) diff --git a/src/DiffAtomComp.py b/src/DiffAtomComp.py index c784e65..ac00590 100644 --- a/src/DiffAtomComp.py +++ b/src/DiffAtomComp.py @@ 
-99,7 +99,9 @@ def cluster_and_sort_sqd_fast(e_sqd_log, mol_centers, shift_tolerance: float = 3 sort_column_idx: int = 7, in_contour_threshold: float = 0.5, correlation_threshold: float = 0.5, - df_cid_threshold: float = 0.15): + df_cid_threshold: float = 0.15, + save_log=False, + log_path=""): """ Cluster the fitting results in sqd table by thresholding on shift and quaternion Return the sorted cluster representatives @@ -140,6 +142,11 @@ def cluster_and_sort_sqd_fast(e_sqd_log, mol_centers, shift_tolerance: float = 3 # Use the generated meshgrid and max_sort_column_metric_idx to index into e_sqd_log sqd_highest_corr_np = e_sqd_log[dims_0, dims_1, max_sort_column_metric_idx] + timer_start = datetime.now() + if save_log: + with open(log_path, "a") as log_file: + log_file.write(f"DiffFit fit_res filtering starts: {timer_start}\n") + fit_res_filtered = [] fit_res_filtered_indices = [] in_contour_col_idx = 11 @@ -168,13 +175,19 @@ def cluster_and_sort_sqd_fast(e_sqd_log, mol_centers, shift_tolerance: float = 3 fit_res_filtered.append(filtered_array) fit_res_filtered_indices.append(filtered_indices[0]) + if save_log: + with open(log_path, "a") as log_file: + log_file.write(f"DiffFit fit_res filtering time elapsed: {datetime.now() - timer_start}\n" + f"-------\n") sqd_clusters = [] for mol_idx in range(N_mol): mol_shift = fit_res_filtered[mol_idx][:, :3] mol_q = fit_res_filtered[mol_idx][:, 3:7] - print(f"Clustering {len(mol_shift)} fits") + if save_log: + with open(log_path, "a") as log_file: + log_file.write(f"Clustering {len(mol_shift)} fits for mol_idx: {mol_idx}\n") timer_start = datetime.now() T = [] @@ -190,7 +203,9 @@ def cluster_and_sort_sqd_fast(e_sqd_log, mol_centers, shift_tolerance: float = 3 transformation = Place(matrix=T_matrix) T.append(transformation) - print(f"Convert to matrix time: {datetime.now() - timer_start}") + if save_log: + with open(log_path, "a") as log_file: + log_file.write(f"Convert to matrix time: {datetime.now() - timer_start}\n") timer_start = datetime.now() b = bins.Binned_Transforms(angle_tolerance * pi / 180, shift_tolerance, mol_centers[mol_idx]) @@ -209,7 +224,13 @@ def cluster_and_sort_sqd_fast(e_sqd_log, mol_centers, shift_tolerance: float = 3 mol_transform_label.append(T_ID_dict[id(close[0])]) T_ID_dict[id(ptf)] = T_ID_dict[id(close[0])] - print(f"ChimeraX bin clustering: {datetime.now() - timer_start}") + if save_log and (i + 1) % 10000 == 0: + with open(log_path, "a") as log_file: + log_file.write(f"Clustered {i+1} fits: {datetime.now()}\n") + + if save_log: + with open(log_path, "a") as log_file: + log_file.write(f"ChimeraX bin clustering: {datetime.now() - timer_start}\n") unique_labels, indices, counts = np.unique(mol_transform_label, axis=0, return_inverse=True, return_counts=True) diff --git a/src/q_shells.py b/src/q_shells.py index 81c5b6c..5b03e26 100644 --- a/src/q_shells.py +++ b/src/q_shells.py @@ -1,5 +1,6 @@ import torch import numpy as np +from datetime import datetime from .DiffAtomComp import quaternion_to_matrix_batch, normalize_coordinates_to_map_origin_torch def unit_sphere_vertices(num_vertices): @@ -53,7 +54,6 @@ def generate_q_shells(mol, - a numpy array with q shells coordinates - radii ''' - from datetime import datetime global_timer_start = datetime.now() from chimerax.geometry import find_close_points, find_closest_points, Places @@ -203,7 +203,9 @@ def min_max_d(v): def q_scores_for_clusters(q_shell_coords_torch_list, q_shell_radii_np_list, volume, fit_res_clusters, fit_res_all, - ref_sigma=0.6, points_per_shell=8, 
device="cuda"): + ref_sigma=0.6, points_per_shell=8, device="cuda", + save_log=False, + log_path=""): shifts = [] quaternions = [] @@ -252,8 +254,6 @@ def q_scores_for_clusters(q_shell_coords_torch_list, q_shell_radii_np_list, volu b = min_d - - q_scores = [] for cluster_idx in range(len(fit_res_clusters)): mol_idx = int(fit_res_clusters[cluster_idx, 0]) @@ -292,6 +292,10 @@ def q_scores_for_clusters(q_shell_coords_torch_list, q_shell_radii_np_list, volu q_scores.append(q_score_torch[~torch.isnan(q_score_torch)].mean()) + if save_log and (cluster_idx + 1) % 1000 == 0: + with open(log_path, "a") as log_file: + log_file.write(f"Clustered {cluster_idx + 1} fits: {datetime.now()}\n") + q_scores_tensor = torch.stack(q_scores) # top_10_values, top_10_indices = torch.topk(q_scores_tensor, k=10) # @@ -409,9 +413,6 @@ def generate_q_shells_parallel(mol, points_per_shell=8, max_rad=2.0, step=0.1, num_test_points=128, clustering_iterations=5, include_h=False, randomize_shell_points=True, random_seed=RANDOM_SEED): - from datetime import datetime - - from datetime import datetime global_timer_start = datetime.now() query_atoms = mol.atoms diff --git a/src/tool.py b/src/tool.py index b103e4c..a5a6fd3 100644 --- a/src/tool.py +++ b/src/tool.py @@ -1410,7 +1410,11 @@ def select_clicked(self, text, target, save = False, pattern = "dir"): return fileName, ext - def show_results(self, e_sqd_log, mol_centers, mol_paths, target_vol_path=None, target_surface_threshold=None): + def show_results(self, e_sqd_log, mol_centers, mol_paths, + target_vol_path=None, + target_surface_threshold=None, + save_log=False, + log_path=""): if e_sqd_log is None: return @@ -1426,12 +1430,16 @@ def show_results(self, e_sqd_log, mol_centers, mol_paths, target_vol_path=None, self.vol = run(self.session, "open {0}".format(target_vol_path))[0] run(self.session,f"volume #{self.vol.id[0]} level {target_surface_threshold}") - # TODO: define mol_centers - elif self.fit_input_mode == "interactive": self.vol = self.fit_vol self.vol.display = True + timer_start = datetime.now() + if save_log: + with open(log_path, "a") as log_file: + log_file.write(f"-------\n" + f"DiffFit clustering starts: {timer_start}\n") + N_mol, N_quat, N_shift, N_iter, N_metric = e_sqd_log.shape self.e_sqd_log = e_sqd_log.reshape([N_mol, N_quat * N_shift, N_iter, N_metric]) self.e_sqd_clusters_ordered = cluster_and_sort_sqd_fast(self.e_sqd_log, mol_centers, @@ -1439,7 +1447,13 @@ def show_results(self, e_sqd_log, mol_centers, mol_paths, target_vol_path=None, self.settings.clustering_angle_tolerance, in_contour_threshold=self.settings.clustering_in_contour_threshold, correlation_threshold=self.settings.clustering_correlation_threshold, - df_cid_threshold=self.settings.df_cid_threshold) + df_cid_threshold=self.settings.df_cid_threshold, + save_log=save_log, + log_path=log_path) + if save_log: + with open(log_path, "a") as log_file: + log_file.write(f"-------\n" + f"DiffFit clustering time elapsed: {datetime.now() - timer_start}\n") if self.e_sqd_clusters_ordered is None: self.session.logger.error("No result under these thresholds. 
Please decrease \"In contour threshold\" or \"Correlation threshold\" or rerun the fitting!") @@ -1447,6 +1461,13 @@ def show_results(self, e_sqd_log, mol_centers, mol_paths, target_vol_path=None, return # ======= Calculate Q-scores + + timer_start = datetime.now() + if save_log: + with open(log_path, "a") as log_file: + log_file.write(f"-------\n" + f"DiffFit Q-scores calculation starts: {timer_start}\n") + q_scores_np = None q_shells_mode = "Full" @@ -1490,13 +1511,24 @@ def show_results(self, e_sqd_log, mol_centers, mol_paths, target_vol_path=None, q_shell_coords_torch_list.append(q_shell_coords) q_shell_radii_np_list.append(q_shell_radii) + if save_log: + with open(log_path, "a") as log_file: + log_file.write(f"Q-scores prep time elapsed: {datetime.now() - timer_start}\n" + f"-------\n") + q_scores_np = q_scores_for_clusters(q_shell_coords_torch_list, q_shell_radii_np_list, self.vol, self.e_sqd_clusters_ordered, self.e_sqd_log, - device=self._device.currentText()) - + device=self._device.currentText(), + save_log=save_log, + log_path=log_path) + if save_log: + with open(log_path, "a") as log_file: + log_file.write(f"-------\n" + f"DiffFit Q-scores calculation time elapsed: {datetime.now() - timer_start}\n" + f"-------\n\n") # ======= Create fit results table self.model = TableModel(self.e_sqd_clusters_ordered, self.e_sqd_log, mol_paths, q_scores_np) @@ -1685,7 +1717,9 @@ def single_fit_button_clicked(self): self._view_input_mode.setCurrentText("interactive") self._view_input_mode_changed() self.interactive_fit_result_ready = True - self.show_results(self.fit_result, self.mol_centers, self.mol_paths) + self.show_results(self.fit_result, self.mol_centers, self.mol_paths, + save_log=_save_results, + log_path=f"{_out_dir}/log.log") self.tab_widget.setCurrentWidget(self.tab_view_group) @@ -1898,7 +1932,9 @@ def run_button_clicked(self): mol_centers, mol_paths, target_vol_path, - target_surface_threshold) + target_surface_threshold, + save_log=True, + log_path=f"{_out_dir}/log.log") self.tab_widget.setCurrentWidget(self.tab_view_group) self.select_table_item(0) @@ -1944,7 +1980,9 @@ def load_button_clicked(self): mol_centers = fit_res['mol_centers'] opt_res = fit_res['opt_res'] - self.show_results(opt_res, mol_centers, mol_paths, target_vol_path, target_surface_threshold) + self.show_results(opt_res, mol_centers, mol_paths, target_vol_path, target_surface_threshold, + save_log=True, + log_path=f"{datasetoutput}/log.log") self.select_table_item(0) run(self.session, f"view orient") From 9236e221c7d65deb63a85fed3250b1243d97e45e Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Thu, 5 Dec 2024 17:55:45 +0300 Subject: [PATCH 47/79] q scores log total number of clusters --- src/q_shells.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/q_shells.py b/src/q_shells.py index 5b03e26..adb11fe 100644 --- a/src/q_shells.py +++ b/src/q_shells.py @@ -253,6 +253,10 @@ def q_scores_for_clusters(q_shell_coords_torch_list, q_shell_radii_np_list, volu a = max_d - min_d b = min_d + if save_log: + with open(log_path, "a") as log_file: + log_file.write(f"Starting to cluster {len(fit_res_clusters)} fits: {datetime.now()}\n" + f"-------\n") q_scores = [] for cluster_idx in range(len(fit_res_clusters)): From c6aa550b67db837c526da78b6eac97dab7e12427 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Fri, 6 Dec 2024 11:29:53 +0300 Subject: [PATCH 48/79] better Q-scores log --- src/q_shells.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/q_shells.py b/src/q_shells.py index 
adb11fe..e5ed840 100644 --- a/src/q_shells.py +++ b/src/q_shells.py @@ -255,7 +255,7 @@ def q_scores_for_clusters(q_shell_coords_torch_list, q_shell_radii_np_list, volu if save_log: with open(log_path, "a") as log_file: - log_file.write(f"Starting to cluster {len(fit_res_clusters)} fits: {datetime.now()}\n" + log_file.write(f"Starting to calculate Q-scores for {len(fit_res_clusters)} fits: {datetime.now()}\n" f"-------\n") q_scores = [] @@ -298,7 +298,7 @@ def q_scores_for_clusters(q_shell_coords_torch_list, q_shell_radii_np_list, volu if save_log and (cluster_idx + 1) % 1000 == 0: with open(log_path, "a") as log_file: - log_file.write(f"Clustered {cluster_idx + 1} fits: {datetime.now()}\n") + log_file.write(f"Q-scoring {cluster_idx + 1} fits: {datetime.now()}\n") q_scores_tensor = torch.stack(q_scores) # top_10_values, top_10_indices = torch.topk(q_scores_tensor, k=10) From a36e33db4a6737562f058b1dcd97eef6f31bb56b Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Fri, 6 Dec 2024 11:50:30 +0300 Subject: [PATCH 49/79] try except safeguard for Q-scores --- src/tablemodel.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/tablemodel.py b/src/tablemodel.py index 1ece1d2..4e498dc 100644 --- a/src/tablemodel.py +++ b/src/tablemodel.py @@ -62,9 +62,10 @@ def data(self, index: QModelIndex, role=Qt.ItemDataRole): elif column == 1: return str(f"{mol_idx}-{self._mol_names[mol_idx]}") elif column == 2: - if self._q_scores_np is not None: + try: return float(round(float(self._q_scores_np[index.row()]) * 10000)) / 10000.0 # for 4 decimals - return None + except: + return None elif column == 3: return int(self._sqd_cluster_data[index.row(), 3]) elif 4 <= column <= 11: From 51a1d02d11fcc595f27a2eceb0c843d70afe659e Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Fri, 6 Dec 2024 17:57:06 +0300 Subject: [PATCH 50/79] clustering bfactor=2 --- src/DiffAtomComp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DiffAtomComp.py b/src/DiffAtomComp.py index ac00590..75c3516 100644 --- a/src/DiffAtomComp.py +++ b/src/DiffAtomComp.py @@ -208,7 +208,7 @@ def cluster_and_sort_sqd_fast(e_sqd_log, mol_centers, shift_tolerance: float = 3 log_file.write(f"Convert to matrix time: {datetime.now() - timer_start}\n") timer_start = datetime.now() - b = bins.Binned_Transforms(angle_tolerance * pi / 180, shift_tolerance, mol_centers[mol_idx]) + b = bins.Binned_Transforms(angle_tolerance * pi / 180, shift_tolerance, mol_centers[mol_idx], bfactor=2) mol_transform_label = [] unique_id = 0 T_ID_dict = {} From 66ee2aa83ccc2e51cf48f3b0c0a89e2517a6f89d Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Sat, 7 Dec 2024 13:58:37 +0300 Subject: [PATCH 51/79] mol_center as [0.0, 0.0, 0.0] in clustering --- src/DiffAtomComp.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/DiffAtomComp.py b/src/DiffAtomComp.py index 75c3516..891ae6d 100644 --- a/src/DiffAtomComp.py +++ b/src/DiffAtomComp.py @@ -124,6 +124,8 @@ def cluster_and_sort_sqd_fast(e_sqd_log, mol_centers, shift_tolerance: float = 3 @param sort_column_idx: the column to sort, 9-th column is the correlation @return: cluster representative table sorted in descending order """ + mol_center = np.array([0.0, 0.0, 0.0]) + from chimerax.geometry import bins from chimerax.geometry import Place @@ -208,7 +210,7 @@ def cluster_and_sort_sqd_fast(e_sqd_log, mol_centers, shift_tolerance: float = 3 log_file.write(f"Convert to matrix time: {datetime.now() - timer_start}\n") timer_start = datetime.now() - b = 
bins.Binned_Transforms(angle_tolerance * pi / 180, shift_tolerance, mol_centers[mol_idx], bfactor=2)
+        b = bins.Binned_Transforms(angle_tolerance * pi / 180, shift_tolerance, mol_center, bfactor=2)
         mol_transform_label = []
         unique_id = 0
         T_ID_dict = {}

From 67d4e97560356f5faf3dbc64f3297a3001755943 Mon Sep 17 00:00:00 2001
From: Roden Luo
Date: Sat, 7 Dec 2024 14:24:24 +0300
Subject: [PATCH 52/79] volume.data.step related bug

Was using data_origin_and_step(), causing bad Q-scores when the step
defaults to 2 upon opening.
---
 src/parse_log.py | 2 +-
 src/q_shells.py  | 5 ++---
 src/tool.py      | 2 +-
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/parse_log.py b/src/parse_log.py
index 5fed89b..8fe40b6 100644
--- a/src/parse_log.py
+++ b/src/parse_log.py
@@ -180,7 +180,7 @@ def simulate_volume(session, vol, mol_paths, mol_idx, transformation, res=4.0):
     mol.atoms.transform(transformation)
     from chimerax.map.molmap import molecule_map
-    mol_vol = molecule_map(session, mol.atoms, res, grid_spacing=vol.data_origin_and_step()[1][0])
+    mol_vol = molecule_map(session, mol.atoms, res, grid_spacing=vol.data.step)
     mol.delete()
     return mol_vol

diff --git a/src/q_shells.py b/src/q_shells.py
index e5ed840..6f0b9ce 100644
--- a/src/q_shells.py
+++ b/src/q_shells.py
@@ -231,9 +231,8 @@ def q_scores_for_clusters(q_shell_coords_torch_list, q_shell_radii_np_list, volu
     vol_matrix = volume.full_matrix()
-    vol_origin_and_step = volume.data_origin_and_step()
-    target_origin = vol_origin_and_step[0]
-    target_steps = vol_origin_and_step[1]
+    target_origin = np.array(volume.data.origin)
+    target_steps = np.array(volume.data.step)
     target_no_negative = vol_matrix
     target = torch.tensor(target_no_negative, device=device).float()

diff --git a/src/tool.py b/src/tool.py
index a5a6fd3..b3d18f8 100644
--- a/src/tool.py
+++ b/src/tool.py
@@ -2019,7 +2019,7 @@ def simulate_volume_clicked(self):
                                            res)
         elif self.fit_input_mode == "interactive":
             from chimerax.map.molmap import molecule_map
-            self.mol_vol = molecule_map(self.session, self.mol.atoms, res, grid_spacing=self.vol.data_origin_and_step()[1][0] / 3)
+            self.mol_vol = molecule_map(self.session, self.mol.atoms, res, grid_spacing=self.vol.data.step / 3)
         return

From 38cc28384238b31565828f40ad00a016d129eb4f Mon Sep 17 00:00:00 2001
From: Roden Luo
Date: Sat, 7 Dec 2024 14:58:55 +0300
Subject: [PATCH 53/79] call view command at the end of show results

---
 src/tool.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/tool.py b/src/tool.py
index b3d18f8..677d4c8 100644
--- a/src/tool.py
+++ b/src/tool.py
@@ -1546,6 +1546,8 @@ def show_results(self, e_sqd_log, mol_centers, mol_paths,
         self.mol_paths = mol_paths
         self.cluster_idx = 0
+        run(self.session, "view")
+
     def _create_volume_conv_list(self, vol, smooth_by, smooth_loops, session, negative_space_value=-0.5):
         # From here on, there are three strategies for utilizing gaussian smooth
         # 1. 
with increasing sDev on the same input volume From 1b01c403e2cf50d1e616b01320d2c9a095f08272 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Sun, 8 Dec 2024 19:55:47 +0300 Subject: [PATCH 54/79] Candidates UI in the View tab --- src/tool.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/tool.py b/src/tool.py index 677d4c8..c9b9788 100644 --- a/src/tool.py +++ b/src/tool.py @@ -1096,7 +1096,20 @@ def build_view_ui(self, layout): stats.setText("Stats: ") self.stats = stats layout.addWidget(stats, row, 0, 1, 3) - row = row + 1 + row = row + 1 + + # Adding "Candidates" field and Save button + candidates_label = QLabel("Candidates: ") + self.candidates_field = QLineEdit() + self.candidates_field.setText("") + # self.candidates_field.setText(", ".join(map(str, self.get_selected_indices()))) + save_button = QPushButton("Save structures") + # save_button.clicked.connect(self.save_candidates) + + layout.addWidget(candidates_label, row, 0) + layout.addWidget(self.candidates_field, row, 1) + layout.addWidget(save_button, row, 2) + row += 1 # button panel simulate_volume_label = QLabel("Resolution:") From dc84f378b7a69154a181ff1d1356f9e78aaf0839 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Sun, 8 Dec 2024 20:16:25 +0300 Subject: [PATCH 55/79] get candidates indices done, save structures placeholder --- src/tool.py | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/src/tool.py b/src/tool.py index c9b9788..2e273fd 100644 --- a/src/tool.py +++ b/src/tool.py @@ -47,6 +47,32 @@ from scipy.interpolate import interp1d +def calculate_candidate_indices(q_scores, num_test_fits=20): + """Calculates the candidate indices based on Q-scores.""" + try: + # Get the indices of the top 20 values (num_test_fits) in descending order + top_fits_indices = np.argpartition(q_scores, -num_test_fits)[-num_test_fits:] + top_fits_values = q_scores[top_fits_indices] + + # Sort the top values and their indices in descending order + sorted_indices_desc = np.argsort(-top_fits_values) + top_fits_indices_desc = top_fits_indices[sorted_indices_desc] + + # Calculate consecutive differences and negate them + consecutive_differences = -np.diff(top_fits_values[sorted_indices_desc]) + std = np.std(consecutive_differences) + largest_gap_index = np.argmax(consecutive_differences) + largest_gap_ratio = consecutive_differences[largest_gap_index] / std + + if largest_gap_ratio > 1: + return top_fits_indices_desc[:largest_gap_index + 1] + else: + return [] + except Exception as e: + print(f"Error calculating selected indices: {e}") + return [] + + def generate_q_shells_wrapper(q_shell_generator, mol_path, q_shells_ext, session): print(f"Q shell generator: {q_shell_generator}: {mol_path}") @@ -1102,9 +1128,8 @@ def build_view_ui(self, layout): candidates_label = QLabel("Candidates: ") self.candidates_field = QLineEdit() self.candidates_field.setText("") - # self.candidates_field.setText(", ".join(map(str, self.get_selected_indices()))) save_button = QPushButton("Save structures") - # save_button.clicked.connect(self.save_candidates) + save_button.clicked.connect(self.save_candidates) layout.addWidget(candidates_label, row, 0) layout.addWidget(self.candidates_field, row, 1) @@ -1537,6 +1562,9 @@ def show_results(self, e_sqd_log, mol_centers, mol_paths, device=self._device.currentText(), save_log=save_log, log_path=log_path) + + self.candidates_field.setText(", ".join(map(str, calculate_candidate_indices(q_scores_np) + 1))) + if save_log: with 
open(log_path, "a") as log_file: log_file.write(f"-------\n" @@ -1554,7 +1582,7 @@ def show_results(self, e_sqd_log, mol_centers, mol_paths, self.view.reset() self.view.show() - self.stats.setText("stats: {0} entries".format(self.model.rowCount())) + self.stats.setText("stats: {0} entries".format(self.model.rowCount())) self.mol_paths = mol_paths self.cluster_idx = 0 @@ -2037,6 +2065,11 @@ def simulate_volume_clicked(self): self.mol_vol = molecule_map(self.session, self.mol.atoms, res, grid_spacing=self.vol.data.step / 3) return + + def save_candidates(self): + """Save candidates action triggered by the Save button.""" + candidates = self.candidates_field.text() + self.session.logger.info(f"Saving candidates: {candidates}") def zero_density_button_clicked(self): if self.vol is None: From de3e37829ef2609e079893194f3e274c00fde2d5 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Mon, 9 Dec 2024 13:36:46 +0300 Subject: [PATCH 56/79] sort the View table by Q-scores --- src/tablemodel.py | 8 +++----- src/tool.py | 9 ++++++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/tablemodel.py b/src/tablemodel.py index 4e498dc..76e66a8 100644 --- a/src/tablemodel.py +++ b/src/tablemodel.py @@ -3,15 +3,13 @@ class TableModel(QAbstractTableModel): """A model to interface a Qt view with pandas dataframe """ - def __init__(self, sqd_cluster_data, sqd_data, mol_paths, q_scores_np, parent=None): + def __init__(self, sqd_cluster_data, sqd_data, mol_paths, parent=None): QAbstractTableModel.__init__(self, parent) self._sqd_data = sqd_data self._sqd_cluster_data = sqd_cluster_data import os self._mol_names = [os.path.splitext(os.path.basename(path))[0] for path in mol_paths] - self._q_scores_np = q_scores_np - self._header = ["Id", "Mol name", "Q-score", "Hits", "Density (normalized)", "Overlap", "Correlation", "Cam", "Inside", @@ -63,11 +61,11 @@ def data(self, index: QModelIndex, role=Qt.ItemDataRole): return str(f"{mol_idx}-{self._mol_names[mol_idx]}") elif column == 2: try: - return float(round(float(self._q_scores_np[index.row()]) * 10000)) / 10000.0 # for 4 decimals + return float(round(float(self._sqd_cluster_data[index.row(), 3]) * 10000)) / 10000.0 # for 4 decimals except: return None elif column == 3: - return int(self._sqd_cluster_data[index.row(), 3]) + return int(self._sqd_cluster_data[index.row(), 4]) elif 4 <= column <= 11: record_row = self._sqd_data[mol_idx, record_idx, iter_idx] return float(round(float(record_row[column + 3]) * 10000)) / 10000.0 # for 4 decimals diff --git a/src/tool.py b/src/tool.py index 2e273fd..4f91316 100644 --- a/src/tool.py +++ b/src/tool.py @@ -56,7 +56,6 @@ def calculate_candidate_indices(q_scores, num_test_fits=20): # Sort the top values and their indices in descending order sorted_indices_desc = np.argsort(-top_fits_values) - top_fits_indices_desc = top_fits_indices[sorted_indices_desc] # Calculate consecutive differences and negate them consecutive_differences = -np.diff(top_fits_values[sorted_indices_desc]) @@ -65,7 +64,7 @@ def calculate_candidate_indices(q_scores, num_test_fits=20): largest_gap_ratio = consecutive_differences[largest_gap_index] / std if largest_gap_ratio > 1: - return top_fits_indices_desc[:largest_gap_index + 1] + return np.array(range(largest_gap_index + 1)) else: return [] except Exception as e: @@ -1571,8 +1570,12 @@ def show_results(self, e_sqd_log, mol_centers, mol_paths, f"DiffFit Q-scores calculation time elapsed: {datetime.now() - timer_start}\n" f"-------\n\n") + q_score_column = 3 + self.e_sqd_clusters_ordered = 
np.insert(self.e_sqd_clusters_ordered, q_score_column, q_scores_np, axis=1) + self.e_sqd_clusters_ordered = self.e_sqd_clusters_ordered[self.e_sqd_clusters_ordered[:, q_score_column].argsort()[::-1]] + # ======= Create fit results table - self.model = TableModel(self.e_sqd_clusters_ordered, self.e_sqd_log, mol_paths, q_scores_np) + self.model = TableModel(self.e_sqd_clusters_ordered, self.e_sqd_log, mol_paths) self.proxyModel = QSortFilterProxyModel() self.proxyModel.setSourceModel(self.model) From d3af4d297c7aac75836869a45af7ff1bd6ea807c Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Mon, 9 Dec 2024 13:57:35 +0300 Subject: [PATCH 57/79] save candidates --- src/tool.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/tool.py b/src/tool.py index 4f91316..b6ccf77 100644 --- a/src/tool.py +++ b/src/tool.py @@ -2071,8 +2071,18 @@ def simulate_volume_clicked(self): def save_candidates(self): """Save candidates action triggered by the Save button.""" - candidates = self.candidates_field.text() - self.session.logger.info(f"Saving candidates: {candidates}") + try: + candidates = self.candidates_field.text() + candidate_ids = candidates.split(",") + + for candidate_id in candidate_ids: + # Strip any leading/trailing spaces (if any) + candidate_id = candidate_id.strip() + self.select_table_item(int(candidate_id) - 1) + run(self.session, f"save {self.settings.view_output_directory}/{self.mol.name}.cif models #{self.mol.id[0]}") + self.session.logger.info(f"Saved candidate_id: {candidates}") + except: + self.session.logger.error("Failed to parse the candidates field.") def zero_density_button_clicked(self): if self.vol is None: From 17f459a48d98a7de931e2dc3dc8c799fded55395 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Mon, 9 Dec 2024 22:44:42 +0300 Subject: [PATCH 58/79] fit results clustering speed up, a bit; see #44 --- src/DiffAtomComp.py | 43 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/src/DiffAtomComp.py b/src/DiffAtomComp.py index 891ae6d..97ab813 100644 --- a/src/DiffAtomComp.py +++ b/src/DiffAtomComp.py @@ -27,6 +27,32 @@ # Ignore PDBConstructionWarning for unrecognized 'END' record warnings.filterwarnings("ignore", message="Ignoring unrecognized record 'END'", category=PDBConstructionWarning) +from chimerax.geometry.bins import Binned_Transforms + +class DiffFit_Binned_Transforms(Binned_Transforms): + def __init__(self, angle, translation, center=(0, 0, 0), bfactor=2): + super().__init__(angle, translation, center, bfactor) + + def one_close_transform(self, tf): + + a, x, y, z = c = self.bin_point(tf) + clist = self.bins.close_objects(c, self.spacing) + if len(clist) == 0: + return None + + itf = tf.inverse() + d2max = self.translation * self.translation + for ctf in clist: + cx, cy, cz = ctf * self.center + dx, dy, dz = x - cx, y - cy, z - cz + d2 = dx * dx + dy * dy + dz * dz + if d2 <= d2max: + dtf = ctf * itf + a = dtf.rotation_angle() + if a < self.angle: + return ctf + + return None def interpolate_coords(coords, inter_folds, inter_kind='quadratic'): """Interpolate backbone coordinates.""" @@ -126,7 +152,6 @@ def cluster_and_sort_sqd_fast(e_sqd_log, mol_centers, shift_tolerance: float = 3 """ mol_center = np.array([0.0, 0.0, 0.0]) - from chimerax.geometry import bins from chimerax.geometry import Place N_mol, N_record, N_iter, N_metric = e_sqd_log.shape @@ -210,21 +235,27 @@ def cluster_and_sort_sqd_fast(e_sqd_log, mol_centers, shift_tolerance: float = 3 log_file.write(f"Convert to matrix 
time: {datetime.now() - timer_start}\n") timer_start = datetime.now() - b = bins.Binned_Transforms(angle_tolerance * pi / 180, shift_tolerance, mol_center, bfactor=2) + b = DiffFit_Binned_Transforms(angle_tolerance * pi / 180, shift_tolerance, mol_center, bfactor=1) mol_transform_label = [] unique_id = 0 T_ID_dict = {} for i in range(len(mol_shift)): ptf = T[i] - close = b.close_transforms(ptf) - if len(close) == 0: + coord = [c / s for c, s in zip(b.bin_point(ptf), b.bins.bin_size)] + cbin = b.bins.close_bins(coord, (0, 0, 0, 0))[0] + close = None + if cbin in b.bins.bins: + close = b.bins.bins[cbin][0][1] + else: + close = b.one_close_transform(ptf) + if close is None: b.add_transform(ptf) mol_transform_label.append(unique_id) T_ID_dict[id(ptf)] = unique_id unique_id = unique_id + 1 else: - mol_transform_label.append(T_ID_dict[id(close[0])]) - T_ID_dict[id(ptf)] = T_ID_dict[id(close[0])] + mol_transform_label.append(T_ID_dict[id(close)]) + T_ID_dict[id(ptf)] = T_ID_dict[id(close)] if save_log and (i + 1) % 10000 == 0: with open(log_path, "a") as log_file: From 42f03d95dbe640ae10a64ae41839e4d9da25c1d3 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Tue, 10 Dec 2024 11:01:32 +0300 Subject: [PATCH 59/79] fallback the buggy clustering; see #44 --- src/DiffAtomComp.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/src/DiffAtomComp.py b/src/DiffAtomComp.py index 97ab813..53a6b9a 100644 --- a/src/DiffAtomComp.py +++ b/src/DiffAtomComp.py @@ -29,11 +29,12 @@ from chimerax.geometry.bins import Binned_Transforms + class DiffFit_Binned_Transforms(Binned_Transforms): def __init__(self, angle, translation, center=(0, 0, 0), bfactor=2): super().__init__(angle, translation, center, bfactor) - def one_close_transform(self, tf): + def one_in_cluster_transform(self, tf): a, x, y, z = c = self.bin_point(tf) clist = self.bins.close_objects(c, self.spacing) @@ -235,27 +236,21 @@ def cluster_and_sort_sqd_fast(e_sqd_log, mol_centers, shift_tolerance: float = 3 log_file.write(f"Convert to matrix time: {datetime.now() - timer_start}\n") timer_start = datetime.now() - b = DiffFit_Binned_Transforms(angle_tolerance * pi / 180, shift_tolerance, mol_center, bfactor=1) + b = DiffFit_Binned_Transforms(angle_tolerance * pi / 180, shift_tolerance, mol_center) mol_transform_label = [] unique_id = 0 T_ID_dict = {} for i in range(len(mol_shift)): ptf = T[i] - coord = [c / s for c, s in zip(b.bin_point(ptf), b.bins.bin_size)] - cbin = b.bins.close_bins(coord, (0, 0, 0, 0))[0] - close = None - if cbin in b.bins.bins: - close = b.bins.bins[cbin][0][1] - else: - close = b.one_close_transform(ptf) - if close is None: + in_cluster = b.one_in_cluster_transform(ptf) + if in_cluster is None: b.add_transform(ptf) mol_transform_label.append(unique_id) T_ID_dict[id(ptf)] = unique_id unique_id = unique_id + 1 else: - mol_transform_label.append(T_ID_dict[id(close)]) - T_ID_dict[id(ptf)] = T_ID_dict[id(close)] + mol_transform_label.append(T_ID_dict[id(in_cluster)]) + T_ID_dict[id(ptf)] = T_ID_dict[id(in_cluster)] if save_log and (i + 1) % 10000 == 0: with open(log_path, "a") as log_file: From 9b6e39d7ccd38ceaef1fbb392b3364fca49d6a6c Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Tue, 10 Dec 2024 13:52:08 +0300 Subject: [PATCH 60/79] save candidates folder and file name counter --- src/tool.py | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/src/tool.py b/src/tool.py index b6ccf77..985f1b2 100644 --- a/src/tool.py +++ b/src/tool.py @@ -1124,14 
+1124,23 @@ def build_view_ui(self, layout): row = row + 1 # Adding "Candidates" field and Save button - candidates_label = QLabel("Candidates: ") - self.candidates_field = QLineEdit() - self.candidates_field.setText("") + candidates_folder_label = QLabel("Candidates Folder:") + self.candidates_folder = QLineEdit() + candidates_folder_select = QPushButton("Select") + candidates_folder_select.clicked.connect(lambda: self.select_clicked("Save candidates to", self.candidates_folder)) + layout.addWidget(candidates_folder_label, row, 0) + layout.addWidget(self.candidates_folder, row, 1) + layout.addWidget(candidates_folder_select, row, 2) + row = row + 1 + + candidates_label = QLabel("Candidates id: ") + self.candidates_id = QLineEdit() + self.candidates_id.setText("") save_button = QPushButton("Save structures") save_button.clicked.connect(self.save_candidates) layout.addWidget(candidates_label, row, 0) - layout.addWidget(self.candidates_field, row, 1) + layout.addWidget(self.candidates_id, row, 1) layout.addWidget(save_button, row, 2) row += 1 @@ -1562,7 +1571,7 @@ def show_results(self, e_sqd_log, mol_centers, mol_paths, save_log=save_log, log_path=log_path) - self.candidates_field.setText(", ".join(map(str, calculate_candidate_indices(q_scores_np) + 1))) + self.candidates_id.setText(", ".join(map(str, calculate_candidate_indices(q_scores_np) + 1))) if save_log: with open(log_path, "a") as log_file: @@ -2072,17 +2081,26 @@ def simulate_volume_clicked(self): def save_candidates(self): """Save candidates action triggered by the Save button.""" try: - candidates = self.candidates_field.text() + candidates = self.candidates_id.text() candidate_ids = candidates.split(",") for candidate_id in candidate_ids: # Strip any leading/trailing spaces (if any) candidate_id = candidate_id.strip() self.select_table_item(int(candidate_id) - 1) - run(self.session, f"save {self.settings.view_output_directory}/{self.mol.name}.cif models #{self.mol.id[0]}") + + base_name, ext = os.path.splitext(self.mol.name) + base_file_path = f"{self.candidates_folder.text()}/{base_name}" + counter = 1 + file_path = f"{base_file_path}_{counter}{ext}" + while os.path.exists(file_path): + file_path = f"{base_file_path}_{counter}{ext}" + counter += 1 + + run(self.session, f"save {file_path} models #{self.mol.id[0]}") self.session.logger.info(f"Saved candidate_id: {candidates}") except: - self.session.logger.error("Failed to parse the candidates field.") + self.session.logger.error("Failed to parse the candidates id field.") def zero_density_button_clicked(self): if self.vol is None: From 6746c455f234524967716006535f95e93c69752c Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Tue, 10 Dec 2024 17:41:33 +0300 Subject: [PATCH 61/79] grid_spacing bug clear --- src/tool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tool.py b/src/tool.py index 985f1b2..4e7333d 100644 --- a/src/tool.py +++ b/src/tool.py @@ -2074,7 +2074,7 @@ def simulate_volume_clicked(self): res) elif self.fit_input_mode == "interactive": from chimerax.map.molmap import molecule_map - self.mol_vol = molecule_map(self.session, self.mol.atoms, res, grid_spacing=self.vol.data.step / 3) + self.mol_vol = molecule_map(self.session, self.mol.atoms, res, grid_spacing=self.vol.data.step[0] / 3.0) return From bf258761c7454e4a8f3b6c7b39eaca4e3013c3b2 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Wed, 11 Dec 2024 18:12:03 +0300 Subject: [PATCH 62/79] calculate_candidate_indices bug clear: [] list + int --- src/tool.py | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-)

diff --git a/src/tool.py b/src/tool.py
index 4e7333d..07e0c26 100644
--- a/src/tool.py
+++ b/src/tool.py
@@ -66,10 +66,10 @@ def calculate_candidate_indices(q_scores, num_test_fits=20):
         if largest_gap_ratio > 1:
             return np.array(range(largest_gap_index + 1))
         else:
-            return []
+            return np.array([])
     except Exception as e:
         print(f"Error calculating selected indices: {e}")
-        return []
+        return np.array([])
 def generate_q_shells_wrapper(q_shell_generator, mol_path, q_shells_ext, session):

From a72c2a74bc287baa9d8dbbad67e428c60f738fb2 Mon Sep 17 00:00:00 2001
From: Roden Luo
Date: Thu, 12 Dec 2024 14:54:55 +0300
Subject: [PATCH 63/79] remove duplicated code for generate_q_shells_wrapper

---
 src/tool.py | 26 +++-----------------------
 1 file changed, 3 insertions(+), 23 deletions(-)

diff --git a/src/tool.py b/src/tool.py
index 07e0c26..cb535d7 100644
--- a/src/tool.py
+++ b/src/tool.py
@@ -1832,30 +1832,10 @@ def q_shell_button_clicked(self, mode: str):
         str_dir = self.q_shells_dir.text()
         for file_name in sorted(os.listdir(str_dir)):
-            file_path = os.path.join(str_dir, file_name)
-            file_extension = os.path.splitext(file_path)[1].lower()
+            mol_path = os.path.join(str_dir, file_name)
+            file_extension = os.path.splitext(mol_path)[1].lower()
             if file_extension in ['.pdb', '.cif']:
-                print(f"Q shell generator: {q_shell_generator}: {file_path}")
-
-                mol = run(self.session, f'open {file_path}')[0]
-                mol_basename = file_name.split('.')[0]
-
-                # center mol
-                from chimerax.geometry import Place
-                mol_center = mol.atoms.coords.mean(axis=0)
-                transform = Place(origin=-mol_center)
-                mol.atoms.transform(transform)
-                mol.position = Place()
-
-                q_shell_coords, radii = q_shell_generator(mol)
-
-                q_shells_filepath = os.path.join(str_dir, f"{mol_basename}.{ext}")
-                np.savez_compressed(q_shells_filepath,
-                                    q_shell_coords=q_shell_coords,
-                                    radii=radii)  # Not atomic warning: q_scores_points_per_shell not saved!!!
-
-                run(self.session, f"close #{mol.id[0]}")
-
+                _, _ = generate_q_shells_wrapper(q_shell_generator, mol_path, ext, self.session)
     def sim_button_clicked(self):

From 631cc840892b3e62f3aa17cc1043d2772dc93a01 Mon Sep 17 00:00:00 2001
From: Roden Luo
Date: Thu, 12 Dec 2024 17:58:07 +0300
Subject: [PATCH 64/79] not using user scene transformed coords

---
 src/tool.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/tool.py b/src/tool.py
index cb535d7..a531142 100644
--- a/src/tool.py
+++ b/src/tool.py
@@ -1711,7 +1711,6 @@ def single_fit_button_clicked(self):
         # Apply the user's transformation and center mol
         from chimerax.geometry import Place
-        mol.atoms.transform(mol.position)
         mol_center = mol.atoms.coords.mean(axis=0)
         transform = Place(origin=-mol_center)
         mol.atoms.transform(transform)

From 1f602073878703d23c9d234414410aa1ce4b3168 Mon Sep 17 00:00:00 2001
From: Roden Luo
Date: Fri, 13 Dec 2024 10:13:59 +0300
Subject: [PATCH 65/79] better doc for Gaussian mode

---
 src/tool.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/tool.py b/src/tool.py
index a531142..ee845a0 100644
--- a/src/tool.py
+++ b/src/tool.py
@@ -991,7 +991,8 @@ def build_settings_ui(self, layout):
         layout.addLayout(row)
         doc_label = QLabel("If the map's resolution < 5.0, we suggest using \"Gaussian with negative (shrink)\".\n"
-                       "Otherwise, we suggest using \"Gaussian then negative (expand)\".\n")
+                       "Otherwise, we suggest giving \"Gaussian then negative (expand)\" a try. "
+                       "But the influence of this parameter is mild in most cases. 
\n") doc_label.setWordWrap(True) row.addWidget(doc_label) From c3b8f318929e667039c16a2731e9ee1b413791f9 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Fri, 13 Dec 2024 10:23:01 +0300 Subject: [PATCH 66/79] removed target volume in the View tab read from saved data --- src/tool.py | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/src/tool.py b/src/tool.py index ee845a0..d377392 100644 --- a/src/tool.py +++ b/src/tool.py @@ -149,7 +149,6 @@ class DiffFitSettings: def __init__(self): # viewing self.view_output_directory: str = "D:/GIT/DiffFit/dev_data/output" - self.view_target_vol_path: str = "D:/GIT/DiffFit/dev_data/input/domain_fit_demo_3domains/density2.mrc" self.view_structures_directory: str = "D:/GIT/DiffFit/dev_data/input/domain_fit_demo_3domains/subunits_cif" # computing @@ -350,8 +349,7 @@ def load_settings(self): self.conv_weights.setText("[{0}]".format(','.join(map(str, self.settings.conv_weights)))) # view - self.target_vol.setText(self.settings.view_target_vol_path) - self.dataset_folder.setText(self.settings.view_output_directory) + self.dataset_folder.setText(self.settings.view_output_directory) # clustering self.df_cid_threshold.setValue(self.settings.df_cid_threshold) @@ -389,8 +387,7 @@ def store_settings(self): #view self.settings.view_output_directory = self.dataset_folder.text() - self.settings.view_target_vol_path = self.target_vol.text() - + # clustering self.settings.df_cid_threshold = self.df_cid_threshold.value() self.settings.clustering_in_contour_threshold = self.clustering_in_contour_threshold.value() @@ -398,8 +395,6 @@ def store_settings(self): self.settings.clustering_angle_tolerance = self.clustering_angle_tolerance.value() self.settings.clustering_shift_tolerance = self.clustering_shift_tolerance.value() - #print(self.settings) - #print(self.settings.view_target_vol_path) def build_single_fit_ui(self, layout): row = QHBoxLayout() @@ -1010,18 +1005,7 @@ def build_view_ui(self, layout): layout.addWidget(self._view_input_mode, row, 1, 1, 2) row = row + 1 - target_vol_label = QLabel("Target Volume:") - self.target_vol = QLineEdit() - self.target_vol.textChanged.connect(lambda: self.store_settings()) - self.target_vol.setEnabled(False) - self.target_vol_select = QPushButton("Select") - self.target_vol_select.setEnabled(False) - self.target_vol_select.clicked.connect(lambda: self.select_clicked("Target Volume", self.target_vol, False, "MRC Files(*.mrc);;MAP Files(*.map)")) - layout.addWidget(target_vol_label, row, 0) - layout.addWidget(self.target_vol, row, 1) - layout.addWidget(self.target_vol_select, row, 2) - row = row + 1 - + # data folder - where the data is stored dataset_folder_label = QLabel("Result Folder:") self.dataset_folder = QLineEdit() From 98f813de4bd70a569309cf283f0c67ef76451342 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Fri, 13 Dec 2024 10:49:35 +0300 Subject: [PATCH 67/79] removed target volume in the View tab --- src/tool.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/tool.py b/src/tool.py index d377392..6cddcf3 100644 --- a/src/tool.py +++ b/src/tool.py @@ -1299,14 +1299,10 @@ def _Gaussian_mode_changed(self): def _view_input_mode_changed(self): if self._view_input_mode.currentText() == "interactive": self.fit_input_mode = "interactive" - self.target_vol.setEnabled(False) - self.target_vol_select.setEnabled(False) self.dataset_folder.setEnabled(False) self.dataset_folder_select.setEnabled(False) elif self._view_input_mode.currentText() == "disk file": self.fit_input_mode = "disk file" - 
self.target_vol.setEnabled(False) - self.target_vol_select.setEnabled(False) self.dataset_folder.setEnabled(True) self.dataset_folder_select.setEnabled(True) @@ -1942,7 +1938,6 @@ def run_button_clicked(self): f"DiffFit optimization time elapsed: {timer_stop - timer_start}\n") # copy the directories - self.target_vol.setText(self.settings.target_vol_path) self.dataset_folder.setText("{0}".format(self.settings.output_directory)) #print(self.settings) From c67f30c875ee1e840e0afbc3f2a55ee1fdfc99be Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Fri, 13 Dec 2024 11:37:11 +0300 Subject: [PATCH 68/79] add dependency ChimeraX-QScore --- bundle_info.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/bundle_info.xml b/bundle_info.xml index 1d2db1d..f09ef4a 100644 --- a/bundle_info.xml +++ b/bundle_info.xml @@ -30,6 +30,7 @@ will be displayed without the ChimeraX- prefix. + From eaa3c462a6d14e5dc081fe15dd0b3a05efdfdfb5 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Fri, 13 Dec 2024 11:37:33 +0300 Subject: [PATCH 69/79] not using user scene transformed coords for cmd --- src/dfit_cmd.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/dfit_cmd.py b/src/dfit_cmd.py index fab2371..5306de3 100644 --- a/src/dfit_cmd.py +++ b/src/dfit_cmd.py @@ -104,7 +104,6 @@ def dfit(session, mol, in_map, # Apply the user's transformation and center mol from chimerax.geometry import Place - mol.atoms.transform(mol.position) mol_center = mol.atoms.coords.mean(axis=0) transform = Place(origin=-mol_center) mol.atoms.transform(transform) From c171e3d3f08f9ce181afc06cd53200a726244a8a Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Fri, 13 Dec 2024 16:50:04 +0300 Subject: [PATCH 70/79] removed target volume in cmd --- src/dfit_cmd.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/dfit_cmd.py b/src/dfit_cmd.py index 5306de3..988291f 100644 --- a/src/dfit_cmd.py +++ b/src/dfit_cmd.py @@ -326,7 +326,6 @@ def dfit_disk(session, str_dir, in_map, level, if df is not None: # copy the directories - df.target_vol.setText(in_map) df.dataset_folder.setText(_out_dir) # output is tensor, convert to numpy From a4d4df9f3e4ed041944a565ae2baec3e2d9f465b Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Fri, 13 Dec 2024 16:50:40 +0300 Subject: [PATCH 71/79] fix mol_basename bug --- src/tool.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tool.py b/src/tool.py index 6cddcf3..1690511 100644 --- a/src/tool.py +++ b/src/tool.py @@ -75,7 +75,7 @@ def calculate_candidate_indices(q_scores, num_test_fits=20): def generate_q_shells_wrapper(q_shell_generator, mol_path, q_shells_ext, session): print(f"Q shell generator: {q_shell_generator}: {mol_path}") - mol_basename = mol_path.split('.')[0] + mol_basename = os.path.basename(mol_path) mol_folder = os.path.dirname(mol_path) mol = run(session, f'open {mol_path}')[0] @@ -1518,7 +1518,7 @@ def show_results(self, e_sqd_log, mol_centers, mol_paths, q_shells_ext = "centered_q_shells.simple.npz" for mol_path in mol_paths: - mol_basename = mol_path.split('.')[0] + mol_basename = os.path.basename(mol_path) mol_folder = os.path.dirname(mol_path) q_shells_filepath = os.path.join(mol_folder, f"{mol_basename}.{q_shells_ext}") From edef62645aa1e93465992985d387ee595b08f524 Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Fri, 13 Dec 2024 16:52:55 +0300 Subject: [PATCH 72/79] view orient in show_results --- src/tool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tool.py b/src/tool.py index 1690511..289337a 100644 --- a/src/tool.py +++ b/src/tool.py @@ -1580,7 
+1580,7 @@ def show_results(self, e_sqd_log, mol_centers, mol_paths, self.mol_paths = mol_paths self.cluster_idx = 0 - run(self.session, "view") + run(self.session, "view orient") def _create_volume_conv_list(self, vol, smooth_by, smooth_loops, session, negative_space_value=-0.5): # From here on, there are three strategies for utilizing gaussian smooth From 14cd38ed2e62138c0b826e3e3389466583c93f1f Mon Sep 17 00:00:00 2001 From: Roden Luo Date: Fri, 13 Dec 2024 17:48:46 +0300 Subject: [PATCH 73/79] add mol_num_atoms; remove mol_centers --- src/DiffAtomComp.py | 17 ++++++++--------- src/tablemodel.py | 13 ++++++++----- src/tool.py | 22 +++++++++++----------- 3 files changed, 27 insertions(+), 25 deletions(-) diff --git a/src/DiffAtomComp.py b/src/DiffAtomComp.py index 53a6b9a..d914d95 100644 --- a/src/DiffAtomComp.py +++ b/src/DiffAtomComp.py @@ -122,7 +122,7 @@ def q2_unit_coord(Q): return np.concatenate((rotated_up, rotated_right), axis=-1) -def cluster_and_sort_sqd_fast(e_sqd_log, mol_centers, shift_tolerance: float = 3.0, angle_tolerance: float = 6.0, +def cluster_and_sort_sqd_fast(e_sqd_log, shift_tolerance: float = 3.0, angle_tolerance: float = 6.0, sort_column_idx: int = 7, in_contour_threshold: float = 0.5, correlation_threshold: float = 0.5, @@ -145,7 +145,6 @@ def cluster_and_sort_sqd_fast(e_sqd_log, mol_centers, shift_tolerance: float = 3 3. sort the cluster table in descending order by correlation, or the metric at the sort_column_idx @param e_sqd_log: fitting results in sqd table - @param mol_centers: molecule atom coords centers @param shift_tolerance: shift tolerance in Angstrom @param angle_tolerance: angle tolerance in degrees @param sort_column_idx: the column to sort, 9-th column is the correlation @@ -839,7 +838,7 @@ def diff_fit(volume_list: list, # ======= get atom coords atom_coords_list = mol_coords # atom coords as [x, y, z] - mol_centers = [np.mean(coords, axis=0) for coords in atom_coords_list] + mol_num_atoms = [len(coords) for coords in atom_coords_list] num_molecules = len(atom_coords_list) # read simulated map @@ -955,7 +954,7 @@ def diff_fit(volume_list: list, target_vol_path=vol_path, target_surface_threshold=target_surface_threshold, mol_paths=[mol_path], - mol_centers=mol_centers, + mol_num_atoms=mol_num_atoms, opt_res=e_sqd_log_np) # np.save(f"{out_dir}/sampled_coords.npy", sampled_coords) @@ -969,7 +968,7 @@ def diff_fit(volume_list: list, return (vol_path, target_surface_threshold, [mol_path], - mol_centers, + mol_num_atoms, e_sqd_log_np) @@ -1041,8 +1040,8 @@ def diff_atom_comp(target_vol_path: str, # center the mol mol_centers = [np.mean(coords, axis=0) for coords in atom_coords_list] atom_coords_list = center_atom_coords_list(atom_coords_list, mol_centers) - # re-calculate the centers, should be all near zero - mol_centers = [np.mean(coords, axis=0) for coords in atom_coords_list] + + mol_num_atoms = [len(coords) for coords in atom_coords_list] # ======= optimization @@ -1158,7 +1157,7 @@ def diff_atom_comp(target_vol_path: str, target_vol_path=target_vol_path, target_surface_threshold=target_surface_threshold, mol_paths=mol_paths, - mol_centers=mol_centers, + mol_num_atoms=mol_num_atoms, opt_res=e_sqd_log.detach().cpu().numpy()) # np.save(f"{out_dir}/sampled_coords.npy", sampled_coords) @@ -1172,7 +1171,7 @@ def diff_atom_comp(target_vol_path: str, return (target_vol_path, target_surface_threshold, mol_paths, - mol_centers, + mol_num_atoms, e_sqd_log) diff --git a/src/tablemodel.py b/src/tablemodel.py index 76e66a8..dd16f19 100644 --- 
From 5ed7950d39f220bdf8ab100fb57c1246ebc9c924 Mon Sep 17 00:00:00 2001
From: Roden Luo
Date: Fri, 13 Dec 2024 17:54:28 +0300
Subject: [PATCH 74/79] fix mol_basename bug

---
 src/tool.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/tool.py b/src/tool.py
index 28f81f9..f7bfaad 100644
--- a/src/tool.py
+++ b/src/tool.py
@@ -40,6 +40,7 @@
 import sys
 import numpy as np
 import os
+from pathlib import Path
 import torch
 import psutil
 import platform
@@ -75,7 +76,7 @@ def calculate_candidate_indices(q_scores, num_test_fits=20):

 def generate_q_shells_wrapper(q_shell_generator, mol_path, q_shells_ext, session):
     print(f"Q shell generator: {q_shell_generator}: {mol_path}")
-    mol_basename = os.path.basename(mol_path)
+    mol_basename = Path(mol_path).stem
     mol_folder = os.path.dirname(mol_path)

     mol = run(session, f'open {mol_path}')[0]
@@ -1518,7 +1519,7 @@ def show_results(self, e_sqd_log, mol_num_atoms, mol_paths,
             q_shells_ext = "centered_q_shells.simple.npz"

             for mol_path in mol_paths:
-                mol_basename = os.path.basename(mol_path)
+                mol_basename = Path(mol_path).stem
                 mol_folder = os.path.dirname(mol_path)
                 q_shells_filepath = os.path.join(mol_folder, f"{mol_basename}.{q_shells_ext}")

From 88aff3c39ed8f8906eab51560d3395ac90afa5eb Mon Sep 17 00:00:00 2001
From: Roden Luo
Date: Fri, 13 Dec 2024 18:01:40 +0300
Subject: [PATCH 75/79] git ignore *.npz

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 705cc79..6cb2925 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,3 +14,4 @@ dev_data/output_old/*
 case_study_data
 run
 /dev_data/input/domain_fit_demo_3domains/subunits_cif_test
+*.npz
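The mol_basename bug addressed in patches 71 and 74: splitting the whole path on the first '.' breaks as soon as a directory name contains a dot, and `os.path.basename` alone keeps the extension attached. `Path(...).stem` gives the file name without its last suffix. A small comparison on a hypothetical path (not one from the repository):

    import os
    from pathlib import Path

    mol_path = "/data/run_v1.2/structures/7abc_chainA.cif"   # made-up input path

    mol_path.split('.')[0]       # '/data/run_v1'      -- original bug: truncates at the first dot
    os.path.basename(mol_path)   # '7abc_chainA.cif'   -- patch 71: extension still attached
    Path(mol_path).stem          # '7abc_chainA'       -- patch 74: the intended basename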
From 7001f086bb874e2896bf82bc04b0e0361c80c3ee Mon Sep 17 00:00:00 2001
From: Roden Luo
Date: Fri, 13 Dec 2024 18:14:49 +0300
Subject: [PATCH 76/79] all basename using Path().stem now

---
 src/DiffAtomComp.py    | 3 ++-
 src/convert2mrc_npy.py | 3 ++-
 src/split_chains.py    | 3 ++-
 src/tool.py            | 4 ++--
 4 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/DiffAtomComp.py b/src/DiffAtomComp.py
index d914d95..f2a3ce1 100644
--- a/src/DiffAtomComp.py
+++ b/src/DiffAtomComp.py
@@ -1,5 +1,6 @@
 import argparse
 import os, sys
+from pathlib import Path
 from datetime import datetime

 import numpy as np
@@ -508,7 +509,7 @@ def read_file_and_get_coordinates(file_path, fit_atom_mode="Backbone"):
         parser = PDBParser()

     # Parse the structure
-    structure_id = os.path.basename(file_path).split('.')[0]  # Use file name as structure ID
+    structure_id = Path(file_path).stem  # Use file name as structure ID
     structure = parser.get_structure(structure_id, file_path)

     # Initialize a list to hold all atom coordinates

diff --git a/src/convert2mrc_npy.py b/src/convert2mrc_npy.py
index 452cdbe..5e5f6ea 100644
--- a/src/convert2mrc_npy.py
+++ b/src/convert2mrc_npy.py
@@ -11,6 +11,7 @@
 from chimerax.core.commands import run
 from datetime import datetime
 import numpy as np
+from pathlib import Path

 timer_start = datetime.now()
@@ -30,7 +31,7 @@
     structure = run(session, f'open {full_path}')[0]

     # Get the base name of the input structure for naming output files
-    structure_basename = os.path.basename(full_path).split('.')[0]
+    structure_basename = Path(full_path).stem

     # Save the structure's coordinates as a npy file
     npy_filename = f"{structure_basename}.npy"

diff --git a/src/split_chains.py b/src/split_chains.py
index 5232810..53fee9c 100644
--- a/src/split_chains.py
+++ b/src/split_chains.py
@@ -8,6 +8,7 @@
 # Import necessary modules
 import os, sys
+from pathlib import Path
 from chimerax.core.commands import run
 from datetime import datetime
 import numpy as np
@@ -25,7 +26,7 @@
 structure = run(session, f'open {input_model}')[0]

 # Get the base name of the input structure for naming output files
-structure_basename = os.path.basename(input_model).split('.')[0]
+structure_basename = Path(input_model).stem

 # Iterate over each chain in the structure
 chain_id_name_list = []

diff --git a/src/tool.py b/src/tool.py
index f7bfaad..ada40ef 100644
--- a/src/tool.py
+++ b/src/tool.py
@@ -1827,7 +1827,7 @@ def sim_button_clicked(self):
             file_extension = os.path.splitext(file_path)[1].lower()
             if file_extension in ['.pdb', '.cif']:
                 structure = run(self.session, f'open {file_path}')[0]
-                structure_basename = file_name.split('.')[0]
+                structure_basename = Path(file_name).stem

                 mrc_filename = f"{structure_basename}.mrc"
                 mrc_filepath = os.path.join(str_dir, mrc_filename)
@@ -1844,7 +1844,7 @@ def split_button_clicked(self):
             os.makedirs(output_dir)

         structure = self._split_model.value
-        structure_basename = os.path.basename(structure.filename).split('.')[0]
+        structure_basename = Path(structure.filename).stem

         chain_id_name_list = []
         for chain in structure.chains:
From c8e9034fcf0055734f0ed6aea5dadca1e10c608f Mon Sep 17 00:00:00 2001
From: Roden Luo
Date: Fri, 13 Dec 2024 20:16:46 +0300
Subject: [PATCH 77/79] all suffix using Path().suffix now

---
 src/DiffAtomComp.py | 8 ++++----
 src/tool.py         | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/DiffAtomComp.py b/src/DiffAtomComp.py
index f2a3ce1..4052eb1 100644
--- a/src/DiffAtomComp.py
+++ b/src/DiffAtomComp.py
@@ -433,7 +433,7 @@ def mrc_folder_to_npy_list(mrc_folder):
         full_path = os.path.join(mrc_folder, file_name)
         # Check if the current path is a file and not a directory
         if os.path.isfile(full_path):
-            file_extension = os.path.splitext(full_path)[1].lower()
+            file_extension = Path(file_name).suffix.lower()
             if file_extension in ['.mrc', '.map']:
                 data, steps, origin = mrc_to_npy(full_path)
                 sim_map_list.append((data, steps, origin))
@@ -500,7 +500,7 @@ def transform_to_angstrom_space(ndc_shift, box_size, box_origin, atom_center_in_

 def read_file_and_get_coordinates(file_path, fit_atom_mode="Backbone"):
     # Determine file extension
-    file_extension = os.path.splitext(file_path)[1].lower()
+    file_extension = Path(file_path).suffix.lower()

     # Initialize parser based on file extension
     if file_extension == '.cif':
@@ -696,7 +696,7 @@ def read_all_files_to_atom_coords_list(structures_dir, fit_atom_mode="Backbone")
     for file_name in sorted(os.listdir(structures_dir)):
         full_path = os.path.join(structures_dir, file_name)
-        file_extension = os.path.splitext(full_path)[1].lower()
+        file_extension = Path(file_name).suffix.lower()
         if file_extension in ['.pdb', '.cif']:
             # Read the atom coordinates from the file
             atom_coords = read_file_and_get_coordinates(full_path, fit_atom_mode)
@@ -1150,7 +1150,7 @@ def diff_atom_comp(target_vol_path: str,
     mol_paths = []
     for file_name in sorted(os.listdir(structures_dir)):
         full_path = os.path.join(structures_dir, file_name)
-        file_extension = os.path.splitext(full_path)[1].lower()
+        file_extension = Path(file_name).suffix.lower()
         if file_extension in ['.pdb', '.cif']:
             mol_paths.append(full_path)

diff --git a/src/tool.py b/src/tool.py
index ada40ef..7f57e35 100644
--- a/src/tool.py
+++ b/src/tool.py
@@ -1814,7 +1814,7 @@ def q_shell_button_clicked(self, mode: str):
         for file_name in sorted(os.listdir(str_dir)):
             mol_path = os.path.join(str_dir, file_name)
-            file_extension = os.path.splitext(mol_path)[1].lower()
+            file_extension = Path(file_name).suffix.lower()
             if file_extension in ['.pdb', '.cif']:
                 _, _ = generate_q_shells_wrapper(q_shell_generator, mol_path, ext, self.session)
@@ -1824,7 +1824,7 @@ def sim_button_clicked(self):
         for file_name in sorted(os.listdir(str_dir)):
             file_path = os.path.join(str_dir, file_name)
-            file_extension = os.path.splitext(file_path)[1].lower()
+            file_extension = Path(file_name).suffix.lower()
             if file_extension in ['.pdb', '.cif']:
                 structure = run(self.session, f'open {file_path}')[0]
                 structure_basename = Path(file_name).stem
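Patch 77 applies the same idea to extensions: `Path(file_name).suffix.lower()` replaces `os.path.splitext(...)[1].lower()` when scanning a directory for structure or map files. A minimal sketch of that filter over a hypothetical folder (the directory name is illustrative):

    import os
    from pathlib import Path

    structures_dir = "dev_data/input/structures"   # made-up folder

    mol_paths = []
    for file_name in sorted(os.listdir(structures_dir)):
        # .suffix returns the last extension including the dot, e.g. '.cif'
        if Path(file_name).suffix.lower() in ['.pdb', '.cif']:
            mol_paths.append(os.path.join(structures_dir, file_name))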
From a68f257eb2b57009d05c3ca14dc2ada238c39595 Mon Sep 17 00:00:00 2001
From: Roden Luo
Date: Fri, 13 Dec 2024 20:35:56 +0300
Subject: [PATCH 78/79] better log when no candidates id

---
 src/tool.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tool.py b/src/tool.py
index 7f57e35..d73a3ac 100644
--- a/src/tool.py
+++ b/src/tool.py
@@ -69,7 +69,7 @@ def calculate_candidate_indices(q_scores, num_test_fits=20):
         else:
             return np.array([])
     except Exception as e:
-        print(f"Error calculating selected indices: {e}")
+        print(f"Warn: no candidates id: {e}")
         return np.array([])
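After patch 78, `calculate_candidate_indices` logs a warning and returns an empty array rather than surfacing the exception, so callers should expect a zero-length result. A hedged sketch of how a caller might guard on that; the variable names and the follow-up action are illustrative, not taken from the plugin:

    import numpy as np

    candidate_idx = np.array([])          # what calculate_candidate_indices returns on failure

    if candidate_idx.size == 0:
        print("No candidate fits selected; skipping the extra rescoring pass.")
    else:
        for idx in candidate_idx.astype(int):
            pass  # rescore or refine the selected fits here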
print(f"\nDiffFit total time elapsed: {timer_stop - disk_fit_timer_start}\n\n") diff --git a/src/tool.py b/src/tool.py index d73a3ac..219d1ed 100644 --- a/src/tool.py +++ b/src/tool.py @@ -1581,7 +1581,6 @@ def show_results(self, e_sqd_log, mol_num_atoms, mol_paths, self.mol_paths = mol_paths self.cluster_idx = 0 - run(self.session, "view orient") def _create_volume_conv_list(self, vol, smooth_by, smooth_loops, session, negative_space_value=-0.5): # From here on, there are three strategies for utilizing gaussian smooth @@ -1760,6 +1759,7 @@ def single_fit_button_clicked(self): self.tab_widget.setCurrentWidget(self.tab_view_group) self.select_table_item(0) + run(self.session, "view orient") timer_stop = datetime.now() print(f"\nDiffFit total time elapsed: {timer_stop - single_fit_timer_start}\n\n") @@ -1952,6 +1952,7 @@ def run_button_clicked(self): log_path=f"{_out_dir}/log.log") self.tab_widget.setCurrentWidget(self.tab_view_group) self.select_table_item(0) + run(self.session, "view orient") timer_stop = datetime.now() print(f"\nDiffFit total time elapsed: {timer_stop - disk_fit_timer_start}\n\n")