diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6a76dc0..8ccd454 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -70,7 +70,7 @@ repos: hooks: - id: mypy name: checking types - entry: mypy + entry: mypy --explicit-package-bases language: system types: [ python ] diff --git a/README.rst b/README.rst index e7c2d0c..0f6a1e6 100644 --- a/README.rst +++ b/README.rst @@ -28,15 +28,15 @@ I want to know the minimal RMSD between two molecules To calculate the structural difference between two molecules, you might initially compute the RMSD directly (**Figure 2.a**). However, this straightforward approach could give you a misleadingly large value. To get the true minimal RMSD, you must adjust for translations (**Figure 2.b**) and rotations (**Figure 2.c**). This process aligns the two molecules in the best possible way, ensuring the RMSD accurately reflects their structural similarity after optimal alignment. -.. list-table:: +.. list-table:: :header-rows: 1 * - 1.a - 1.b - 1.c - * - |fig1.a| - - |fig1.b| + * - |fig1.a| + - |fig1.b| - |fig1.c| * - RMSD = 2.8 @@ -48,27 +48,27 @@ To get the true minimal RMSD, you must adjust for translations (**Figure 2.b**) I do not know the order of the atoms ------------------------------------ -Atom reordering methods can be used in cases where the atoms in the two molecules are not in the same order (Figure 2.1). These algorithms find the optimal mapping of atoms between the two structures to minimize RMSD. +Atom reordering methods can be used in cases where the atoms in the two molecules are not in the same order (Figure 2.1). These algorithms find the optimal mapping of atoms between the two structures to minimize RMSD. Each method has its limitations because finding the best atom mapping depends on having the structures properly aligned. This is usually done by comparing atom-pair distances. If the molecules are already aligned, using the Hungarian linear sum assignment works well. If they’re not aligned, you can either align the molecules using their inertia eigenvectors (**Figure 2.a**) or use atomic descriptors (**Figure 2.b**), independent of the coordinate system, to reorder the atoms. Note that all reordering methods have limitations and drawbacks and might not actually find the true order. .. _Hungarian: https://en.wikipedia.org/wiki/Hungarian_algorithm -.. list-table:: +.. list-table:: :header-rows: 1 * - 2.a - 2.b - 2.c - * - |fig2.a| - - |fig2.b| + * - |fig2.a| + - |fig2.b| - |fig2.c| **Figure 2**: **a)** Two identical molecules, but not in the same atomic order, making it impossible to rotate correctly. **b)** Illustrating the molecular inertia vectors used to align molecules to find the atom mapping. -**c)** Illustrating atomic representation for individual atoms. Structure-dependent but coordinate system-independent, which can be used for atom mapping. +**c)** Illustrating atomic representation for individual atoms. Structure-dependent but coordinate system-independent, which can be used for atom mapping. Installation ============ @@ -111,11 +111,11 @@ If the atoms are scrambled and not aligned, you can use the ``--reorder`` argument, which will align the atoms from structure B onto A. Use ``--reorder-method`` to select the reordering method. -Choose between -Inertia_ aligned Hungarian_ distance (default), -Hungarian_ distance, +Choose between +Inertia_ aligned Hungarian_ distance (default), +Hungarian_ distance, distance (very approximate) -QML atomic representation (coordinate independent), +QML atomic representation (coordinate independent), and brute force (don't). .. _Hungarian: https://en.wikipedia.org/wiki/Hungarian_algorithm @@ -152,21 +152,21 @@ Calculate Root-mean-square deviation (RMSD) of Two Molecules Using Rotation, Git http://github.com/charnley/rmsd, -.. list-table:: +.. list-table:: :header-rows: 1 * - Method - Argument - Citation - * - **Kabsch** + * - **Kabsch** - ``--rotation-method kabsch`` (Default) - Wolfgang Kabsch (1976), Acta Crystallographica, A32:922-923 http://dx.doi.org/10.1107/S0567739476001873 - * - **Quaternion** + * - **Quaternion** - ``--rotation-method quaternion`` - Walker, Shao & Volz (1991), CVGIP: Image Understanding, 54:358-367, @@ -179,9 +179,9 @@ http://github.com/charnley/rmsd, http://dx.doi.org/10.1109/TAES.2016.140952 - * - **FCHL19** + * - **FCHL19** - ``--reorder-method qml`` - - Christensen et al (2020), J. Chem. Phys. 152, 044107 + - Christensen et al (2020), J. Chem. Phys. 152, 044107 https://doi.org/10.1063/1.5126701 @@ -207,4 +207,3 @@ For example, some hydrogens are noted as ``HG11``, which we assume is not mercur .. |fig2.a| image:: https://raw.githubusercontent.com/charnley/rmsd/refs/heads/charnley/doc/docs/figures/fig_reorder_problem.png .. |fig2.b| image:: https://raw.githubusercontent.com/charnley/rmsd/refs/heads/charnley/doc/docs/figures/fig_reorder_inertia.png .. |fig2.c| image:: https://raw.githubusercontent.com/charnley/rmsd/refs/heads/charnley/doc/docs/figures/fig_reorder_qml.png - diff --git a/rmsd/calculate_rmsd.py b/rmsd/calculate_rmsd.py index f506075..9da942c 100644 --- a/rmsd/calculate_rmsd.py +++ b/rmsd/calculate_rmsd.py @@ -678,8 +678,8 @@ def kabsch_weighted_fit( PNEW: ndarray = np.dot(P, R.T) + T if return_rmsd: return (PNEW, rmsd_) - else: - return (PNEW, None) + + return (PNEW, None) def kabsch_weighted_rmsd(P: ndarray, Q: ndarray, W: Optional[ndarray] = None) -> float: @@ -842,20 +842,16 @@ def hungarian_vectors( if use_kernel: # Calculate cost matrix from similarity kernel - K = laplacian_kernel(p_vecs, q_vecs, sigma) - K *= -1.0 - K += 1.0 + kernel = laplacian_kernel(p_vecs, q_vecs, sigma) + kernel *= -1.0 + kernel += 1.0 else: - K = distance_matrix(p_vecs, q_vecs) + kernel = distance_matrix(p_vecs, q_vecs) - # Perform Hungarian analysis on distance matrix between atoms of 1st - # structure and trial structure - indices_b: ndarray - indices_a: ndarray - indices_a, indices_b = linear_sum_assignment(K) + _, indices_q = linear_sum_assignment(kernel) - return indices_b + return indices_q def reorder_similarity( @@ -900,7 +896,6 @@ def reorder_similarity( } p_vecs = generate_fchl19(p_atoms, p_coord, **parameters) - q_vecs = generate_fchl19(q_atoms, q_coord, **parameters) # generate full view from q shape to fill in atom view on the fly @@ -975,23 +970,20 @@ def reorder_distance( return view_reorder -def hungarian(A: ndarray, B: ndarray) -> ndarray: +def _hungarian(A: ndarray, B: ndarray) -> ndarray: """ - Hungarian reordering. + Hungarian reordering, minimizing the cost distances[winner_rows, winner_cols].sum()) - Assume A and B are coordinates for atoms of SAME type only + Assume A and B are coordinates for atoms of SAME type only. """ - # should be kabasch here i think distances = cdist(A, B, "euclidean") # Perform Hungarian analysis on distance matrix between atoms of 1st # structure and trial structure - indices_b: ndarray - indices_a: ndarray - indices_a, indices_b = linear_sum_assignment(distances) + winner_rows, winner_cols = linear_sum_assignment(distances) - return indices_b + return winner_cols def reorder_hungarian( @@ -1038,7 +1030,7 @@ def reorder_hungarian( _p_coord = p_coord[p_atom_idx] _q_coord = q_coord[q_atom_idx] - view = hungarian(_p_coord, _q_coord) + view = _hungarian(_p_coord, _q_coord) view_reorder[p_atom_idx] = q_atom_idx[view] return view_reorder @@ -1076,9 +1068,13 @@ def reorder_inertia_hungarian( """ + p_coord = np.array(p_coord, copy=True) + q_coord = np.array(q_coord, copy=True) + p_coord -= get_cm(p_atoms, p_coord) q_coord -= get_cm(q_atoms, p_coord) + # Calculate inertia vectors for both structures inertia_p = get_inertia_tensor(p_atoms, p_coord) eigval_p, eigvec_p = np.linalg.eig(inertia_p) @@ -1101,12 +1097,11 @@ def reorder_inertia_hungarian( for mirror in AXIS_REFLECTIONS: - tmp_eigvec = eigvec_p * mirror.T + tmp_eigvec = eigvec_q * mirror.T tmp_coord = np.dot(q_coord, tmp_eigvec) - tmp_coord -= get_cm(q_atoms, tmp_coord) test_review = reorder_hungarian(p_atoms, q_atoms, p_coord, tmp_coord) - test_rmsd = rmsd(tmp_coord[test_review], p_coord) + test_rmsd = kabsch_rmsd(tmp_coord[test_review], p_coord) if test_rmsd < best_rmsd: best_rmsd = test_rmsd @@ -1441,7 +1436,9 @@ def get_principal_axis(atoms: ndarray, V: ndarray) -> ndarray: return principal_axis -def set_coordinates(atoms: ndarray, V: ndarray, title: str = "", decimals: int = 8) -> str: +def set_coordinates( + atoms: ndarray, V: ndarray, title: str = "", decimals: int = 8, set_atoms_as_symbols=True +) -> str: """ Print coordinates V with corresponding atoms to stdout in XYZ format. Parameters @@ -1466,6 +1463,9 @@ def set_coordinates(atoms: ndarray, V: ndarray, title: str = "", decimals: int = if N != len(atoms): raise ValueError("Mismatch between expected atoms and coordinate size") + if not isinstance(atoms[0], str) and set_atoms_as_symbols: + atoms = np.array([str_atom(atom) for atom in atoms]) + fmt = "{:<2}" + (" {:15." + str(decimals) + "f}") * 3 out = list() @@ -2280,9 +2280,8 @@ def main(args: Optional[List[str]] = None) -> str: if settings.print_only_rmsd_atoms or not use_view: q_coord_sub = np.dot(q_coord_sub, U) q_coord_sub += p_cent_sub - _atoms = np.asarray([str_atom(atom) for atom in q_atoms_sub]) return set_coordinates( - _atoms, + q_atoms_sub, q_coord_sub, title=f"Rotated '{settings.structure_b}' to match '{settings.structure_a}', with a RMSD of {result_rmsd:.8f}", ) @@ -2298,9 +2297,8 @@ def main(args: Optional[List[str]] = None) -> str: q_coord = np.dot(q_coord, U) q_coord += p_cent_sub - _atoms = np.asarray([str_atom(atom) for atom in q_atoms]) return set_coordinates( - _atoms, + q_atoms, q_coord, title=f"Rotated {settings.structure_b} to match {settings.structure_a}, with RMSD of {result_rmsd:.8f}", ) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..d4cde65 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,33 @@ +from pathlib import Path + +import numpy as np +from numpy import ndarray + +RESOURCE_PATH = Path("tests/resources") + + +def get_rotation_matrix(degrees: float) -> ndarray: + """https://en.wikipedia.org/wiki/Rotation_matrix""" + + radians = degrees * np.pi / 180.0 + + r11 = np.cos(radians) + r12 = -np.sin(radians) + r21 = np.sin(radians) + r22 = np.cos(radians) + + R = np.array([[r11, r12], [r21, r22]]) + + return R + + +def degree2radiant(degrees: float) -> float: + return degrees * np.pi / 180.0 + + +def rotate_coord(angle: float, coord: ndarray, axis: list[int] = [0, 1]): + U = get_rotation_matrix(angle) + _xy = np.dot(coord[:, axis], U) + _coord = np.array(coord, copy=True) + _coord[:, axis] = _xy + return _coord diff --git a/tests/context.py b/tests/context.py deleted file mode 100644 index 6f6ccea..0000000 --- a/tests/context.py +++ /dev/null @@ -1,21 +0,0 @@ -import subprocess -from pathlib import Path -from typing import List - -RESOURCE_PATH = Path("tests/resources") - - -def call_main(args: List[str]) -> List[str]: - - root_path = Path("./") - filename = root_path / "rmsd/calculate_rmsd.py" - - cmd = ["python", f"{filename}", *args] - - proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - stdout, stderr = proc.communicate() - - if stderr is not None: - print(stderr.decode()) - - return stdout.decode().strip().split("\n") diff --git a/tests/resources/butane.xyz b/tests/resources/butane.xyz new file mode 100644 index 0000000..a516781 --- /dev/null +++ b/tests/resources/butane.xyz @@ -0,0 +1,16 @@ +14 + +C 2.142e00 1.395e00 -8.932e00 +C 3.631e00 1.416e00 -8.537e00 +C 4.203e00 -1.200e-02 -8.612e00 +C 5.691e00 9.000e-03 -8.218e00 +H 1.604e00 7.600e-01 -8.260e00 +H 1.745e00 2.388e00 -8.880e00 +H 2.043e00 1.024e00 -9.930e00 +H 4.169e00 2.051e00 -9.210e00 +H 3.731e00 1.788e00 -7.539e00 +H 3.665e00 -6.470e-01 -7.940e00 +H 4.104e00 -3.840e-01 -9.610e00 +H 6.088e00 -9.830e-01 -8.270e00 +H 5.791e00 3.810e-01 -7.220e00 +H 6.230e00 6.440e-01 -8.890e00 diff --git a/tests/resources/butane_prime.xyz b/tests/resources/butane_prime.xyz new file mode 100644 index 0000000..50f4d27 --- /dev/null +++ b/tests/resources/butane_prime.xyz @@ -0,0 +1,16 @@ +14 + +C 6.71454 -5.53848 -3.50851 +C 6.95865 -6.22697 -2.15264 +C 8.16747 -5.57632 -1.45606 +C 5.50518 -6.19016 -4.20589 +H 5.33617 -5.71137 -5.14853 +H 7.58263 -5.64795 -4.12498 +H 6.51851 -4.49883 -3.35011 +H 6.09092 -6.11832 -1.53660 +H 5.70232 -7.22908 -4.36475 +H 7.15558 -7.26640 -2.31068 +H 8.33668 -6.05459 -0.51425 +H 7.97144 -4.53667 -1.29765 +H 4.63745 -6.08152 -3.58986 +H 9.03610 -5.68475 -2.07173 diff --git a/tests/test_file_reading.py b/tests/test_file_reading.py index 7c24bbb..283047d 100644 --- a/tests/test_file_reading.py +++ b/tests/test_file_reading.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from context import RESOURCE_PATH +from conftest import RESOURCE_PATH # type: ignore import rmsd as rmsdlib diff --git a/tests/test_kabsch.py b/tests/test_kabsch.py index 8640927..7f8399f 100644 --- a/tests/test_kabsch.py +++ b/tests/test_kabsch.py @@ -1,5 +1,5 @@ import numpy as np -from context import RESOURCE_PATH +from conftest import RESOURCE_PATH # type: ignore import rmsd as rmsdlib diff --git a/tests/test_kabsch_weighted.py b/tests/test_kabsch_weighted.py index 05ad144..4b54c8d 100644 --- a/tests/test_kabsch_weighted.py +++ b/tests/test_kabsch_weighted.py @@ -1,5 +1,5 @@ import numpy as np -from context import RESOURCE_PATH +from conftest import RESOURCE_PATH # type: ignore import rmsd as rmsdlib diff --git a/tests/test_main.py b/tests/test_main.py index 78321de..333926c 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -2,7 +2,7 @@ import numpy as np import pytest -from context import RESOURCE_PATH, call_main +from conftest import RESOURCE_PATH # type: ignore import rmsd as rmsdlib from rmsd.calculate_rmsd import get_coordinates_xyz_lines @@ -52,8 +52,8 @@ def test_print_reflection_reorder() -> None: # Main call rmsd value args = f"--use-reflections --reorder {filename_a} {filename_b}" - stdout = call_main(args.split()) - value = float(stdout[-1]) + print(args.split()) + value = float(rmsdlib.main(args.split())) print(value) assert value is not None np.testing.assert_almost_equal(result_rmsd, value) diff --git a/tests/test_pdb.py b/tests/test_pdb.py index caa6bfe..aebd024 100644 --- a/tests/test_pdb.py +++ b/tests/test_pdb.py @@ -1,5 +1,5 @@ import pytest -from context import RESOURCE_PATH +from conftest import RESOURCE_PATH # type: ignore import rmsd as rmsdlib diff --git a/tests/test_quaternion.py b/tests/test_quaternion.py index a9d8f25..0b0773b 100644 --- a/tests/test_quaternion.py +++ b/tests/test_quaternion.py @@ -1,5 +1,5 @@ import numpy as np -from context import RESOURCE_PATH +from conftest import RESOURCE_PATH # type: ignore import rmsd as rmsdlib diff --git a/tests/test_reflections.py b/tests/test_reflections.py index aafdc66..2a4a080 100644 --- a/tests/test_reflections.py +++ b/tests/test_reflections.py @@ -1,7 +1,7 @@ import copy import numpy as np -from context import RESOURCE_PATH +from conftest import RESOURCE_PATH # type: ignore import rmsd as rmsdlib diff --git a/tests/test_reorder_inertia.py b/tests/test_reorder_inertia.py index 1b2e946..e773645 100644 --- a/tests/test_reorder_inertia.py +++ b/tests/test_reorder_inertia.py @@ -1,57 +1,93 @@ import numpy as np import rmsd as rmsdlib +from tests.conftest import RESOURCE_PATH, rotate_coord # type: ignore -def test_reorder_inertia_hungarian() -> None: - - # coordinates of scrambled and rotated butane - atoms = np.array(["C", "C", "C", "C", "H", "H", "H", "H", "H", "H", "H", "H", "H", "H"]) - atoms_ = [rmsdlib.int_atom(atom) for atom in atoms] - atoms = np.array(atoms_) - - p_coord = np.array( - [ - [2.142e00, 1.395e00, -8.932e00], - [3.631e00, 1.416e00, -8.537e00], - [4.203e00, -1.200e-02, -8.612e00], - [5.691e00, 9.000e-03, -8.218e00], - [1.604e00, 7.600e-01, -8.260e00], - [1.745e00, 2.388e00, -8.880e00], - [2.043e00, 1.024e00, -9.930e00], - [4.169e00, 2.051e00, -9.210e00], - [3.731e00, 1.788e00, -7.539e00], - [3.665e00, -6.470e-01, -7.940e00], - [4.104e00, -3.840e-01, -9.610e00], - [6.088e00, -9.830e-01, -8.270e00], - [5.791e00, 3.810e-01, -7.220e00], - [6.230e00, 6.440e-01, -8.890e00], - ] - ) - - q_coord = np.array( - [ - [6.71454, -5.53848, -3.50851], - [6.95865, -6.22697, -2.15264], - [8.16747, -5.57632, -1.45606], - [5.50518, -6.19016, -4.20589], - [5.33617, -5.71137, -5.14853], - [7.58263, -5.64795, -4.12498], - [6.51851, -4.49883, -3.35011], - [6.09092, -6.11832, -1.53660], - [5.70232, -7.22908, -4.36475], - [7.15558, -7.26640, -2.31068], - [8.33668, -6.05459, -0.51425], - [7.97144, -4.53667, -1.29765], - [4.63745, -6.08152, -3.58986], - [9.03610, -5.68475, -2.07173], - ] - ) - - p_coord -= rmsdlib.centroid(p_coord) - q_coord -= rmsdlib.centroid(q_coord) - - review = rmsdlib.reorder_inertia_hungarian(atoms, atoms, p_coord, q_coord) - result_rmsd = rmsdlib.kabsch_rmsd(p_coord, q_coord[review]) +def test_reorder_inertia_hungarian_butane() -> None: + + filename_a = RESOURCE_PATH / "butane.xyz" + filename_b = RESOURCE_PATH / "butane_prime.xyz" + + atoms_a, coord_a = rmsdlib.get_coordinates_xyz(filename_a, return_atoms_as_int=True) + atoms_b, coord_b = rmsdlib.get_coordinates_xyz(filename_b, return_atoms_as_int=True) + + coord_a -= rmsdlib.centroid(coord_a) + coord_b -= rmsdlib.centroid(coord_b) + + review = rmsdlib.reorder_inertia_hungarian(atoms_a, atoms_b, coord_a, coord_b) + print(review) + + result_rmsd = rmsdlib.kabsch_rmsd(coord_a, coord_b[review]) np.testing.assert_almost_equal(0, result_rmsd, decimal=2) + + +def test_reorder_inertia_hungarian_complicated() -> None: + + filename_a = RESOURCE_PATH / "CHEMBL3039407.xyz" + + atoms_a, coord_a = rmsdlib.get_coordinates_xyz(filename_a, return_atoms_as_int=True) + atoms_b, coord_b = rmsdlib.get_coordinates_xyz(filename_a, return_atoms_as_int=True) + + coord_a -= rmsdlib.centroid(coord_a) + coord_b -= rmsdlib.centroid(coord_b) + + # Setup the problem + # Rotate a 30 degrees in xy and 30 degrees in yz planes + coord_a = rotate_coord(30, coord_a) + coord_a = rotate_coord(30, coord_a, axis=[1, 2]) + + # Randomize the atom order + ind = np.array(list(range(len(atoms_b)))) + rng = np.random.default_rng(seed=56) + rng.shuffle(ind) + atoms_b = atoms_b[ind] + coord_b = coord_b[ind] + + # Solve the problem + review = rmsdlib.reorder_inertia_hungarian(atoms_a, atoms_b, coord_a, coord_b) + print(review) + + result_rmsd = rmsdlib.kabsch_rmsd(coord_a, coord_b[review]) + print(result_rmsd) + + np.testing.assert_almost_equal(0.0, result_rmsd) + + +def test_reorder_and_distance() -> None: + + filename_a = RESOURCE_PATH / "CHEMBL3039407.xyz" + + atoms_a, coord_a = rmsdlib.get_coordinates_xyz(filename_a, return_atoms_as_int=True) + atoms_b, coord_b = rmsdlib.get_coordinates_xyz(filename_a, return_atoms_as_int=True) + + coord_a -= rmsdlib.centroid(coord_a) + coord_b -= rmsdlib.centroid(coord_b) + + # Setup the problem + # Rotate a 30 degrees in xy and 30 degrees in yz planes + coord_a = rotate_coord(30, coord_a) + coord_a = rotate_coord(30, coord_a, axis=[1, 2]) + + # Make coord_b have bigger bonds + coord_b = coord_b * 1.2 + answer = 0.8035106793656143 + + print(coord_b) + + # Randomize the atom order + ind = np.array(list(range(len(atoms_b)))) + rng = np.random.default_rng(seed=56) + rng.shuffle(ind) + atoms_b = atoms_b[ind] + coord_b = coord_b[ind] + + # Solve the problem + review = rmsdlib.reorder_inertia_hungarian(atoms_a, atoms_b, coord_a, coord_b) + print(review) + + result_rmsd = rmsdlib.kabsch_rmsd(coord_a, coord_b[review], translate=True) + print(result_rmsd) + + np.testing.assert_almost_equal(answer, result_rmsd, decimal=3) diff --git a/tests/test_reorder_print.py b/tests/test_reorder_print.py index e1b9764..fcad4d8 100644 --- a/tests/test_reorder_print.py +++ b/tests/test_reorder_print.py @@ -1,5 +1,5 @@ import numpy as np -from context import RESOURCE_PATH +from conftest import RESOURCE_PATH # type: ignore import rmsd as rmsdlib from rmsd import get_coordinates_xyz, get_coordinates_xyz_lines diff --git a/tests/test_reorder_qml.py b/tests/test_reorder_qml.py index 7d31709..a64ae5c 100644 --- a/tests/test_reorder_qml.py +++ b/tests/test_reorder_qml.py @@ -2,7 +2,7 @@ import numpy as np import pytest -from context import RESOURCE_PATH +from conftest import RESOURCE_PATH # type: ignore import rmsd as rmsdlib diff --git a/tests/test_rmsd.py b/tests/test_rmsd.py deleted file mode 100644 index e69de29..0000000