biopython/Tests/test_PDB_DSSP.py

# Copyright 2009-2011 by Eric Talevich.  All rights reserved.
# Revisions copyright 2009-2013 by Peter Cock.  All rights reserved.
# Revisions copyright 2013 Lenna X. Peterson. All rights reserved.
# Revisions copyright 2020 Joao Rodrigues. All rights reserved.
#
# Converted by Eric Talevich from an older unit test copyright 2002
# by Thomas Hamelryck.
#
# Merged related test files into one, by Joao Rodrigues (2020)
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.

"""Unit tests for the Bio.PDB.DSSP submodule."""

import re
import subprocess
import unittest
import warnings

try:
    import numpy as np  # noqa: F401
except ImportError:
    from Bio import MissingPythonDependencyError

    raise MissingPythonDependencyError(
        "Install NumPy if you want to use Bio.PDB."
    ) from None


from Bio.PDB import DSSP
from Bio.PDB import make_dssp_dict
from Bio.PDB import MMCIFParser
from Bio.PDB import PDBParser

VERSION_2_2_0 = (2, 2, 0)


def parse_dssp_version(version_string):
    """Parse the DSSP version into a tuple from the tool output."""
    match = re.search(r"\s*([\d.]+)", version_string)
    if match:
        version = match.group(1)
    return tuple(map(int, version.split(".")))


def will_it_float(s):  # well played, whoever this was :)
    """Convert the input into a float if it is a number.

    If the input is a string, the output does not change.
    """
    try:
        return float(s)
    except ValueError:
        return s


class DSSP_tool_test(unittest.TestCase):
    """Test calling DSSP from Bio.PDB."""

    @classmethod
    def setUpClass(cls):
        cls.dssp_version = (0, 0, 0)
        is_dssp_available = False
        # Check if DSSP is installed
        quiet_kwargs = {"stdout": subprocess.PIPE, "stderr": subprocess.STDOUT}
        try:
            try:
                # Newer versions of DSSP
                version_string = subprocess.check_output(
                    ["dssp", "--version"], text=True
                )
                cls.dssp_version = parse_dssp_version(version_string)
                is_dssp_available = True
            except subprocess.CalledProcessError:
                # Older versions of DSSP
                subprocess.check_call(["dssp", "-h"], **quiet_kwargs)
                is_dssp_available = True
        except OSError:
            try:
                version_string = subprocess.check_output(
                    ["mkdssp", "--version"], text=True
                )
                cls.dssp_version = parse_dssp_version(version_string)
                is_dssp_available = True
            except OSError:
                pass

        if not is_dssp_available:
            raise unittest.SkipTest(
                "Install dssp if you want to use it from Biopython."
            )

        cls.pdbparser = PDBParser()
        cls.cifparser = MMCIFParser()

    def test_dssp(self):
        """Test DSSP generation from PDB."""
        pdbfile = "PDB/2BEG.pdb"
        model = self.pdbparser.get_structure("2BEG", pdbfile)[0]
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")  # silence DSSP warnings
            dssp = DSSP(model, pdbfile)
        self.assertEqual(len(dssp), 130)

    # Only run mmCIF tests if DSSP version installed supports mmcif
    def test_dssp_with_mmcif_file(self):
        """Test DSSP generation from MMCIF."""
        if self.dssp_version < VERSION_2_2_0:
            self.skipTest("Test requires DSSP version 2.2.0 or greater")

        pdbfile = "PDB/4ZHL.cif"
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")  # silence all warnings
            model = self.cifparser.get_structure("4ZHL", pdbfile)[0]
            dssp = DSSP(model, pdbfile)
        self.assertEqual(len(dssp), 257)

    def test_dssp_with_mmcif_file_and_nonstandard_residues(self):
        """Test DSSP generation from MMCIF with non-standard residues."""
        if self.dssp_version < VERSION_2_2_0:
            self.skipTest("Test requires DSSP version 2.2.0 or greater")

        pdbfile = "PDB/1AS5.cif"
        model = self.cifparser.get_structure("1AS5", pdbfile)[0]
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")  # silence DSSP warnings
            dssp = DSSP(model, pdbfile)
        self.assertEqual(len(dssp), 24)

    def test_dssp_with_mmcif_file_and_different_chain_ids(self):
        """Test DSSP generation from MMCIF which has different label and author chain IDs."""
        if self.dssp_version < VERSION_2_2_0:
            self.skipTest("Test requires DSSP version 2.2.0 or greater")

        pdbfile = "PDB/1A7G.cif"
        model = self.cifparser.get_structure("1A7G", pdbfile)[0]
        dssp = DSSP(model, pdbfile)
        self.assertEqual(len(dssp), 82)
        self.assertEqual(dssp.keys()[0][0], "E")


class DSSP_test(unittest.TestCase):
    """Tests for DSSP parsing etc which don't need the binary tool."""

    def test_DSSP_file(self):
        """Test parsing of pregenerated DSSP."""
        dssp, keys = make_dssp_dict("PDB/2BEG.dssp")
        self.assertEqual(len(dssp), 130)

    def test_DSSP_noheader_file(self):
        """Test parsing of pregenerated DSSP missing header information."""
        # New DSSP prints a line containing only whitespace and "."
        dssp, keys = make_dssp_dict("PDB/2BEG_noheader.dssp")
        self.assertEqual(len(dssp), 130)

    def test_DSSP_hbonds(self):
        """Test parsing of DSSP hydrogen bond information."""
        dssp, keys = make_dssp_dict("PDB/2BEG.dssp")

        dssp_indices = {v[5] for v in dssp.values()}
        hb_indices = set()

        # The integers preceding each hydrogen bond energy (kcal/mol) in the
        # "N-H-->O    O-->H-N    N-H-->O    O-->H-N" dssp output columns are
        # relative dssp indices. Therefore, "hb_indices" contains the absolute
        # dssp indices of residues participating in (provisional) h-bonds. Note
        # that actual h-bonds are typically determined by an energetic
        # threshold.
        for val in dssp.values():
            hb_indices |= {val[5] + x for x in (val[6], val[8], val[10], val[12])}

        # Check if all h-bond partner indices were successfully parsed.
        self.assertEqual((dssp_indices & hb_indices), hb_indices)

    def test_DSSP_in_model_obj(self):
        """All elements correctly added to xtra attribute of input model object."""
        p = PDBParser()
        s = p.get_structure("example", "PDB/2BEG.pdb")
        m = s[0]
        # Read the DSSP data into the pdb object:
        _ = DSSP(m, "PDB/2BEG.dssp", "dssp", "Sander", "DSSP")
        # Now compare the xtra attribute of the pdb object
        # residue by residue with the pre-computed values:
        i = 0
        with open("PDB/dssp_xtra_Sander.txt") as fh_ref:
            ref_lines = fh_ref.readlines()
            for chain in m:
                for res in chain:
                    # Split the pre-computed values into a list:
                    xtra_list_ref = ref_lines[i].rstrip().split("\t")
                    # Then convert each element to float where possible:
                    xtra_list_ref = list(map(will_it_float, xtra_list_ref))
                    # The xtra attribute is a dict.
                    # To compare with the pre-computed values first sort according to keys:
                    xtra_itemts = sorted(
                        res.xtra.items(), key=lambda s: s[0]
                    )  # noqa: E731
                    # Then extract the list of xtra values for the residue
                    # and convert to floats where possible:
                    xtra_list = [t[1] for t in xtra_itemts]
                    xtra_list = list(map(will_it_float, xtra_list))
                    # The reason for converting to float is, that casting a float to a string in python2.6
                    # will include fewer decimals than python3 and an assertion error will be thrown.
                    self.assertEqual(xtra_list, xtra_list_ref)
                    i += 1

    def test_DSSP_RSA(self):
        """Tests the usage of different ASA tables."""
        # Tests include Sander/default, Wilke and Miller
        p = PDBParser()
        # Sander/default:
        s = p.get_structure("example", "PDB/2BEG.pdb")
        m = s[0]
        # Read the DSSP data into the pdb object:
        _ = DSSP(m, "PDB/2BEG.dssp", "dssp", "Sander", "DSSP")
        # Then compare the RASA values for each residue with the pre-computed values:
        i = 0
        with open("PDB/Sander_RASA.txt") as fh_ref:
            ref_lines = fh_ref.readlines()
            for chain in m:
                for res in chain:
                    rasa_ref = float(ref_lines[i].rstrip())
                    rasa = float(res.xtra["EXP_DSSP_RASA"])
                    self.assertAlmostEqual(rasa, rasa_ref)
                    i += 1

        # Wilke (procedure similar as for the Sander values above):
        s = p.get_structure("example", "PDB/2BEG.pdb")
        m = s[0]
        _ = DSSP(m, "PDB/2BEG.dssp", "dssp", "Wilke", "DSSP")
        i = 0
        with open("PDB/Wilke_RASA.txt") as fh_ref:
            ref_lines = fh_ref.readlines()
            for chain in m:
                for res in chain:
                    rasa_ref = float(ref_lines[i].rstrip())
                    rasa = float(res.xtra["EXP_DSSP_RASA"])
                    self.assertAlmostEqual(rasa, rasa_ref)
                    i += 1

        # Miller (procedure similar as for the Sander values above):
        s = p.get_structure("example", "PDB/2BEG.pdb")
        m = s[0]
        _ = DSSP(m, "PDB/2BEG.dssp", "dssp", "Miller", "DSSP")
        i = 0
        with open("PDB/Miller_RASA.txt") as fh_ref:
            ref_lines = fh_ref.readlines()
            for chain in m:
                for res in chain:
                    rasa_ref = float(ref_lines[i].rstrip())
                    rasa = float(res.xtra["EXP_DSSP_RASA"])
                    self.assertAlmostEqual(rasa, rasa_ref)
                    i += 1

        # Ahmad (procedure similar as for the Sander values above):
        s = p.get_structure("example", "PDB/2BEG.pdb")
        m = s[0]
        _ = DSSP(m, "PDB/2BEG.dssp", "dssp", "Ahmad", "DSSP")
        i = 0
        with open("PDB/Ahmad_RASA.txt") as fh_ref:
            ref_lines = fh_ref.readlines()
            for chain in m:
                for res in chain:
                    rasa_ref = float(ref_lines[i].rstrip())
                    rasa = float(res.xtra["EXP_DSSP_RASA"])
                    self.assertAlmostEqual(rasa, rasa_ref)
                    i += 1


if __name__ == "__main__":
    runner = unittest.TextTestRunner(verbosity=2)
    unittest.main(testRunner=runner)