# Mirror of https://github.com/biopython/biopython.git (synced 2025-10-20 21:53:47 +08:00; 276 lines, 11 KiB, Python).
# Copyright 2009-2011 by Eric Talevich. All rights reserved.
|
|
# Revisions copyright 2009-2013 by Peter Cock. All rights reserved.
|
|
# Revisions copyright 2013 Lenna X. Peterson. All rights reserved.
|
|
# Revisions copyright 2020 Joao Rodrigues. All rights reserved.
|
|
#
|
|
# Converted by Eric Talevich from an older unit test copyright 2002
|
|
# by Thomas Hamelryck.
|
|
#
|
|
# Merged related test files into one, by Joao Rodrigues (2020)
|
|
#
|
|
# This file is part of the Biopython distribution and governed by your
|
|
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
|
|
# Please see the LICENSE file that should have been included as part of this
|
|
# package.
|
|
|
|
"""Unit tests for the Bio.PDB.DSSP submodule."""
|
|
|
|
import re
|
|
import subprocess
|
|
import unittest
|
|
import warnings
|
|
|
|
try:
|
|
import numpy as np # noqa: F401
|
|
except ImportError:
|
|
from Bio import MissingPythonDependencyError
|
|
|
|
raise MissingPythonDependencyError(
|
|
"Install NumPy if you want to use Bio.PDB."
|
|
) from None
|
|
|
|
|
|
from Bio.PDB import DSSP
|
|
from Bio.PDB import make_dssp_dict
|
|
from Bio.PDB import MMCIFParser
|
|
from Bio.PDB import PDBParser
|
|
|
|
# Minimum DSSP version, as an (major, minor, patch) tuple, that the mmCIF
# tests below require; they skip themselves when the detected version is lower.
VERSION_2_2_0 = (2, 2, 0)
|
|
|
|
|
|
def parse_dssp_version(version_string):
    """Parse the DSSP version into a tuple from the tool output.

    The first dotted run of digits found in ``version_string`` (for example
    ``"4.4.10"`` in ``"mkdssp version 4.4.10"``) is converted into a tuple of
    ints such as ``(4, 4, 10)``.

    If the text contains no digits at all, ``(0, 0, 0)`` is returned so that
    the result can always be compared against other version tuples.
    """
    # Require digits on both sides of every dot: a lone "." (as in "v. 3.0.0")
    # or a trailing dot (as in "2.2.1.") must not end up in the captured text,
    # because int("") would raise ValueError.
    match = re.search(r"\d+(?:\.\d+)*", version_string)
    if match is None:
        return (0, 0, 0)
    return tuple(int(part) for part in match.group(0).split("."))
|
|
|
|
|
|
def will_it_float(s):  # well played, whoever this was :)
    """Return ``float(s)`` when that conversion succeeds.

    When ``float`` rejects the value with a ValueError (for example a
    non-numeric string), the input is handed back unchanged.
    """
    try:
        as_number = float(s)
    except ValueError:
        # Not a number after all; give the caller the original object.
        return s
    else:
        return as_number
|
|
|
|
|
|
class DSSP_tool_test(unittest.TestCase):
    """Test calling DSSP from Bio.PDB."""

    @staticmethod
    def _query_version(executable):
        """Run ``executable --version`` and return the reported version tuple.

        Returns ``(0, 0, 0)`` when the output cannot be turned into a version,
        so the version comparisons in the mmCIF tests never receive ``None``.

        Raises OSError if the executable cannot be started and
        subprocess.CalledProcessError if it exits with a non-zero status.
        """
        version_string = subprocess.check_output([executable, "--version"], text=True)
        try:
            version = parse_dssp_version(version_string)
        except ValueError:
            # The output contained something that is not a plain dotted number.
            version = None
        return (0, 0, 0) if version is None else version

    @classmethod
    def setUpClass(cls):
        """Find a usable DSSP executable, record its version, build the parsers.

        Raises unittest.SkipTest when neither ``dssp`` nor ``mkdssp`` can be
        run successfully.
        """
        cls.dssp_version = (0, 0, 0)
        is_dssp_available = False
        # Check if DSSP is installed
        quiet_kwargs = {"stdout": subprocess.PIPE, "stderr": subprocess.STDOUT}
        try:
            try:
                # Newer versions of DSSP
                cls.dssp_version = cls._query_version("dssp")
                is_dssp_available = True
            except subprocess.CalledProcessError:
                # Older versions of DSSP
                subprocess.check_call(["dssp", "-h"], **quiet_kwargs)
                is_dssp_available = True
        except OSError:
            # There is no "dssp" executable; try the alternative name.
            try:
                cls.dssp_version = cls._query_version("mkdssp")
                is_dssp_available = True
            except (OSError, subprocess.CalledProcessError):
                pass
        except subprocess.CalledProcessError:
            # "dssp" exists but failed both probes; treat it as unavailable so
            # the class is skipped instead of erroring out of setUpClass.
            pass

        if not is_dssp_available:
            raise unittest.SkipTest(
                "Install dssp if you want to use it from Biopython."
            )

        cls.pdbparser = PDBParser()
        cls.cifparser = MMCIFParser()

    def _require_mmcif_support(self):
        """Skip the calling test unless the detected DSSP is at least 2.2.0."""
        # Only run mmCIF tests if DSSP version installed supports mmcif
        if self.dssp_version < VERSION_2_2_0:
            self.skipTest("Test requires DSSP version 2.2.0 or greater")

    def test_dssp(self):
        """Test DSSP generation from PDB."""
        pdbfile = "PDB/2BEG.pdb"
        model = self.pdbparser.get_structure("2BEG", pdbfile)[0]
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")  # silence DSSP warnings
            dssp = DSSP(model, pdbfile)
        self.assertEqual(len(dssp), 130)

    def test_dssp_with_mmcif_file(self):
        """Test DSSP generation from MMCIF."""
        self._require_mmcif_support()

        pdbfile = "PDB/4ZHL.cif"
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")  # silence all warnings
            model = self.cifparser.get_structure("4ZHL", pdbfile)[0]
            dssp = DSSP(model, pdbfile)
        self.assertEqual(len(dssp), 257)

    def test_dssp_with_mmcif_file_and_nonstandard_residues(self):
        """Test DSSP generation from MMCIF with non-standard residues."""
        self._require_mmcif_support()

        pdbfile = "PDB/1AS5.cif"
        model = self.cifparser.get_structure("1AS5", pdbfile)[0]
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")  # silence DSSP warnings
            dssp = DSSP(model, pdbfile)
        self.assertEqual(len(dssp), 24)

    def test_dssp_with_mmcif_file_and_different_chain_ids(self):
        """Test DSSP generation from MMCIF which has different label and author chain IDs."""
        self._require_mmcif_support()

        pdbfile = "PDB/1A7G.cif"
        model = self.cifparser.get_structure("1A7G", pdbfile)[0]
        with warnings.catch_warnings():
            # Same treatment as the other tool tests in this class.
            warnings.simplefilter("ignore")  # silence DSSP warnings
            dssp = DSSP(model, pdbfile)
        self.assertEqual(len(dssp), 82)
        # The first DSSP key must start with chain ID "E"
        # (presumably the author chain ID; confirm against 1A7G.cif).
        self.assertEqual(dssp.keys()[0][0], "E")
|
|
|
|
|
|
class DSSP_test(unittest.TestCase):
    """Tests for DSSP parsing etc which don't need the binary tool."""

    @staticmethod
    def _load_2beg_with_dssp(table):
        """Parse PDB/2BEG.pdb and attach the pre-generated DSSP data to model 0.

        ``table`` is the name of the ASA table handed to DSSP (for example
        "Sander"). Returns the model, whose residues then carry the DSSP
        values in their ``xtra`` attribute.
        """
        structure = PDBParser().get_structure("example", "PDB/2BEG.pdb")
        model = structure[0]
        # Read the DSSP data into the pdb object:
        DSSP(model, "PDB/2BEG.dssp", "dssp", table, "DSSP")
        return model

    def _check_rasa(self, table, ref_path):
        """Compare every residue's RASA for ``table`` with the values in ``ref_path``.

        The reference file is read line by line, one float per residue, in
        the same order in which the model's chains and residues are iterated.
        """
        model = self._load_2beg_with_dssp(table)
        with open(ref_path) as fh_ref:
            ref_lines = fh_ref.readlines()
        residues = (res for chain in model for res in chain)
        # Index into ref_lines (rather than zip) so that a reference file with
        # too few lines still fails loudly instead of being silently truncated.
        for i, res in enumerate(residues):
            rasa_ref = float(ref_lines[i].rstrip())
            rasa = float(res.xtra["EXP_DSSP_RASA"])
            self.assertAlmostEqual(rasa, rasa_ref)

    def test_DSSP_file(self):
        """Test parsing of pregenerated DSSP."""
        dssp, _ = make_dssp_dict("PDB/2BEG.dssp")
        self.assertEqual(len(dssp), 130)

    def test_DSSP_noheader_file(self):
        """Test parsing of pregenerated DSSP missing header information."""
        # New DSSP prints a line containing only whitespace and "."
        dssp, _ = make_dssp_dict("PDB/2BEG_noheader.dssp")
        self.assertEqual(len(dssp), 130)

    def test_DSSP_hbonds(self):
        """Test parsing of DSSP hydrogen bond information."""
        dssp, _ = make_dssp_dict("PDB/2BEG.dssp")

        dssp_indices = {v[5] for v in dssp.values()}
        hb_indices = set()

        # The integers preceding each hydrogen bond energy (kcal/mol) in the
        # "N-H-->O    O-->H-N    N-H-->O    O-->H-N" dssp output columns are
        # relative dssp indices. Therefore, "hb_indices" contains the absolute
        # dssp indices of residues participating in (provisional) h-bonds. Note
        # that actual h-bonds are typically determined by an energetic
        # threshold.
        for val in dssp.values():
            hb_indices |= {val[5] + x for x in (val[6], val[8], val[10], val[12])}

        # Check if all h-bond partner indices were successfully parsed.
        self.assertEqual((dssp_indices & hb_indices), hb_indices)

    def test_DSSP_in_model_obj(self):
        """All elements correctly added to xtra attribute of input model object."""
        model = self._load_2beg_with_dssp("Sander")
        # Now compare the xtra attribute of the pdb object
        # residue by residue with the pre-computed values:
        with open("PDB/dssp_xtra_Sander.txt") as fh_ref:
            ref_lines = fh_ref.readlines()
        residues = (res for chain in model for res in chain)
        for i, res in enumerate(residues):
            # Split the pre-computed values into a list and convert each
            # element to float where possible:
            xtra_list_ref = [
                will_it_float(field) for field in ref_lines[i].rstrip().split("\t")
            ]
            # The xtra attribute is a dict.
            # To compare with the pre-computed values first sort according to keys:
            xtra_items = sorted(res.xtra.items(), key=lambda item: item[0])
            # Then extract the list of xtra values for the residue
            # and convert to floats where possible:
            xtra_list = [will_it_float(value) for _, value in xtra_items]
            # Values are compared as floats rather than as text so that
            # differences in how a float is rendered as a string cannot
            # trigger a spurious assertion error.
            self.assertEqual(xtra_list, xtra_list_ref)

    def test_DSSP_RSA(self):
        """Tests the usage of different ASA tables."""
        # Tests include Sander/default, Wilke, Miller and Ahmad. Each table
        # has its own reference file named PDB/<table>_RASA.txt.
        for table in ("Sander", "Wilke", "Miller", "Ahmad"):
            with self.subTest(table=table):
                self._check_rasa(table, f"PDB/{table}_RASA.txt")
|
|
|
|
|
|
if __name__ == "__main__":
    # Run every test in this module with a verbose (level 2) text runner.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))
|