mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 21:53:47 +08:00
Native implementation of Shrake-Rupley algorithm for atomic solvent accessible surface areas. (#3274)
* KDtree-assisted implementation of ASA calculation using Shrake-Rupley algorithm.
This commit is contained in:
251
Bio/PDB/SASA.py
Normal file
251
Bio/PDB/SASA.py
Normal file
@ -0,0 +1,251 @@
|
||||
# Copyright (C) 2020, Joao Rodrigues (j.p.g.l.m.rodrigues@gmail.com)
|
||||
#
|
||||
# This file is part of the Biopython distribution and governed by your
|
||||
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
|
||||
# Please see the LICENSE file that should have been included as part of this
|
||||
# package.
|
||||
|
||||
"""Calculation of solvent accessible surface areas for Bio.PDB entities.
|
||||
|
||||
Uses the "rolling ball" algorithm developed by Shrake & Rupley algorithm,
|
||||
which uses a sphere (of equal radius to a solvent molecule) to probe the
|
||||
surface of the molecule.
|
||||
|
||||
Reference:
|
||||
Shrake, A; Rupley, JA. (1973). J Mol Biol
|
||||
"Environment and exposure to solvent of protein atoms. Lysozyme and insulin".
|
||||
"""
|
||||
|
||||
import collections
|
||||
import math
|
||||
|
||||
import numpy as np
|
||||
|
||||
from Bio.PDB.kdtrees import KDTree
|
||||
|
||||
__all__ = ["ShrakeRupley"]
|
||||
|
||||
_ENTITY_HIERARCHY = {
|
||||
"A": 0,
|
||||
"R": 1,
|
||||
"C": 2,
|
||||
"M": 3,
|
||||
"S": 4,
|
||||
}
|
||||
|
||||
# vdW radii taken from:
|
||||
# https://en.wikipedia.org/wiki/Atomic_radii_of_the_elements_(data_page)
|
||||
#
|
||||
# Radii for CL, K, NA, etc are _not_ ionic radii.
|
||||
#
|
||||
# References:
|
||||
# A. Bondi (1964). "van der Waals Volumes and Radii".
|
||||
# M. Mantina, A.C. et al., J. Phys. Chem. 2009, 113, 5806.
|
||||
ATOMIC_RADII = collections.defaultdict(lambda x: 2.0)
|
||||
ATOMIC_RADII.update(
|
||||
{
|
||||
"H": 1.200,
|
||||
"HE": 1.400,
|
||||
"C": 1.700,
|
||||
"N": 1.550,
|
||||
"O": 1.520,
|
||||
"F": 1.470,
|
||||
"NA": 2.270,
|
||||
"MG": 1.730,
|
||||
"P": 1.800,
|
||||
"S": 1.800,
|
||||
"CL": 1.750,
|
||||
"K": 2.750,
|
||||
"CA": 2.310,
|
||||
"NI": 1.630,
|
||||
"CU": 1.400,
|
||||
"ZN": 1.390,
|
||||
"SE": 1.900,
|
||||
"BR": 1.850,
|
||||
"CD": 1.580,
|
||||
"I": 1.980,
|
||||
"HG": 1.550,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
class ShrakeRupley:
|
||||
"""Calculates SASAs using the Shrake-Rupley algorithm."""
|
||||
|
||||
def __init__(self, probe_radius=1.40, n_points=100, radii_dict=None):
|
||||
"""Initialize the class.
|
||||
|
||||
:param probe_radius: radius of the probe in A. Default is 1.40, roughly
|
||||
the radius of a water molecule.
|
||||
:type probe_radius: float
|
||||
|
||||
:param n_points: resolution of the surface of each atom. Default is 100.
|
||||
A higher number of points results in more precise measurements, but
|
||||
slows down the calculation.
|
||||
:type n_points: int
|
||||
|
||||
:param radii_dict: user-provided dictionary of atomic radii to use in
|
||||
the calculation. Values will replace/complement those in the
|
||||
default ATOMIC_RADII dictionary.
|
||||
:type radii_dict: dict
|
||||
|
||||
Examples:
|
||||
>>> sr = ShrakeRupley()
|
||||
>>> sr = ShrakeRupley(n_points=960)
|
||||
>>> sr = ShrakeRupley(radii_dict={"O": 3.1415})
|
||||
"""
|
||||
if probe_radius <= 0.0:
|
||||
raise ValueError(
|
||||
f"Probe radius must be a positive number: {probe_radius} <= 0"
|
||||
)
|
||||
|
||||
self.probe_radius = float(probe_radius)
|
||||
|
||||
if n_points < 1:
|
||||
raise ValueError(
|
||||
f"Number of sphere points must be larger than 1: {n_points}"
|
||||
)
|
||||
self.n_points = n_points
|
||||
|
||||
# Update radii list with user provided lists.
|
||||
self.radii_dict = ATOMIC_RADII.copy()
|
||||
if radii_dict is not None:
|
||||
self.radii_dict.update(radii_dict)
|
||||
|
||||
# Pre-compute reference sphere
|
||||
self._sphere = self._compute_sphere()
|
||||
|
||||
def _compute_sphere(self):
|
||||
"""Return the 3D coordinates of n points on a sphere.
|
||||
|
||||
Uses the golden spiral algorithm to place points 'evenly' on the sphere
|
||||
surface. We compute this once and then move the sphere to the centroid
|
||||
of each atom as we compute the ASAs.
|
||||
"""
|
||||
n = self.n_points
|
||||
|
||||
dl = np.pi * (3 - 5 ** 0.5)
|
||||
dz = 2.0 / n
|
||||
|
||||
longitude = 0
|
||||
z = 1 - dz / 2
|
||||
|
||||
coords = np.zeros((n, 3), dtype=np.float32)
|
||||
for k in range(n):
|
||||
r = (1 - z * z) ** 0.5
|
||||
coords[k, 0] = math.cos(longitude) * r
|
||||
coords[k, 1] = math.sin(longitude) * r
|
||||
coords[k, 2] = z
|
||||
z -= dz
|
||||
longitude += dl
|
||||
|
||||
return coords
|
||||
|
||||
def compute(self, entity, level="A"):
|
||||
"""Calculate surface accessibility surface area for an entity.
|
||||
|
||||
The resulting atomic surface accessibility values are attached to the
|
||||
.sasa attribute of each entity (or atom), depending on the level. For
|
||||
example, if level="R", all residues will have a .sasa attribute. Atoms
|
||||
will always be assigned a .sasa attribute with their individual values.
|
||||
|
||||
:param entity: input entity.
|
||||
:type entity: Bio.PDB.Entity, e.g. Residue, Chain, ...
|
||||
|
||||
:param level: the level at which ASA values are assigned, which can be
|
||||
one of "A" (Atom), "R" (Residue), "C" (Chain), "M" (Model), or
|
||||
"S" (Structure). The ASA value of an entity is the sum of all ASA
|
||||
values of its children. Defaults to "A".
|
||||
:type entity: Bio.PDB.Entity
|
||||
|
||||
Example:
|
||||
>>> from Bio.PDB import PDBParser
|
||||
>>> p = PDBParser(QUIET=1)
|
||||
>>> struct = p.get_structure("1LCD", "PDB/1LCD.pdb")
|
||||
>>> sr = ShrakeRupley()
|
||||
>>> sr.compute(struct, level="S")
|
||||
>>> print(round(struct.sasa, 2))
|
||||
7053.43
|
||||
>>> print(round(struct[0]["A"][11]["OE1"].sasa, 2))
|
||||
9.64
|
||||
"""
|
||||
is_valid = hasattr(entity, "level") and entity.level in {"R", "C", "M", "S"}
|
||||
if not is_valid:
|
||||
raise ValueError(
|
||||
f"Invalid entity type '{type(entity)}'. "
|
||||
"Must be Residue, Chain, Model, or Structure"
|
||||
)
|
||||
|
||||
if level not in _ENTITY_HIERARCHY:
|
||||
raise ValueError(f"Invalid level '{level}'. Must be A, R, C, M, or S.")
|
||||
elif _ENTITY_HIERARCHY[level] > _ENTITY_HIERARCHY[entity.level]:
|
||||
raise ValueError(
|
||||
f"Level '{level}' must be equal or smaller than input entity: {entity.level}"
|
||||
)
|
||||
|
||||
# Get atoms onto list for lookup
|
||||
atoms = list(entity.get_atoms())
|
||||
n_atoms = len(atoms)
|
||||
if not n_atoms:
|
||||
raise ValueError("Entity has no child atoms.")
|
||||
|
||||
# Get coordinates as a numpy array
|
||||
# We trust DisorderedAtom and friends to pick representatives.
|
||||
coords = np.array([a.coord for a in atoms], dtype=np.float64)
|
||||
|
||||
# Pre-compute atom neighbors using KDTree
|
||||
kdt = KDTree(coords, 10)
|
||||
|
||||
# Pre-compute radius * probe table
|
||||
radii_dict = self.radii_dict
|
||||
radii = np.array([radii_dict[a.element] for a in atoms], dtype=np.float64)
|
||||
radii += self.probe_radius
|
||||
twice_maxradii = np.max(radii) * 2
|
||||
|
||||
# Calculate ASAs
|
||||
asa_array = np.zeros((n_atoms, 1), dtype=np.int)
|
||||
ptset = set(range(self.n_points))
|
||||
for i in range(n_atoms):
|
||||
|
||||
r_i = radii[i]
|
||||
|
||||
# Move sphere to atom
|
||||
s_on_i = (np.array(self._sphere, copy=True) * r_i) + coords[i]
|
||||
available_set = ptset.copy()
|
||||
|
||||
# KDtree for sphere points
|
||||
kdt_sphere = KDTree(s_on_i, 10)
|
||||
|
||||
# Iterate over neighbors of atom i
|
||||
for jj in kdt.search(coords[i], twice_maxradii):
|
||||
j = jj.index
|
||||
if i == j:
|
||||
continue
|
||||
|
||||
if jj.radius < (r_i + radii[j]):
|
||||
# Remove overlapping points on sphere from available set
|
||||
available_set -= {
|
||||
pt.index for pt in kdt_sphere.search(coords[j], radii[j])
|
||||
}
|
||||
|
||||
asa_array[i] = len(available_set) # update counts
|
||||
|
||||
# Convert accessible point count to surface area in A**2
|
||||
f = radii * radii * (4 * np.pi / self.n_points)
|
||||
asa_array = asa_array * f[:, np.newaxis]
|
||||
|
||||
# Set atom .sasa
|
||||
for i, atom in enumerate(atoms):
|
||||
atom.sasa = asa_array[i, 0]
|
||||
|
||||
# Aggregate values per entity level if necessary
|
||||
if level != "A":
|
||||
entities = set(atoms)
|
||||
target = _ENTITY_HIERARCHY[level]
|
||||
for _ in range(target):
|
||||
entities = {e.parent for e in entities}
|
||||
|
||||
atomdict = {a.full_id: idx for idx, a in enumerate(atoms)}
|
||||
for e in entities:
|
||||
e_atoms = [atomdict[a.full_id] for a in e.get_atoms()]
|
||||
e.sasa = asa_array[e_atoms].sum()
|
@ -10,7 +10,16 @@
|
||||
Includes: PDB and mmCIF parsers, a Structure class, a module to keep a local
|
||||
copy of the PDB up-to-date, selective IO of PDB files, etc.
|
||||
|
||||
Author: Thomas Hamelryck. Additional code by Kristian Rother.
|
||||
Original Author: Thomas Hamelryck.
|
||||
Contributions by:
|
||||
- Peter Cock
|
||||
- Joe Greener
|
||||
- Rob Miller
|
||||
- Lenna X. Peterson
|
||||
- Joao Rodrigues
|
||||
- Kristian Rother
|
||||
- Eric Talevich
|
||||
- and many others.
|
||||
"""
|
||||
|
||||
# Get a Structure object from a PDB file
|
||||
@ -72,3 +81,10 @@ try:
|
||||
from .NeighborSearch import NeighborSearch
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
# Native Shrake-Rupley algorithm for SASA calculations.
|
||||
# Depends on kdtrees C module
|
||||
try:
|
||||
from .SASA import ShrakeRupley
|
||||
except ImportError:
|
||||
pass
|
||||
|
3
NEWS.rst
3
NEWS.rst
@ -23,6 +23,9 @@ centers of gravity or geometry.
|
||||
New method ``disordered_remove()`` implemented in Bio.PDB DisorderedAtom and
|
||||
DisorderedResidue to remove children.
|
||||
|
||||
New module Bio.PDB.SASA implements the Shrake-Rupley algorithm to calculate
|
||||
atomic solvent accessible areas without third-party tools.
|
||||
|
||||
Many thanks to the Biopython developers and community for making this release
|
||||
possible, especially the following contributors:
|
||||
|
||||
|
159
Tests/test_PDB_SASA.py
Normal file
159
Tests/test_PDB_SASA.py
Normal file
@ -0,0 +1,159 @@
|
||||
# Copyright 2020 Joao Rodrigues. All rights reserved.
|
||||
#
|
||||
# This file is part of the Biopython distribution and governed by your
|
||||
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
|
||||
# Please see the LICENSE file that should have been included as part of this
|
||||
# package.
|
||||
|
||||
"""Unit tests for the Bio.PDB.SASA module: Surface Accessibility Calculations."""
|
||||
|
||||
import copy
|
||||
import pathlib
|
||||
import unittest
|
||||
import warnings
|
||||
|
||||
from Bio.PDB import PDBParser
|
||||
from Bio.PDB.SASA import ShrakeRupley
|
||||
|
||||
DATADIR = pathlib.Path(__file__).parent / "PDB"
|
||||
|
||||
|
||||
class TestShrakeRupley(unittest.TestCase):
|
||||
"""Tests for SR algorithm."""
|
||||
|
||||
# Expected values obtained with freesasa 2.0.3 and custom config file.
|
||||
# e.g. cmd: --shrake-rupley --resolution 100 -n-threads 1 --probe-radius 1.4
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
"""One-time setup for all tests."""
|
||||
cls.parser = p = PDBParser(QUIET=1)
|
||||
with warnings.catch_warnings():
|
||||
structure = p.get_structure("X", DATADIR / "1LCD.pdb")
|
||||
model = structure[0]
|
||||
|
||||
# Remove HETATM and Hs for simplicity/speed
|
||||
for r in list(model.get_residues()):
|
||||
if r.id[0] == " ":
|
||||
for a in list(r):
|
||||
if a.element == "H":
|
||||
r.detach_child(a.name)
|
||||
else:
|
||||
c = r.parent
|
||||
c.detach_child(r.id)
|
||||
|
||||
cls.model = model
|
||||
|
||||
# General Parameters
|
||||
def test_default_algorithm(self):
|
||||
"""Run Shrake-Rupley with default parameters."""
|
||||
m = copy.deepcopy(self.model) # modifies atom.sasa
|
||||
|
||||
sasa = ShrakeRupley()
|
||||
sasa.compute(m)
|
||||
|
||||
result = [a.sasa for a in m.get_atoms()][:5]
|
||||
expected = [50.36, 31.40, 10.87, 12.86, 2.42]
|
||||
for a, b in zip(result, expected):
|
||||
self.assertAlmostEqual(a, b, places=2)
|
||||
|
||||
def test_higher_resolution(self):
|
||||
"""Run Shrake-Rupley with 960 points per sphere."""
|
||||
m = copy.deepcopy(self.model) # modifies atom.sasa
|
||||
|
||||
sasa = ShrakeRupley(n_points=960)
|
||||
sasa.compute(m)
|
||||
|
||||
result = [a.sasa for a in m.get_atoms()][:5]
|
||||
expected = [51.90, 31.45, 12.45, 12.72, 3.02]
|
||||
for a, b in zip(result, expected):
|
||||
self.assertAlmostEqual(a, b, places=2)
|
||||
|
||||
def test_custom_radii(self):
|
||||
"""Run Shrake-Rupley with custom radii."""
|
||||
m = copy.deepcopy(self.model) # modifies atom.sasa
|
||||
|
||||
sasa = ShrakeRupley(radii_dict={"C": 5.00})
|
||||
sasa.compute(m)
|
||||
|
||||
result = [a.sasa for a in m.get_atoms()][:5]
|
||||
expected = [0.0, 190.45, 41.18, 0.0, 36.03]
|
||||
for a, b in zip(result, expected):
|
||||
self.assertAlmostEqual(a, b, places=2)
|
||||
|
||||
# Compute parameters
|
||||
def test_level_R(self):
|
||||
"""Run Shrake-Rupley with level R."""
|
||||
m = copy.deepcopy(self.model) # modifies atom.sasa
|
||||
|
||||
sasa = ShrakeRupley()
|
||||
sasa.compute(m, level="R")
|
||||
|
||||
for r in m.get_residues():
|
||||
atom_sum = sum(a.sasa for a in r)
|
||||
self.assertAlmostEqual(atom_sum, r.sasa, places=2)
|
||||
|
||||
def test_level_C(self):
|
||||
"""Run Shrake-Rupley with level C."""
|
||||
m = copy.deepcopy(self.model) # modifies atom.sasa
|
||||
|
||||
sasa = ShrakeRupley()
|
||||
sasa.compute(m, level="C")
|
||||
|
||||
for c in m.get_chains():
|
||||
atom_sum = sum(a.sasa for a in c.get_atoms())
|
||||
self.assertAlmostEqual(atom_sum, c.sasa, places=2)
|
||||
|
||||
# Exceptions
|
||||
def test_fail_probe_radius(self):
|
||||
"""Raise exception on bad probe_radius parameter."""
|
||||
with self.assertRaisesRegex(ValueError, "must be a positive number"):
|
||||
sasa = ShrakeRupley(probe_radius=-1.40)
|
||||
|
||||
def test_fail_n_points(self):
|
||||
"""Raise exception on bad n_points parameter."""
|
||||
with self.assertRaisesRegex(ValueError, "must be larger than 1"):
|
||||
sasa = ShrakeRupley(n_points=0)
|
||||
|
||||
def test_fail_compute_entity_type(self):
|
||||
"""Raise exception on unsupported entity type."""
|
||||
with self.assertRaisesRegex(ValueError, "Invalid entity type"):
|
||||
sasa = ShrakeRupley()
|
||||
sasa.compute([1, 2, 3, 4, 5])
|
||||
|
||||
def test_fail_compute_entity_level(self):
|
||||
"""Raise exception on input Atom entity."""
|
||||
atom = list(self.model.get_atoms())[0]
|
||||
with self.assertRaisesRegex(ValueError, "Invalid entity type"):
|
||||
sasa = ShrakeRupley()
|
||||
sasa.compute(atom)
|
||||
|
||||
def test_fail_compute_level_1(self):
|
||||
"""Raise exception on invalid level parameter: X."""
|
||||
with self.assertRaisesRegex(ValueError, "Invalid level"):
|
||||
sasa = ShrakeRupley()
|
||||
sasa.compute(self.model, level="X")
|
||||
|
||||
def test_fail_compute_level_2(self):
|
||||
"""Raise exception on invalid level parameter: S > C."""
|
||||
chain = self.model["A"]
|
||||
with self.assertRaisesRegex(ValueError, "be equal or smaller than"):
|
||||
sasa = ShrakeRupley()
|
||||
sasa.compute(chain, level="S") # Chain is a child of Structure.
|
||||
|
||||
def test_fail_empty_entity(self):
|
||||
"""Raise exception on invalid level parameter: S > C."""
|
||||
sasa = ShrakeRupley()
|
||||
|
||||
r = copy.deepcopy(self.model["A"].child_list[0])
|
||||
for a in list(r):
|
||||
r.detach_child(a.name) # empty residue
|
||||
|
||||
self.assertEqual(len(r.child_list), 0)
|
||||
with self.assertRaisesRegex(ValueError, "Entity has no child atoms"):
|
||||
sasa.compute(r)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
runner = unittest.TextTestRunner(verbosity=2)
|
||||
unittest.main(testRunner=runner)
|
Reference in New Issue
Block a user