Native implementation of Shrake-Rupley algorithm for atomic solvent accessible surface areas. (#3274)

* KDtree-assisted implementation of ASA calculation using Shrake-Rupley algorithm.
This commit is contained in:
João Rodrigues
2020-09-17 16:42:42 -07:00
committed by GitHub
parent 36b5aee565
commit 1c0ccf25cb
4 changed files with 430 additions and 1 deletions

251
Bio/PDB/SASA.py Normal file
View File

@ -0,0 +1,251 @@
# Copyright (C) 2020, Joao Rodrigues (j.p.g.l.m.rodrigues@gmail.com)
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Calculation of solvent accessible surface areas for Bio.PDB entities.
Uses the "rolling ball" algorithm developed by Shrake & Rupley algorithm,
which uses a sphere (of equal radius to a solvent molecule) to probe the
surface of the molecule.
Reference:
Shrake, A; Rupley, JA. (1973). J Mol Biol
"Environment and exposure to solvent of protein atoms. Lysozyme and insulin".
"""
import collections
import math
import numpy as np
from Bio.PDB.kdtrees import KDTree
__all__ = ["ShrakeRupley"]
_ENTITY_HIERARCHY = {
"A": 0,
"R": 1,
"C": 2,
"M": 3,
"S": 4,
}
# vdW radii taken from:
# https://en.wikipedia.org/wiki/Atomic_radii_of_the_elements_(data_page)
#
# Radii for CL, K, NA, etc are _not_ ionic radii.
#
# References:
# A. Bondi (1964). "van der Waals Volumes and Radii".
# M. Mantina, A.C. et al., J. Phys. Chem. 2009, 113, 5806.
ATOMIC_RADII = collections.defaultdict(lambda x: 2.0)
ATOMIC_RADII.update(
{
"H": 1.200,
"HE": 1.400,
"C": 1.700,
"N": 1.550,
"O": 1.520,
"F": 1.470,
"NA": 2.270,
"MG": 1.730,
"P": 1.800,
"S": 1.800,
"CL": 1.750,
"K": 2.750,
"CA": 2.310,
"NI": 1.630,
"CU": 1.400,
"ZN": 1.390,
"SE": 1.900,
"BR": 1.850,
"CD": 1.580,
"I": 1.980,
"HG": 1.550,
}
)
class ShrakeRupley:
"""Calculates SASAs using the Shrake-Rupley algorithm."""
def __init__(self, probe_radius=1.40, n_points=100, radii_dict=None):
"""Initialize the class.
:param probe_radius: radius of the probe in A. Default is 1.40, roughly
the radius of a water molecule.
:type probe_radius: float
:param n_points: resolution of the surface of each atom. Default is 100.
A higher number of points results in more precise measurements, but
slows down the calculation.
:type n_points: int
:param radii_dict: user-provided dictionary of atomic radii to use in
the calculation. Values will replace/complement those in the
default ATOMIC_RADII dictionary.
:type radii_dict: dict
Examples:
>>> sr = ShrakeRupley()
>>> sr = ShrakeRupley(n_points=960)
>>> sr = ShrakeRupley(radii_dict={"O": 3.1415})
"""
if probe_radius <= 0.0:
raise ValueError(
f"Probe radius must be a positive number: {probe_radius} <= 0"
)
self.probe_radius = float(probe_radius)
if n_points < 1:
raise ValueError(
f"Number of sphere points must be larger than 1: {n_points}"
)
self.n_points = n_points
# Update radii list with user provided lists.
self.radii_dict = ATOMIC_RADII.copy()
if radii_dict is not None:
self.radii_dict.update(radii_dict)
# Pre-compute reference sphere
self._sphere = self._compute_sphere()
def _compute_sphere(self):
"""Return the 3D coordinates of n points on a sphere.
Uses the golden spiral algorithm to place points 'evenly' on the sphere
surface. We compute this once and then move the sphere to the centroid
of each atom as we compute the ASAs.
"""
n = self.n_points
dl = np.pi * (3 - 5 ** 0.5)
dz = 2.0 / n
longitude = 0
z = 1 - dz / 2
coords = np.zeros((n, 3), dtype=np.float32)
for k in range(n):
r = (1 - z * z) ** 0.5
coords[k, 0] = math.cos(longitude) * r
coords[k, 1] = math.sin(longitude) * r
coords[k, 2] = z
z -= dz
longitude += dl
return coords
def compute(self, entity, level="A"):
"""Calculate surface accessibility surface area for an entity.
The resulting atomic surface accessibility values are attached to the
.sasa attribute of each entity (or atom), depending on the level. For
example, if level="R", all residues will have a .sasa attribute. Atoms
will always be assigned a .sasa attribute with their individual values.
:param entity: input entity.
:type entity: Bio.PDB.Entity, e.g. Residue, Chain, ...
:param level: the level at which ASA values are assigned, which can be
one of "A" (Atom), "R" (Residue), "C" (Chain), "M" (Model), or
"S" (Structure). The ASA value of an entity is the sum of all ASA
values of its children. Defaults to "A".
:type entity: Bio.PDB.Entity
Example:
>>> from Bio.PDB import PDBParser
>>> p = PDBParser(QUIET=1)
>>> struct = p.get_structure("1LCD", "PDB/1LCD.pdb")
>>> sr = ShrakeRupley()
>>> sr.compute(struct, level="S")
>>> print(round(struct.sasa, 2))
7053.43
>>> print(round(struct[0]["A"][11]["OE1"].sasa, 2))
9.64
"""
is_valid = hasattr(entity, "level") and entity.level in {"R", "C", "M", "S"}
if not is_valid:
raise ValueError(
f"Invalid entity type '{type(entity)}'. "
"Must be Residue, Chain, Model, or Structure"
)
if level not in _ENTITY_HIERARCHY:
raise ValueError(f"Invalid level '{level}'. Must be A, R, C, M, or S.")
elif _ENTITY_HIERARCHY[level] > _ENTITY_HIERARCHY[entity.level]:
raise ValueError(
f"Level '{level}' must be equal or smaller than input entity: {entity.level}"
)
# Get atoms onto list for lookup
atoms = list(entity.get_atoms())
n_atoms = len(atoms)
if not n_atoms:
raise ValueError("Entity has no child atoms.")
# Get coordinates as a numpy array
# We trust DisorderedAtom and friends to pick representatives.
coords = np.array([a.coord for a in atoms], dtype=np.float64)
# Pre-compute atom neighbors using KDTree
kdt = KDTree(coords, 10)
# Pre-compute radius * probe table
radii_dict = self.radii_dict
radii = np.array([radii_dict[a.element] for a in atoms], dtype=np.float64)
radii += self.probe_radius
twice_maxradii = np.max(radii) * 2
# Calculate ASAs
asa_array = np.zeros((n_atoms, 1), dtype=np.int)
ptset = set(range(self.n_points))
for i in range(n_atoms):
r_i = radii[i]
# Move sphere to atom
s_on_i = (np.array(self._sphere, copy=True) * r_i) + coords[i]
available_set = ptset.copy()
# KDtree for sphere points
kdt_sphere = KDTree(s_on_i, 10)
# Iterate over neighbors of atom i
for jj in kdt.search(coords[i], twice_maxradii):
j = jj.index
if i == j:
continue
if jj.radius < (r_i + radii[j]):
# Remove overlapping points on sphere from available set
available_set -= {
pt.index for pt in kdt_sphere.search(coords[j], radii[j])
}
asa_array[i] = len(available_set) # update counts
# Convert accessible point count to surface area in A**2
f = radii * radii * (4 * np.pi / self.n_points)
asa_array = asa_array * f[:, np.newaxis]
# Set atom .sasa
for i, atom in enumerate(atoms):
atom.sasa = asa_array[i, 0]
# Aggregate values per entity level if necessary
if level != "A":
entities = set(atoms)
target = _ENTITY_HIERARCHY[level]
for _ in range(target):
entities = {e.parent for e in entities}
atomdict = {a.full_id: idx for idx, a in enumerate(atoms)}
for e in entities:
e_atoms = [atomdict[a.full_id] for a in e.get_atoms()]
e.sasa = asa_array[e_atoms].sum()

View File

@ -10,7 +10,16 @@
Includes: PDB and mmCIF parsers, a Structure class, a module to keep a local
copy of the PDB up-to-date, selective IO of PDB files, etc.
Author: Thomas Hamelryck. Additional code by Kristian Rother.
Original Author: Thomas Hamelryck.
Contributions by:
- Peter Cock
- Joe Greener
- Rob Miller
- Lenna X. Peterson
- Joao Rodrigues
- Kristian Rother
- Eric Talevich
- and many others.
"""
# Get a Structure object from a PDB file
@ -72,3 +81,10 @@ try:
from .NeighborSearch import NeighborSearch
except ImportError:
pass
# Native Shrake-Rupley algorithm for SASA calculations.
# Depends on kdtrees C module
try:
from .SASA import ShrakeRupley
except ImportError:
pass

View File

@ -23,6 +23,9 @@ centers of gravity or geometry.
New method ``disordered_remove()`` implemented in Bio.PDB DisorderedAtom and
DisorderedResidue to remove children.
New module Bio.PDB.SASA implements the Shrake-Rupley algorithm to calculate
atomic solvent accessible areas without third-party tools.
Many thanks to the Biopython developers and community for making this release
possible, especially the following contributors:

159
Tests/test_PDB_SASA.py Normal file
View File

@ -0,0 +1,159 @@
# Copyright 2020 Joao Rodrigues. All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Unit tests for the Bio.PDB.SASA module: Surface Accessibility Calculations."""
import copy
import pathlib
import unittest
import warnings
from Bio.PDB import PDBParser
from Bio.PDB.SASA import ShrakeRupley
DATADIR = pathlib.Path(__file__).parent / "PDB"
class TestShrakeRupley(unittest.TestCase):
"""Tests for SR algorithm."""
# Expected values obtained with freesasa 2.0.3 and custom config file.
# e.g. cmd: --shrake-rupley --resolution 100 -n-threads 1 --probe-radius 1.4
@classmethod
def setUpClass(cls):
"""One-time setup for all tests."""
cls.parser = p = PDBParser(QUIET=1)
with warnings.catch_warnings():
structure = p.get_structure("X", DATADIR / "1LCD.pdb")
model = structure[0]
# Remove HETATM and Hs for simplicity/speed
for r in list(model.get_residues()):
if r.id[0] == " ":
for a in list(r):
if a.element == "H":
r.detach_child(a.name)
else:
c = r.parent
c.detach_child(r.id)
cls.model = model
# General Parameters
def test_default_algorithm(self):
"""Run Shrake-Rupley with default parameters."""
m = copy.deepcopy(self.model) # modifies atom.sasa
sasa = ShrakeRupley()
sasa.compute(m)
result = [a.sasa for a in m.get_atoms()][:5]
expected = [50.36, 31.40, 10.87, 12.86, 2.42]
for a, b in zip(result, expected):
self.assertAlmostEqual(a, b, places=2)
def test_higher_resolution(self):
"""Run Shrake-Rupley with 960 points per sphere."""
m = copy.deepcopy(self.model) # modifies atom.sasa
sasa = ShrakeRupley(n_points=960)
sasa.compute(m)
result = [a.sasa for a in m.get_atoms()][:5]
expected = [51.90, 31.45, 12.45, 12.72, 3.02]
for a, b in zip(result, expected):
self.assertAlmostEqual(a, b, places=2)
def test_custom_radii(self):
"""Run Shrake-Rupley with custom radii."""
m = copy.deepcopy(self.model) # modifies atom.sasa
sasa = ShrakeRupley(radii_dict={"C": 5.00})
sasa.compute(m)
result = [a.sasa for a in m.get_atoms()][:5]
expected = [0.0, 190.45, 41.18, 0.0, 36.03]
for a, b in zip(result, expected):
self.assertAlmostEqual(a, b, places=2)
# Compute parameters
def test_level_R(self):
"""Run Shrake-Rupley with level R."""
m = copy.deepcopy(self.model) # modifies atom.sasa
sasa = ShrakeRupley()
sasa.compute(m, level="R")
for r in m.get_residues():
atom_sum = sum(a.sasa for a in r)
self.assertAlmostEqual(atom_sum, r.sasa, places=2)
def test_level_C(self):
"""Run Shrake-Rupley with level C."""
m = copy.deepcopy(self.model) # modifies atom.sasa
sasa = ShrakeRupley()
sasa.compute(m, level="C")
for c in m.get_chains():
atom_sum = sum(a.sasa for a in c.get_atoms())
self.assertAlmostEqual(atom_sum, c.sasa, places=2)
# Exceptions
def test_fail_probe_radius(self):
"""Raise exception on bad probe_radius parameter."""
with self.assertRaisesRegex(ValueError, "must be a positive number"):
sasa = ShrakeRupley(probe_radius=-1.40)
def test_fail_n_points(self):
"""Raise exception on bad n_points parameter."""
with self.assertRaisesRegex(ValueError, "must be larger than 1"):
sasa = ShrakeRupley(n_points=0)
def test_fail_compute_entity_type(self):
"""Raise exception on unsupported entity type."""
with self.assertRaisesRegex(ValueError, "Invalid entity type"):
sasa = ShrakeRupley()
sasa.compute([1, 2, 3, 4, 5])
def test_fail_compute_entity_level(self):
"""Raise exception on input Atom entity."""
atom = list(self.model.get_atoms())[0]
with self.assertRaisesRegex(ValueError, "Invalid entity type"):
sasa = ShrakeRupley()
sasa.compute(atom)
def test_fail_compute_level_1(self):
"""Raise exception on invalid level parameter: X."""
with self.assertRaisesRegex(ValueError, "Invalid level"):
sasa = ShrakeRupley()
sasa.compute(self.model, level="X")
def test_fail_compute_level_2(self):
"""Raise exception on invalid level parameter: S > C."""
chain = self.model["A"]
with self.assertRaisesRegex(ValueError, "be equal or smaller than"):
sasa = ShrakeRupley()
sasa.compute(chain, level="S") # Chain is a child of Structure.
def test_fail_empty_entity(self):
"""Raise exception on invalid level parameter: S > C."""
sasa = ShrakeRupley()
r = copy.deepcopy(self.model["A"].child_list[0])
for a in list(r):
r.detach_child(a.name) # empty residue
self.assertEqual(len(r.child_list), 0)
with self.assertRaisesRegex(ValueError, "Entity has no child atoms"):
sasa.compute(r)
if __name__ == "__main__":
runner = unittest.TextTestRunner(verbosity=2)
unittest.main(testRunner=runner)