mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 13:43:47 +08:00
252 lines
8.3 KiB
Python
252 lines
8.3 KiB
Python
# Copyright (C) 2020, Joao Rodrigues (j.p.g.l.m.rodrigues@gmail.com)
|
|
#
|
|
# This file is part of the Biopython distribution and governed by your
|
|
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
|
|
# Please see the LICENSE file that should have been included as part of this
|
|
# package.
|
|
|
|
"""Calculation of solvent accessible surface areas for Bio.PDB entities.
|
|
|
|
Uses the "rolling ball" algorithm developed by Shrake & Rupley algorithm,
|
|
which uses a sphere (of equal radius to a solvent molecule) to probe the
|
|
surface of the molecule.
|
|
|
|
Reference:
|
|
Shrake, A; Rupley, JA. (1973). J Mol Biol
|
|
"Environment and exposure to solvent of protein atoms. Lysozyme and insulin".
|
|
"""
|
|
|
|
import collections
|
|
import math
|
|
from typing import MutableMapping
|
|
|
|
import numpy as np
|
|
|
|
from Bio.PDB.kdtrees import KDTree
|
|
|
|
__all__ = ["ShrakeRupley"]
|
|
|
|
_ENTITY_HIERARCHY = {
|
|
"A": 0,
|
|
"R": 1,
|
|
"C": 2,
|
|
"M": 3,
|
|
"S": 4,
|
|
}
|
|
|
|
# vdW radii taken from:
|
|
# https://en.wikipedia.org/wiki/Atomic_radii_of_the_elements_(data_page)
|
|
#
|
|
# Radii for CL, K, NA, etc are _not_ ionic radii.
|
|
#
|
|
# References:
|
|
# A. Bondi (1964). "van der Waals Volumes and Radii".
|
|
# M. Mantina, A.C. et al., J. Phys. Chem. 2009, 113, 5806.
|
|
ATOMIC_RADII: MutableMapping[str, float] = collections.defaultdict(lambda: 2.0)
|
|
ATOMIC_RADII.update(
|
|
{
|
|
"H": 1.200,
|
|
"HE": 1.400,
|
|
"C": 1.700,
|
|
"N": 1.550,
|
|
"O": 1.520,
|
|
"F": 1.470,
|
|
"NA": 2.270,
|
|
"MG": 1.730,
|
|
"P": 1.800,
|
|
"S": 1.800,
|
|
"CL": 1.750,
|
|
"K": 2.750,
|
|
"CA": 2.310,
|
|
"NI": 1.630,
|
|
"CU": 1.400,
|
|
"ZN": 1.390,
|
|
"SE": 1.900,
|
|
"BR": 1.850,
|
|
"CD": 1.580,
|
|
"I": 1.980,
|
|
"HG": 1.550,
|
|
}
|
|
)
|
|
|
|
|
|
class ShrakeRupley:
|
|
"""Calculates SASAs using the Shrake-Rupley algorithm."""
|
|
|
|
def __init__(self, probe_radius=1.40, n_points=100, radii_dict=None):
|
|
"""Initialize the class.
|
|
|
|
:param probe_radius: radius of the probe in A. Default is 1.40, roughly
|
|
the radius of a water molecule.
|
|
:type probe_radius: float
|
|
|
|
:param n_points: resolution of the surface of each atom. Default is 100.
|
|
A higher number of points results in more precise measurements, but
|
|
slows down the calculation.
|
|
:type n_points: int
|
|
|
|
:param radii_dict: user-provided dictionary of atomic radii to use in
|
|
the calculation. Values will replace/complement those in the
|
|
default ATOMIC_RADII dictionary.
|
|
:type radii_dict: dict
|
|
|
|
>>> sr = ShrakeRupley()
|
|
>>> sr = ShrakeRupley(n_points=960)
|
|
>>> sr = ShrakeRupley(radii_dict={"O": 3.1415})
|
|
"""
|
|
if probe_radius <= 0.0:
|
|
raise ValueError(
|
|
f"Probe radius must be a positive number: {probe_radius} <= 0"
|
|
)
|
|
|
|
self.probe_radius = float(probe_radius)
|
|
|
|
if n_points < 1:
|
|
raise ValueError(
|
|
f"Number of sphere points must be larger than 1: {n_points}"
|
|
)
|
|
self.n_points = n_points
|
|
|
|
# Update radii list with user provided lists.
|
|
self.radii_dict = ATOMIC_RADII.copy()
|
|
if radii_dict is not None:
|
|
self.radii_dict.update(radii_dict)
|
|
|
|
# Pre-compute reference sphere
|
|
self._sphere = self._compute_sphere()
|
|
|
|
def _compute_sphere(self):
|
|
"""Return the 3D coordinates of n points on a sphere.
|
|
|
|
Uses the golden spiral algorithm to place points 'evenly' on the sphere
|
|
surface. We compute this once and then move the sphere to the centroid
|
|
of each atom as we compute the ASAs.
|
|
"""
|
|
n = self.n_points
|
|
|
|
dl = np.pi * (3 - 5**0.5)
|
|
dz = 2.0 / n
|
|
|
|
longitude = 0
|
|
z = 1 - dz / 2
|
|
|
|
coords = np.zeros((n, 3), dtype=np.float32)
|
|
for k in range(n):
|
|
r = (1 - z * z) ** 0.5
|
|
coords[k, 0] = math.cos(longitude) * r
|
|
coords[k, 1] = math.sin(longitude) * r
|
|
coords[k, 2] = z
|
|
z -= dz
|
|
longitude += dl
|
|
|
|
return coords
|
|
|
|
def compute(self, entity, level="A"):
|
|
"""Calculate surface accessibility surface area for an entity.
|
|
|
|
The resulting atomic surface accessibility values are attached to the
|
|
.sasa attribute of each entity (or atom), depending on the level. For
|
|
example, if level="R", all residues will have a .sasa attribute. Atoms
|
|
will always be assigned a .sasa attribute with their individual values.
|
|
|
|
:param entity: input entity.
|
|
:type entity: Bio.PDB.Entity, e.g. Residue, Chain, ...
|
|
|
|
:param level: the level at which ASA values are assigned, which can be
|
|
one of "A" (Atom), "R" (Residue), "C" (Chain), "M" (Model), or
|
|
"S" (Structure). The ASA value of an entity is the sum of all ASA
|
|
values of its children. Defaults to "A".
|
|
:type entity: Bio.PDB.Entity
|
|
|
|
>>> from Bio.PDB import PDBParser
|
|
>>> from Bio.PDB.SASA import ShrakeRupley
|
|
>>> p = PDBParser(QUIET=1)
|
|
>>> # This assumes you have a local copy of 1LCD.pdb in a directory called "PDB"
|
|
>>> struct = p.get_structure("1LCD", "PDB/1LCD.pdb")
|
|
>>> sr = ShrakeRupley()
|
|
>>> sr.compute(struct, level="S")
|
|
>>> print(round(struct.sasa, 2))
|
|
7053.43
|
|
>>> print(round(struct[0]["A"][11]["OE1"].sasa, 2))
|
|
9.64
|
|
"""
|
|
is_valid = hasattr(entity, "level") and entity.level in {"R", "C", "M", "S"}
|
|
if not is_valid:
|
|
raise ValueError(
|
|
f"Invalid entity type '{type(entity)}'. "
|
|
"Must be Residue, Chain, Model, or Structure"
|
|
)
|
|
|
|
if level not in _ENTITY_HIERARCHY:
|
|
raise ValueError(f"Invalid level '{level}'. Must be A, R, C, M, or S.")
|
|
elif _ENTITY_HIERARCHY[level] > _ENTITY_HIERARCHY[entity.level]:
|
|
raise ValueError(
|
|
f"Level '{level}' must be equal or smaller than input entity: {entity.level}"
|
|
)
|
|
|
|
# Get atoms onto list for lookup
|
|
atoms = list(entity.get_atoms())
|
|
n_atoms = len(atoms)
|
|
if not n_atoms:
|
|
raise ValueError("Entity has no child atoms.")
|
|
|
|
# Get coordinates as a numpy array
|
|
# We trust DisorderedAtom and friends to pick representatives.
|
|
coords = np.array([a.coord for a in atoms], dtype=np.float64)
|
|
|
|
# Pre-compute atom neighbors using KDTree
|
|
kdt = KDTree(coords, 10)
|
|
|
|
# Pre-compute radius * probe table
|
|
radii_dict = self.radii_dict
|
|
radii = np.array([radii_dict[a.element] for a in atoms], dtype=np.float64)
|
|
radii += self.probe_radius
|
|
twice_maxradii = np.max(radii) * 2
|
|
|
|
# Calculate ASAs
|
|
asa_array = np.zeros((n_atoms, 1), dtype=np.int64)
|
|
ptset = set(range(self.n_points))
|
|
for i in range(n_atoms):
|
|
r_i = radii[i]
|
|
|
|
# Move sphere to atom
|
|
s_on_i = (np.array(self._sphere, copy=True) * r_i) + coords[i]
|
|
available_set = ptset.copy()
|
|
|
|
# KDtree for sphere points
|
|
kdt_sphere = KDTree(s_on_i, 10)
|
|
|
|
# Iterate over neighbors of atom i
|
|
for jj in kdt.search(coords[i], twice_maxradii):
|
|
j = jj.index
|
|
if i == j:
|
|
continue
|
|
|
|
if jj.radius < (r_i + radii[j]):
|
|
# Remove overlapping points on sphere from available set
|
|
available_set -= {
|
|
pt.index for pt in kdt_sphere.search(coords[j], radii[j])
|
|
}
|
|
|
|
asa_array[i] = len(available_set) # update counts
|
|
|
|
# Convert accessible point count to surface area in A**2
|
|
f = radii * radii * (4 * np.pi / self.n_points)
|
|
asa_array = asa_array * f[:, np.newaxis]
|
|
|
|
# Set atom .sasa
|
|
for i, atom in enumerate(atoms):
|
|
atom.sasa = asa_array[i, 0]
|
|
|
|
# Aggregate values per entity level if necessary
|
|
if level != "A":
|
|
entities = set(atoms)
|
|
target = _ENTITY_HIERARCHY[level]
|
|
for _ in range(target):
|
|
entities = {e.parent for e in entities}
|
|
|
|
atomdict = {a.full_id: idx for idx, a in enumerate(atoms)}
|
|
for e in entities:
|
|
e_atoms = [atomdict[a.full_id] for a in e.get_atoms()]
|
|
e.sasa = asa_array[e_atoms].sum()
|