mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 21:53:47 +08:00
@ -290,13 +290,11 @@ class Atom:
|
|||||||
|
|
||||||
return (
|
return (
|
||||||
self.name == other.name
|
self.name == other.name
|
||||||
and self.bfactor == other.bfactor
|
and np.isclose(self.bfactor, other.bfactor)
|
||||||
and self.occupancy == other.occupancy
|
and np.isclose(self.occupancy, other.occupancy)
|
||||||
and self.altloc == other.altloc
|
and self.altloc == other.altloc
|
||||||
and self.fullname == other.fullname
|
and self.fullname == other.fullname
|
||||||
and np.allclose(self.coord, other.coord)
|
and (np.allclose(self.coord, other.coord) if compare_coordinates else True)
|
||||||
if compare_coordinates
|
|
||||||
else True
|
|
||||||
and getattr(self, "element", None) == getattr(self, "element", None)
|
and getattr(self, "element", None) == getattr(self, "element", None)
|
||||||
and getattr(self, "pqr_charge", None) == getattr(self, "pqr_charge", None)
|
and getattr(self, "pqr_charge", None) == getattr(self, "pqr_charge", None)
|
||||||
and getattr(self, "radius", None) == getattr(self, "radius", None)
|
and getattr(self, "radius", None) == getattr(self, "radius", None)
|
||||||
|
@ -33,7 +33,6 @@ except ImportError:
|
|||||||
|
|
||||||
# Get a Structure object from a PDB file
|
# Get a Structure object from a PDB file
|
||||||
from .PDBParser import PDBParser
|
from .PDBParser import PDBParser
|
||||||
|
|
||||||
from .MMCIFParser import MMCIFParser
|
from .MMCIFParser import MMCIFParser
|
||||||
from .MMCIFParser import FastMMCIFParser
|
from .MMCIFParser import FastMMCIFParser
|
||||||
from .PDBMLParser import PDBMLParser
|
from .PDBMLParser import PDBMLParser
|
||||||
|
207
Bio/PDB/bcifhelpermodule.c
Normal file
207
Bio/PDB/bcifhelpermodule.c
Normal file
@ -0,0 +1,207 @@
|
|||||||
|
#define PY_SSIZE_T_CLEAN
|
||||||
|
#include <Python.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
void
|
||||||
|
integer_unpack_u8(Py_buffer *in_view, Py_buffer *out_view)
|
||||||
|
{
|
||||||
|
Py_ssize_t in_size = in_view->shape[0];
|
||||||
|
Py_ssize_t in_index = 0;
|
||||||
|
Py_ssize_t out_index = 0;
|
||||||
|
|
||||||
|
uint8_t *in_data = in_view->buf;
|
||||||
|
uint32_t *out_data = out_view->buf;
|
||||||
|
|
||||||
|
while (in_index < in_size) {
|
||||||
|
uint32_t sum = in_data[in_index];
|
||||||
|
|
||||||
|
if (sum == UINT8_MAX) {
|
||||||
|
while (in_index + 1 < in_size) {
|
||||||
|
in_index += 1;
|
||||||
|
sum += in_data[in_index];
|
||||||
|
|
||||||
|
if (in_data[in_index] != UINT8_MAX) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
out_data[out_index] = sum;
|
||||||
|
in_index += 1;
|
||||||
|
out_index += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
integer_unpack_u16(Py_buffer *in_view, Py_buffer *out_view)
|
||||||
|
{
|
||||||
|
Py_ssize_t in_size = in_view->shape[0];
|
||||||
|
Py_ssize_t in_index = 0;
|
||||||
|
Py_ssize_t out_index = 0;
|
||||||
|
|
||||||
|
uint16_t *in_data = in_view->buf;
|
||||||
|
uint32_t *out_data = out_view->buf;
|
||||||
|
|
||||||
|
while (in_index < in_size) {
|
||||||
|
uint32_t sum = in_data[in_index];
|
||||||
|
|
||||||
|
if (sum == UINT16_MAX) {
|
||||||
|
while (in_index + 1 < in_size) {
|
||||||
|
in_index += 1;
|
||||||
|
sum += in_data[in_index];
|
||||||
|
|
||||||
|
if (in_data[in_index] != UINT16_MAX) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
out_data[out_index] = sum;
|
||||||
|
in_index += 1;
|
||||||
|
out_index += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
integer_unpack_i8(Py_buffer *in_view, Py_buffer *out_view)
|
||||||
|
{
|
||||||
|
Py_ssize_t in_size = in_view->shape[0];
|
||||||
|
Py_ssize_t in_index = 0;
|
||||||
|
Py_ssize_t out_index = 0;
|
||||||
|
|
||||||
|
int8_t *in_data = in_view->buf;
|
||||||
|
int32_t *out_data = out_view->buf;
|
||||||
|
|
||||||
|
while (in_index < in_size) {
|
||||||
|
int32_t sum = in_data[in_index];
|
||||||
|
|
||||||
|
if (sum == INT8_MAX || sum == INT8_MIN) {
|
||||||
|
while (in_index + 1 < in_size) {
|
||||||
|
in_index += 1;
|
||||||
|
sum += in_data[in_index];
|
||||||
|
|
||||||
|
if (in_data[in_index] != INT8_MAX && in_data[in_index] != INT8_MIN) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
out_data[out_index] = sum;
|
||||||
|
in_index += 1;
|
||||||
|
out_index += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
integer_unpack_i16(Py_buffer *in_view, Py_buffer *out_view)
|
||||||
|
{
|
||||||
|
Py_ssize_t in_size = in_view->shape[0];
|
||||||
|
Py_ssize_t in_index = 0;
|
||||||
|
Py_ssize_t out_index = 0;
|
||||||
|
|
||||||
|
int16_t *in_data = in_view->buf;
|
||||||
|
int32_t *out_data = out_view->buf;
|
||||||
|
|
||||||
|
while (in_index < in_size) {
|
||||||
|
int32_t sum = in_data[in_index];
|
||||||
|
|
||||||
|
if (sum == INT16_MAX || sum == INT16_MIN) {
|
||||||
|
while (in_index + 1 < in_size) {
|
||||||
|
in_index += 1;
|
||||||
|
sum += in_data[in_index];
|
||||||
|
|
||||||
|
if (in_data[in_index] != INT16_MAX && in_data[in_index] != INT16_MIN) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
out_data[out_index] = sum;
|
||||||
|
in_index += 1;
|
||||||
|
out_index += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Python entry point: integer_unpack(in, out) -> None
 *
 * Both arguments must support the buffer protocol and be one-dimensional;
 * `out` must additionally be writable and hold 32-bit elements. The first
 * character of the input buffer's format selects the unpacking routine:
 * 'B' (uint8), 'H' (uint16), 'b' (int8) or 'h' (int16).
 *
 * Returns None on success. On any failure a Python exception is set and
 * NULL is returned, including errors raised by the unpacking routines.
 */
static PyObject *
integer_unpack(PyObject *self, PyObject *args)
{
    PyObject *in = NULL;
    PyObject *out = NULL;
    Py_buffer in_view, out_view;
    const int flags = PyBUF_ND | PyBUF_FORMAT;
    char format;

    if (!PyArg_ParseTuple(args, "OO", &in, &out)) {
        return NULL;
    }

    if (PyObject_GetBuffer(in, &in_view, flags) != 0) {
        return NULL;
    }
    if (PyObject_GetBuffer(out, &out_view, flags | PyBUF_WRITABLE) != 0) {
        PyBuffer_Release(&in_view);
        return NULL;
    }

    if (in_view.ndim != 1) {
        PyErr_SetString(PyExc_ValueError, "First argument should be one-dimensional.");
        goto exit;
    }
    if (out_view.ndim != 1) {
        PyErr_SetString(PyExc_ValueError, "Second argument should be one-dimensional.");
        goto exit;
    }
    /* The unpackers store uint32_t/int32_t values; reject anything else. */
    if (out_view.itemsize != (Py_ssize_t)sizeof(uint32_t)) {
        PyErr_SetString(PyExc_ValueError,
                        "Second argument should have 32-bit integer elements.");
        goto exit;
    }

    /* Per the buffer protocol a NULL format means unsigned bytes ("B"). */
    format = in_view.format ? in_view.format[0] : 'B';

    if (format == 'B') {
        integer_unpack_u8(&in_view, &out_view);
    }
    else if (format == 'H') {
        integer_unpack_u16(&in_view, &out_view);
    }
    else if (format == 'b') {
        integer_unpack_i8(&in_view, &out_view);
    }
    else if (format == 'h') {
        integer_unpack_i16(&in_view, &out_view);
    }
    else {
        PyErr_Format(PyExc_ValueError,
                     "Unexpected buffer format: %s",
                     in_view.format ? in_view.format : "(null)");
    }

exit:
    PyBuffer_Release(&in_view);
    PyBuffer_Release(&out_view);

    /* Never hand back a result while an exception is pending. */
    if (PyErr_Occurred()) {
        return NULL;
    }
    Py_RETURN_NONE;
}
|
||||||
|
|
||||||
|
static PyMethodDef IntegerUnpackMethods[] = {
|
||||||
|
{"integer_unpack", integer_unpack, METH_VARARGS, NULL},
|
||||||
|
{NULL, NULL, 0, NULL}
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct PyModuleDef moduledef = {
|
||||||
|
PyModuleDef_HEAD_INIT,
|
||||||
|
"_bcif_helper",
|
||||||
|
NULL,
|
||||||
|
-1,
|
||||||
|
IntegerUnpackMethods
|
||||||
|
};
|
||||||
|
|
||||||
|
PyMODINIT_FUNC
|
||||||
|
PyInit__bcif_helper(void)
|
||||||
|
{
|
||||||
|
PyObject *m;
|
||||||
|
|
||||||
|
m = PyModule_Create(&moduledef);
|
||||||
|
if (!m) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return m;
|
||||||
|
}
|
306
Bio/PDB/binary_cif.py
Normal file
306
Bio/PDB/binary_cif.py
Normal file
@ -0,0 +1,306 @@
|
|||||||
|
"""
|
||||||
|
A module to interact with BinaryCIF-formatted files.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import gzip
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from collections import deque
|
||||||
|
|
||||||
|
# msgpack is an optional dependency: BinaryCIF files are MessagePack documents.
try:
    import msgpack
except ImportError:
    from Bio import MissingPythonDependencyError

    # Name the module as it is actually importable (Bio.PDB.binary_cif).
    raise MissingPythonDependencyError(
        "Install msgpack to use Bio.PDB.binary_cif (e.g. pip install msgpack)"
    ) from None
|
||||||
|
|
||||||
|
import Bio.PDB._bcif_helper as _bcif_helper
|
||||||
|
from Bio.PDB.Structure import Structure
|
||||||
|
from Bio.PDB.StructureBuilder import StructureBuilder
|
||||||
|
|
||||||
|
|
||||||
|
# Mapping from BinaryCIF numeric type codes to NumPy dtypes.
# See https://github.com/ihmwg/python-ihm/blob/main/ihm/format_bcif.py
# and https://numpy.org/doc/stable/reference/arrays.dtypes.html#
# The "<" prefix requests little endian representation, which is what
# BinaryCIF always uses.
_dtypes = {
    type_code: np.dtype(type_string)
    for type_code, type_string in (
        (1, "<i1"),  # Int8
        (2, "<i2"),  # Int16
        (3, "<i4"),  # Int32
        (4, "<u1"),  # UInt8
        (5, "<u2"),  # UInt16
        (6, "<u4"),  # UInt32
        (32, "<f4"),  # Float32
        (33, "<f8"),  # Float64
    )
}
|
||||||
|
|
||||||
|
|
||||||
|
def _byte_array_decoder(column):
    """Undo the outermost "ByteArray" encoding of ``column`` in place.

    The buffer stored in ``column["data"]["data"]`` is reinterpreted as a
    NumPy array whose dtype is selected by the encoding's ``type`` code, and
    the consumed encoding is removed from the end of the encoding stack.
    """
    payload = column["data"]
    byte_array_encoding = payload["encoding"][-1]
    assert byte_array_encoding["kind"] == "ByteArray"

    element_dtype = _dtypes[byte_array_encoding["type"]]
    payload["data"] = np.frombuffer(payload["data"], element_dtype)
    payload["encoding"].pop()
|
||||||
|
|
||||||
|
|
||||||
|
def _fixed_point_decoder(column):
    """Undo the outermost "FixedPoint" encoding of ``column`` in place.

    The 32-bit integer data is divided by the encoding's ``factor`` and
    stored with the dtype selected by ``srcType``; the consumed encoding is
    then removed from the end of the encoding stack.
    """
    payload = column["data"]
    fixed_point_encoding = payload["encoding"][-1]
    assert fixed_point_encoding["kind"] == "FixedPoint"

    target_dtype = _dtypes[fixed_point_encoding["srcType"]]
    scale = fixed_point_encoding["factor"]
    integers = payload["data"]
    assert integers.dtype.type in (np.int32, np.uint32)

    payload["data"] = np.divide(integers, scale, dtype=target_dtype)
    payload["encoding"].pop()
|
||||||
|
|
||||||
|
|
||||||
|
def _interval_quantization_decoder(column):
    """Undo the outermost "IntervalQuantization" encoding of ``column`` in place.

    Each stored integer ``i`` is mapped back to ``min + i * delta`` where
    ``delta = (max - min) / (numSteps - 1)``, and the result is stored with
    the dtype selected by ``srcType``. The consumed encoding is then removed
    from the end of the encoding stack.
    """
    payload = column["data"]
    encoding = payload["encoding"][-1]
    assert encoding["kind"] == "IntervalQuantization"

    min_val = encoding["min"]
    max_val = encoding["max"]
    # The BinaryCIF specification names this field "numSteps"; the
    # "num_steps" spelling is still accepted for backward compatibility.
    if "numSteps" in encoding:
        num_steps = encoding["numSteps"]
    else:
        num_steps = encoding["num_steps"]

    # Parenthesise the range: the step is the interval width divided by the
    # number of gaps. With a single step every value equals ``min``.
    delta = (max_val - min_val) / (num_steps - 1) if num_steps > 1 else 0.0

    data = payload["data"]
    dtype = _dtypes[encoding["srcType"]]
    decoded_data = np.add(min_val, np.multiply(data, delta, dtype=dtype), dtype=dtype)

    payload["data"] = decoded_data
    payload["encoding"].pop()
|
||||||
|
|
||||||
|
|
||||||
|
def _run_length_decoder(column):
    """Undo the outermost "RunLength" encoding of ``column`` in place.

    The data is read as alternating (value, repeat count) entries. Values are
    converted to the dtype selected by ``srcType`` and repeated accordingly;
    the expanded length must equal the encoding's ``srcSize``. The consumed
    encoding is then removed from the end of the encoding stack.
    """
    payload = column["data"]
    run_length_encoding = payload["encoding"][-1]
    assert run_length_encoding["kind"] == "RunLength"

    pairs = payload["data"]
    value_dtype = _dtypes[run_length_encoding["srcType"]]
    values = pairs[::2].astype(value_dtype)
    counts = pairs[1::2]
    expanded = np.repeat(values, counts)

    assert len(expanded) == run_length_encoding["srcSize"]
    payload["data"] = expanded
    payload["encoding"].pop()
|
||||||
|
|
||||||
|
|
||||||
|
def _delta_decoder(column):
    """Undo the outermost "Delta" encoding of ``column`` in place.

    The data holds successive differences; the original values are the
    running sum of those differences, offset by the encoding's ``origin``.
    The result uses the dtype selected by ``srcType``. The consumed encoding
    is then removed from the end of the encoding stack.
    """
    payload = column["data"]
    encoding = payload["encoding"][-1]
    assert encoding["kind"] == "Delta"

    dtype = _dtypes[encoding["srcType"]]
    # Always work on a private copy: the input may be a read-only view
    # created by np.frombuffer, which cannot be updated in place.
    decoded_data = payload["data"].astype(dtype)

    # An empty column has no first element to offset.
    if decoded_data.size:
        decoded_data[0] += encoding["origin"]
    decoded_data.cumsum(out=decoded_data)

    payload["data"] = decoded_data
    payload["encoding"].pop()
|
||||||
|
|
||||||
|
|
||||||
|
def _integer_packing_decoder(column):
    """Undo the outermost "IntegerPacking" encoding of ``column`` in place.

    The packed 8- or 16-bit data is expanded by the ``_bcif_helper`` C
    extension into a 32-bit array of ``srcSize`` elements (unsigned when
    ``isUnsigned`` is true, signed otherwise). The consumed encoding is then
    removed from the end of the encoding stack.

    :raises ValueError: if the data's dtype disagrees with the encoding's
        ``byteCount`` or ``isUnsigned`` fields.
    """
    payload = column["data"]
    encoding = payload["encoding"][-1]
    assert encoding["kind"] == "IntegerPacking"

    byte_count = encoding["byteCount"]
    src_size = encoding["srcSize"]
    is_unsigned = encoding["isUnsigned"]
    data = payload["data"]

    # These checks guard a native call on file-supplied data, so they must
    # not be asserts (asserts are stripped when Python runs with -O).
    if byte_count != data.dtype.itemsize:
        raise ValueError(
            f"IntegerPacking byteCount {byte_count} does not match "
            f"data item size {data.dtype.itemsize}"
        )
    if np.issubdtype(data.dtype, np.unsignedinteger) != is_unsigned:
        raise ValueError(
            f"IntegerPacking isUnsigned {is_unsigned} does not match "
            f"data type {data.dtype}"
        )

    dtype = np.dtype("<u4") if is_unsigned else np.dtype("<i4")
    # Zero-initialise so slots the helper does not write are deterministic.
    decoded_data = np.zeros((src_size,), dtype)
    _bcif_helper.integer_unpack(data, decoded_data)

    payload["data"] = decoded_data
    payload["encoding"].pop()
|
||||||
|
|
||||||
|
|
||||||
|
def _string_array_decoder(column):
    """Undo the outermost "StringArray" encoding of ``column`` in place.

    The encoding carries one concatenated string (``stringData``) plus
    encoded offsets delimiting the unique substrings; the column data itself
    is an encoded array of indices into those unique strings. Offsets and
    indices are decoded with their own encoding stacks, and the column data
    is replaced by an object array of the looked-up strings. The consumed
    encoding is then removed from the end of the encoding stack.
    """
    payload = column["data"]
    string_encoding = payload["encoding"][-1]
    assert string_encoding["kind"] == "StringArray"

    # Wrap offsets and indices as pseudo-columns so _decode can process them.
    offsets_column = {
        "data": {
            "data": string_encoding["offsets"],
            "encoding": string_encoding["offsetEncoding"],
        }
    }
    lookup_column = {
        "data": {
            "data": payload["data"],
            "encoding": string_encoding["dataEncoding"],
        }
    }

    text = string_encoding["stringData"]
    offsets = _decode(offsets_column)

    # N + 1 offsets delimit N substrings.
    unique_strings = np.empty((len(offsets) - 1,), dtype=object)
    for slot, (start, stop) in enumerate(zip(offsets[:-1], offsets[1:])):
        unique_strings[slot] = text[start:stop]

    lookups = _decode(lookup_column)
    payload["data"] = unique_strings[lookups]
    payload["encoding"].pop()
|
||||||
|
|
||||||
|
|
||||||
|
# Dispatch table: BinaryCIF encoding "kind" -> function undoing that encoding.
_decoders = {
    "ByteArray": _byte_array_decoder,
    "FixedPoint": _fixed_point_decoder,
    "IntervalQuantization": _interval_quantization_decoder,
    "RunLength": _run_length_decoder,
    "Delta": _delta_decoder,
    "IntegerPacking": _integer_packing_decoder,
    "StringArray": _string_array_decoder,
}


def _decode(column):
    """Fully decode ``column`` and return its data.

    Note that this modifies ``column``: its encoding list is replaced by a
    deque, and encodings are undone from the last one to the first, each
    decoder removing the encoding it has handled and updating the data.
    """
    payload = column["data"]
    pending = deque(payload["encoding"])
    payload["encoding"] = pending

    # Each decoder pops the encoding it consumed, so this loop terminates
    # once the stack is empty.
    while pending:
        kind = pending[-1]["kind"]
        decoder = _decoders[kind]
        decoder(column)

    return payload["data"]
|
||||||
|
|
||||||
|
|
||||||
|
class BinaryCIFParser:
    """A parser for BinaryCIF files.

    See the `BinaryCIF specification <https://github.com/molstar/BinaryCIF>`_.
    """

    def __init__(self):
        """Initialize a BinaryCIF parser."""
        self._structure_builder = StructureBuilder()

    def _get_hetero_field(self, atom_group: str, component_id: str) -> str:
        """Return the hetero flag of a residue ID.

        "W" for HETATM records of water ("HOH" or "WAT"), "H" for any other
        HETATM record, and a single space for everything else.
        """
        if atom_group != "HETATM":
            return " "

        return "W" if component_id in ("HOH", "WAT") else "H"

    def _get_residue_ids(self, columns):
        """Return one (hetero flag, sequence number, insertion code) per atom."""
        atom_groups = _decode(columns["_atom_site.group_PDB"])
        component_ids = _decode(columns["_atom_site.label_comp_id"])
        hetero_fields = [
            self._get_hetero_field(atom_group, component_id)
            for atom_group, component_id in zip(atom_groups, component_ids)
        ]
        # Missing insertion codes are represented by a single space.
        insertion_codes = [
            code or " " for code in _decode(columns["_atom_site.pdbx_PDB_ins_code"])
        ]
        sequence_ids = _decode(columns["_atom_site.auth_seq_id"])

        return list(zip(hetero_fields, sequence_ids, insertion_codes))

    def _get_atoms(self, columns):
        """Return one dict of ``StructureBuilder.init_atom`` arguments per atom."""
        names = _decode(columns["_atom_site.label_atom_id"])
        x_list = _decode(columns["_atom_site.Cartn_x"])
        y_list = _decode(columns["_atom_site.Cartn_y"])
        z_list = _decode(columns["_atom_site.Cartn_z"])
        # One (x, y, z) row per atom.
        coordinates_list = np.stack((x_list, y_list, z_list), axis=1)
        b_factors = _decode(columns["_atom_site.B_iso_or_equiv"])
        occupancies = _decode(columns["_atom_site.occupancy"])
        # Missing alternate location IDs are represented by a single space.
        alt_ids = [
            str(alt_id or " ") for alt_id in _decode(columns["_atom_site.label_alt_id"])
        ]
        serial_numbers = _decode(columns["_atom_site.id"])
        type_symbols = _decode(columns["_atom_site.type_symbol"])

        return [
            {
                "name": names[index],
                "coord": coordinates_list[index],
                "b_factor": b_factors[index],
                "occupancy": occupancies[index],
                "altloc": alt_ids[index],
                "fullname": names[index],
                "serial_number": serial_numbers[index],
                "element": type_symbols[index],
            }
            for index in range(len(serial_numbers))
        ]

    @staticmethod
    def _read_document(source):
        """Load the MessagePack document from a path or a binary file handle."""
        if hasattr(source, "read"):
            # File handle: rewind when possible, then detect gzip by its
            # two-byte magic number since there is no file name to inspect.
            if hasattr(source, "seek"):
                source.seek(0)
            raw = source.read()
            if raw[:2] == b"\x1f\x8b":
                raw = gzip.decompress(raw)
            return msgpack.unpackb(raw, use_list=True)

        # Path (str or os.PathLike): choose the opener by file extension.
        opener = gzip.open if str(source).endswith(".gz") else open
        with opener(source, mode="rb") as file:
            return msgpack.unpack(file, use_list=True)

    def get_structure(self, id: Optional[str], source: str) -> "Structure":
        """Parse and return the PDB structure from a BinaryCIF file.

        :param str id: the PDB code for this structure; if empty or None, the
            ``_entry.id`` value stored in the file is used instead
        :param str source: the path to the BinaryCIF file (paths ending in
            ``.gz`` are decompressed), or a binary file handle
        :return: the PDB structure
        :rtype: Bio.PDB.Structure.Structure
        """
        result = self._read_document(source)

        # Flatten all categories into "<category>.<column>" -> column.
        columns = {
            f"{category['name']}.{column['name']}": column
            for data_block in result["dataBlocks"]
            for category in data_block["categories"]
            for column in category["columns"]
        }

        atom_model_numbers = _decode(columns["_atom_site.pdbx_PDB_model_num"])
        atom_chain_ids = _decode(columns["_atom_site.label_asym_id"])
        atom_residue_ids = self._get_residue_ids(columns)
        atom_component_ids = _decode(columns["_atom_site.label_comp_id"])
        atoms = self._get_atoms(columns)

        entry_id = _decode(columns["_entry.id"])[0]
        self._structure_builder.init_structure(id or entry_id)

        # Track what the builder currently has open so that a new model,
        # chain or residue is started only when the corresponding ID changes.
        builder_model_count = 0
        builder_model_number = None
        builder_chain_id = None
        builder_residue_id = None
        builder_component_id = None

        for model_number, chain_id, residue_id, component_id, atom in zip(
            atom_model_numbers,
            atom_chain_ids,
            atom_residue_ids,
            atom_component_ids,
            atoms,
        ):
            if model_number != builder_model_number:
                self._structure_builder.init_model(builder_model_count, model_number)
                builder_model_count += 1
                builder_model_number = model_number
                # A new model invalidates the open chain and residue.
                builder_chain_id = None
                builder_residue_id = None
            if chain_id != builder_chain_id:
                self._structure_builder.init_chain(chain_id)
                builder_chain_id = chain_id
                # A new chain invalidates the open residue.
                builder_residue_id = None
            if residue_id != builder_residue_id or component_id != builder_component_id:
                self._structure_builder.init_residue(component_id, *residue_id)
                builder_residue_id = residue_id
                builder_component_id = component_id

            self._structure_builder.init_atom(**atom)

        return self._structure_builder.get_structure()
|
@ -62,6 +62,28 @@ Example: get the list of the :math:`y` coordinates of all atoms
|
|||||||
|
|
||||||
>>> y_list = mmcif_dict["_atom_site.Cartn_y"]
|
>>> y_list = mmcif_dict["_atom_site.Cartn_y"]
|
||||||
|
|
||||||
|
Reading a BinaryCIF file
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
|
||||||
|
Create a ``BinaryCIFParser`` object:
|
||||||
|
|
||||||
|
.. doctest ../Tests/PDB lib:numpy lib:msgpack
|
||||||
|
|
||||||
|
.. code:: pycon
|
||||||
|
|
||||||
|
>>> from Bio.PDB.binary_cif import BinaryCIFParser
|
||||||
|
>>> parser = BinaryCIFParser()
|
||||||
|
|
||||||
|
Call ``get_structure`` with the path to the BinaryCIF file:
|
||||||
|
|
||||||
|
.. cont-doctest ../Tests/PDB lib:numpy lib:msgpack
|
||||||
|
|
||||||
|
.. code:: pycon
|
||||||
|
|
||||||
|
>>> parser.get_structure("1GBT", "1gbt.bcif.gz")
|
||||||
|
<Structure id=1GBT>
|
||||||
|
|
||||||
Reading files in the MMTF format
|
Reading files in the MMTF format
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
6
NEWS.rst
6
NEWS.rst
@ -68,6 +68,12 @@ A parser has been added for parsing PDBML (PDB XML) files.
|
|||||||
`PDBML <https://pdbml.wwpdb.org/>`_ is a representation of PDB data in XML format.
|
`PDBML <https://pdbml.wwpdb.org/>`_ is a representation of PDB data in XML format.
|
||||||
The PDB chapter of the tutorial is updated to show how to use the PDBML parser.
|
The PDB chapter of the tutorial is updated to show how to use the PDBML parser.
|
||||||
|
|
||||||
|
Additionally, a parser has been added for BinaryCIF files.
|
||||||
|
BinaryCIF is a compact, binary representation of CIF data.
|
||||||
|
The PDB tutorial is updated to show how to use the BinaryCIF parser.
|
||||||
|
The RCSB PDB recommends that users switch from MMTF to BinaryCIF.
|
||||||
|
See the `announcement <https://www.rcsb.org/news/feature/65a1af31c76ca3abcc925d0c>`_.
|
||||||
|
|
||||||
Bio.PDB Structure objects will now issue a warning - instead of an exception - when
|
Bio.PDB Structure objects will now issue a warning - instead of an exception - when
|
||||||
two children (e.g. residues) have identical IDs. This can be useful in some
|
two children (e.g. residues) have identical IDs. This can be useful in some
|
||||||
cases, e.g. renumbering residues in a chain.
|
cases, e.g. renumbering residues in a chain.
|
||||||
|
BIN
Tests/PDB/1gbt.bcif.gz
Normal file
BIN
Tests/PDB/1gbt.bcif.gz
Normal file
Binary file not shown.
BIN
Tests/PDB/3jqh.bcif.gz
Normal file
BIN
Tests/PDB/3jqh.bcif.gz
Normal file
Binary file not shown.
BIN
Tests/PDB/6wg6.bcif.gz
Normal file
BIN
Tests/PDB/6wg6.bcif.gz
Normal file
Binary file not shown.
@ -159,6 +159,14 @@ class SortingTests(unittest.TestCase):
|
|||||||
structure2.strictly_equals(structure)
|
structure2.strictly_equals(structure)
|
||||||
) # Strict equality should be symmetric
|
) # Strict equality should be symmetric
|
||||||
|
|
||||||
|
# Modify an atom
|
||||||
|
structure2[0]["A"][(" ", 200, " ")]["CA"].name = "AC"
|
||||||
|
|
||||||
|
self.assertFalse(structure.strictly_equals(structure2))
|
||||||
|
self.assertFalse(
|
||||||
|
structure2.strictly_equals(structure)
|
||||||
|
) # Strict equality should be symmetric
|
||||||
|
|
||||||
# Remove a chain from a model in the structure
|
# Remove a chain from a model in the structure
|
||||||
structure2[0].detach_child("A")
|
structure2[0].detach_child("A")
|
||||||
|
|
||||||
|
25
Tests/test_PDB_binary_cif.py
Normal file
25
Tests/test_PDB_binary_cif.py
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
"""
|
||||||
|
Tests for BinaryCIF code in the PDB package.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
from Bio.PDB import MMCIFParser
|
||||||
|
from Bio.PDB.binary_cif import BinaryCIFParser
|
||||||
|
|
||||||
|
|
||||||
|
class TestBinaryCIFParser(unittest.TestCase):
    """Check BinaryCIFParser against MMCIFParser on the same PDB entries."""

    def test_get_structure(self):
        """Structures parsed from BinaryCIF and mmCIF must be strictly equal."""
        reference_parser = MMCIFParser(auth_chains=False)
        binary_parser = BinaryCIFParser()

        for entry in ["1GBT", "6WG6", "3JQH"]:
            mmcif_path = f"PDB/{entry}.cif"
            bcif_path = f"PDB/{entry.lower()}.bcif.gz"

            expected = reference_parser.get_structure(entry, mmcif_path)
            actual = binary_parser.get_structure(entry, bcif_path)

            structures_match = expected.strictly_equals(
                actual, compare_coordinates=True
            )
            self.assertTrue(structures_match)
|
3
setup.py
3
setup.py
@ -193,8 +193,9 @@ EXTENSIONS = [
|
|||||||
Extension(
|
Extension(
|
||||||
"Bio.Cluster._cluster", ["Bio/Cluster/cluster.c", "Bio/Cluster/clustermodule.c"]
|
"Bio.Cluster._cluster", ["Bio/Cluster/cluster.c", "Bio/Cluster/clustermodule.c"]
|
||||||
),
|
),
|
||||||
Extension("Bio.PDB.kdtrees", ["Bio/PDB/kdtrees.c"]),
|
|
||||||
Extension("Bio.PDB.ccealign", ["Bio/PDB/ccealignmodule.c"]),
|
Extension("Bio.PDB.ccealign", ["Bio/PDB/ccealignmodule.c"]),
|
||||||
|
Extension("Bio.PDB.kdtrees", ["Bio/PDB/kdtrees.c"]),
|
||||||
|
Extension("Bio.PDB._bcif_helper", ["Bio/PDB/bcifhelpermodule.c"]),
|
||||||
Extension("Bio.SeqIO._twoBitIO", ["Bio/SeqIO/_twoBitIO.c"]),
|
Extension("Bio.SeqIO._twoBitIO", ["Bio/SeqIO/_twoBitIO.c"]),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user