mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 21:53:47 +08:00
198 lines
8.3 KiB
Python
198 lines
8.3 KiB
Python
# Copyright 2017 by Bernhard Thiel. All rights reserved.
|
|
# Revisions copyright 2024 James Krieger.
|
|
|
|
# This file is part of the Biopython distribution and governed by your
|
|
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
|
|
# Please see the LICENSE file that should have been included as part of this
|
|
# package.
|
|
|
|
"""Unit tests for PARTS of the parse_pdb_header module of Bio.PDB."""
|
|
|
|
import unittest
|
|
|
|
try:
|
|
import numpy as np # noqa F401
|
|
except ImportError:
|
|
from Bio import MissingPythonDependencyError
|
|
|
|
raise MissingPythonDependencyError(
|
|
"Install NumPy if you want to use Bio.PDB."
|
|
) from None
|
|
|
|
from Bio.PDB import PDBParser
|
|
from Bio.PDB.parse_pdb_header import _parse_remark_465
|
|
from Bio.PDB.parse_pdb_header import parse_pdb_header
|
|
|
|
|
|
class ParseReal(unittest.TestCase):
|
|
"""Testing with real PDB file(s)."""
|
|
|
|
def test_1(self):
|
|
"""Parse the header of a known PDB file (1A8O)."""
|
|
parser = PDBParser()
|
|
struct = parser.get_structure("1A8O", "PDB/1A8O.pdb")
|
|
self.assertAlmostEqual(struct.header["resolution"], 1.7)
|
|
# Case-insensitive string comparisons
|
|
known_strings = {
|
|
"author": "T.R.Gamble,S.Yoo,F.F.Vajdos,U.K.Von Schwedler,D.K.Worthylake,H.Wang,J.P.Mccutcheon,W.I.Sundquist,C.P.Hill",
|
|
"deposition_date": "1998-03-27",
|
|
"head": "viral protein",
|
|
"journal": "AUTH T.R.GAMBLE,S.YOO,F.F.VAJDOS,U.K.VON SCHWEDLER,AUTH 2 D.K.WORTHYLAKE,H.WANG,J.P.MCCUTCHEON,W.I.SUNDQUIST,AUTH 3 C.P.HILLTITL STRUCTURE OF THE CARBOXYL-TERMINAL DIMERIZATIONTITL 2 DOMAIN OF THE HIV-1 CAPSID PROTEIN.REF SCIENCE V. 278 849 1997REFN ISSN 0036-8075PMID 9346481DOI 10.1126/SCIENCE.278.5339.849",
|
|
"journal_reference": "t.r.gamble,s.yoo,f.f.vajdos,u.k.von schwedler, d.k.worthylake,h.wang,j.p.mccutcheon,w.i.sundquist, c.p.hill structure of the carboxyl-terminal dimerization domain of the hiv-1 capsid protein. science v. 278 849 1997 issn 0036-8075 9346481 10.1126/science.278.5339.849 ",
|
|
"keywords": "capsid, core protein, hiv, c-terminal domain, viral protein",
|
|
"name": "hiv capsid c-terminal domain",
|
|
"release_date": "1998-10-14",
|
|
"structure_method": "x-ray diffraction",
|
|
}
|
|
for key, expect in known_strings.items():
|
|
self.assertEqual(struct.header[key].lower(), expect.lower())
|
|
|
|
def test_2(self):
|
|
"""Parse the header of another PDB file (2BEG)."""
|
|
parser = PDBParser()
|
|
struct = parser.get_structure("2BEG", "PDB/2BEG.pdb")
|
|
known_strings = {
|
|
"author": "T.Luhrs,C.Ritter,M.Adrian,D.Riek-Loher,B.Bohrmann,H.Dobeli,D.Schubert,R.Riek",
|
|
"deposition_date": "2005-10-24",
|
|
"head": "protein fibril",
|
|
"journal": "AUTH T.LUHRS,C.RITTER,M.ADRIAN,D.RIEK-LOHER,B.BOHRMANN,AUTH 2 H.DOBELI,D.SCHUBERT,R.RIEKTITL 3D STRUCTURE OF ALZHEIMER'S AMYLOID-{BETA}(1-42)TITL 2 FIBRILS.REF PROC.NATL.ACAD.SCI.USA V. 102 17342 2005REFN ISSN 0027-8424PMID 16293696DOI 10.1073/PNAS.0506723102",
|
|
"journal_reference": "t.luhrs,c.ritter,m.adrian,d.riek-loher,b.bohrmann, h.dobeli,d.schubert,r.riek 3d structure of alzheimer's amyloid-{beta}(1-42) fibrils. proc.natl.acad.sci.usa v. 102 17342 2005 issn 0027-8424 16293696 10.1073/pnas.0506723102 ",
|
|
"keywords": "alzheimer's, fibril, protofilament, beta-sandwich, quenched hydrogen/deuterium exchange, pairwise mutagenesis, protein fibril",
|
|
"name": "3d structure of alzheimer's abeta(1-42) fibrils",
|
|
"release_date": "2005-11-22",
|
|
"structure_method": "solution nmr",
|
|
}
|
|
for key, expect in known_strings.items():
|
|
self.assertEqual(struct.header[key].lower(), expect.lower())
|
|
|
|
def test_parse_pdb_with_remark_465(self):
|
|
"""Tests that parse_pdb_header now can identify some REMARK 465 entries."""
|
|
header = parse_pdb_header("PDB/2XHE.pdb")
|
|
self.assertEqual(header["idcode"], "2XHE")
|
|
self.assertTrue(header["has_missing_residues"])
|
|
self.assertEqual(len(header["missing_residues"]), 142)
|
|
self.assertIn(
|
|
{
|
|
"model": None,
|
|
"res_name": "GLN",
|
|
"chain": "B",
|
|
"ssseq": 267,
|
|
"insertion": None,
|
|
},
|
|
header["missing_residues"],
|
|
)
|
|
header = parse_pdb_header("PDB/1A8O.pdb")
|
|
self.assertFalse(header["has_missing_residues"])
|
|
self.assertEqual(header["missing_residues"], [])
|
|
|
|
def test_parse_remark_465(self):
|
|
"""A UNIT-test for the private function _parse_remark_465."""
|
|
info = _parse_remark_465("GLU B 276")
|
|
self.assertEqual(
|
|
info,
|
|
{
|
|
"model": None,
|
|
"res_name": "GLU",
|
|
"chain": "B",
|
|
"ssseq": 276,
|
|
"insertion": None,
|
|
},
|
|
)
|
|
|
|
info = _parse_remark_465("U R -9") # Based on PDB id: 7c7a
|
|
self.assertEqual(
|
|
info,
|
|
{
|
|
"model": None,
|
|
"res_name": "U",
|
|
"chain": "R",
|
|
"ssseq": -9,
|
|
"insertion": None,
|
|
},
|
|
)
|
|
|
|
info = _parse_remark_465("2 GLU B 276B")
|
|
self.assertEqual(
|
|
info,
|
|
{
|
|
"model": 2,
|
|
"res_name": "GLU",
|
|
"chain": "B",
|
|
"ssseq": 276,
|
|
"insertion": "B",
|
|
},
|
|
)
|
|
|
|
info = _parse_remark_465("A 2 11")
|
|
self.assertEqual(
|
|
info,
|
|
{
|
|
"model": None,
|
|
"res_name": "A",
|
|
"chain": "2",
|
|
"ssseq": 11,
|
|
"insertion": None,
|
|
},
|
|
)
|
|
|
|
info = _parse_remark_465("1 DG B 9")
|
|
self.assertEqual(
|
|
info,
|
|
{"model": 1, "res_name": "DG", "chain": "B", "ssseq": 9, "insertion": None},
|
|
)
|
|
|
|
def test_parse_header_line(self):
|
|
"""Unit test for parsing and converting fields in HEADER record."""
|
|
header = parse_pdb_header("PDB/header.pdb")
|
|
self.assertEqual(header["head"], "structural genomics, unknown function")
|
|
self.assertEqual(header["idcode"], "3EFG")
|
|
self.assertEqual(header["deposition_date"], "2008-09-08")
|
|
|
|
def test_parse_title_line(self):
|
|
"""Unit test for correct parsing of multiline title records."""
|
|
header = parse_pdb_header("PDB/1LCD.pdb")
|
|
self.assertEqual(
|
|
header["name"],
|
|
"structure of the complex of lac repressor headpiece and an 11 "
|
|
"base-pair half-operator determined by nuclear magnetic resonance "
|
|
"spectroscopy and restrained molecular dynamics",
|
|
)
|
|
|
|
def test_parse_no_title(self):
|
|
"""Unit test for sensible result with no TITLE line."""
|
|
header = parse_pdb_header("PDB/occupancy.pdb")
|
|
self.assertEqual(header["name"], "")
|
|
|
|
def test_parse_pdb_with_remark_99(self):
|
|
"""Tests that parse_pdb_header can identify REMARK 99 ASTRAL entries."""
|
|
header = parse_pdb_header("PDB/d256ba_.ent")
|
|
self.assertIn("astral", header)
|
|
self.assertEqual(header["astral"]["SCOP-sccs"], "a.24.3.1")
|
|
self.assertEqual(header["astral"]["Source-PDB"], "256b")
|
|
self.assertEqual(header["astral"]["Region"], "a:")
|
|
self.assertEqual(header["astral"]["ASTRAL-SPACI"], "0.72")
|
|
|
|
def test_parse_pdb_with_remark_350_biomoltrans(self):
|
|
"""Tests that parse_pdb_header now can identify some REMARK 350 entries."""
|
|
header = parse_pdb_header("PDB/2XHE.pdb")
|
|
self.assertEqual(
|
|
header["biomoltrans"],
|
|
{
|
|
"1": [
|
|
["A", "B"],
|
|
" 1.000000 0.000000 0.000000 0.00000 \n",
|
|
" 0.000000 1.000000 0.000000 0.00000 \n",
|
|
" 0.000000 0.000000 1.000000 0.00000 \n",
|
|
]
|
|
},
|
|
)
|
|
|
|
def test_parse_pdb_without_remark_350_biomoltrans(self):
|
|
header = parse_pdb_header("PDB/1LCD.pdb")
|
|
self.assertEqual(header["biomoltrans"], {})
|
|
|
|
|
|
if __name__ == "__main__":
|
|
runner = unittest.TextTestRunner(verbosity=2)
|
|
unittest.main(testRunner=runner)
|