mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 21:53:47 +08:00
Uniprotxml parse ligand tag (#5001)
* enable handling of <ligand> tags in UniProt XML files * test parsing of <ligand> in UniProt XML * add name to CONTRIB.rst * format with black * enable capture of multiple ligands from uniprot xml file. update tests accordingly * add mock UniProt XML file with multiple ligand tags per feature * test for correct len of all ligands list and multiple ligands per feature
This commit is contained in:
@ -16,19 +16,15 @@ originally introduced by SwissProt ("swiss" format in Bio.SeqIO).
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import warnings
|
||||||
from xml.etree import ElementTree
|
from xml.etree import ElementTree
|
||||||
from xml.parsers.expat import errors
|
from xml.parsers.expat import errors
|
||||||
import warnings
|
|
||||||
|
|
||||||
from Bio import SeqFeature
|
from Bio import BiopythonDeprecationWarning, SeqFeature
|
||||||
from Bio.Seq import Seq
|
from Bio.Seq import Seq
|
||||||
from Bio.SeqRecord import SeqRecord
|
from Bio.SeqRecord import SeqRecord
|
||||||
|
|
||||||
from Bio import BiopythonDeprecationWarning
|
from .Interfaces import SequenceIterator, _BytesIOSource
|
||||||
|
|
||||||
from .Interfaces import _BytesIOSource
|
|
||||||
from .Interfaces import SequenceIterator
|
|
||||||
|
|
||||||
|
|
||||||
NS = "{http://uniprot.org/uniprot}"
|
NS = "{http://uniprot.org/uniprot}"
|
||||||
REFERENCE_JOURNAL = "%(name)s %(volume)s:%(first)s-%(last)s(%(pub_date)s)"
|
REFERENCE_JOURNAL = "%(name)s %(volume)s:%(first)s-%(last)s(%(pub_date)s)"
|
||||||
@ -472,6 +468,19 @@ class UniprotIterator(SequenceIterator):
|
|||||||
feature.location = SeqFeature.SimpleLocation(
|
feature.location = SeqFeature.SimpleLocation(
|
||||||
start_position, end_position
|
start_position, end_position
|
||||||
)
|
)
|
||||||
|
elif feature_element.tag == NS + "ligand":
|
||||||
|
# Support multiple ligand entries per feature
|
||||||
|
name = None
|
||||||
|
db_ref = None
|
||||||
|
for child in feature_element:
|
||||||
|
if child.tag == NS + "name":
|
||||||
|
name = child.text.strip() if child.text else None
|
||||||
|
elif child.tag == NS + "dbReference":
|
||||||
|
db_ref = child.attrib.get("id")
|
||||||
|
# Append to a list of ligands in qualifiers
|
||||||
|
lig_list = feature.qualifiers.setdefault("ligands", [])
|
||||||
|
lig_list.append({"name": name, "db_ref": db_ref})
|
||||||
|
continue
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
feature.qualifiers[feature_element.tag.replace(NS, "")] = (
|
feature.qualifiers[feature_element.tag.replace(NS, "")] = (
|
||||||
|
@ -155,6 +155,7 @@ please open an issue on GitHub or mention it on the mailing list.
|
|||||||
- Iddo Friedberg <https://github.com/idoerg>
|
- Iddo Friedberg <https://github.com/idoerg>
|
||||||
- Igor S. Gerasimov <https://github.com/foxtran>
|
- Igor S. Gerasimov <https://github.com/foxtran>
|
||||||
- Ilya Flyamer <https://github.com/Phlya>
|
- Ilya Flyamer <https://github.com/Phlya>
|
||||||
|
- Ira Horecka <https://github.com/irahorecka>
|
||||||
- Isaac Ellmen <https://github.com/Ellmen>
|
- Isaac Ellmen <https://github.com/Ellmen>
|
||||||
- Ivan Antonov <https://github.com/vanya-antonov>
|
- Ivan Antonov <https://github.com/vanya-antonov>
|
||||||
- Jacek Śmietański <https://github.com/dadoskawina>
|
- Jacek Śmietański <https://github.com/dadoskawina>
|
||||||
|
2343
Tests/SwissProt/P62330.xml
Normal file
2343
Tests/SwissProt/P62330.xml
Normal file
File diff suppressed because it is too large
Load Diff
18
Tests/SwissProt/multiligand.xml
Normal file
18
Tests/SwissProt/multiligand.xml
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
<uniprot xmlns="http://uniprot.org/uniprot">
|
||||||
|
<entry dataset="Swiss-Prot" created="2024-01-01" modified="2025-01-01" version="1">
|
||||||
|
<name>TEST_PROTEIN</name>
|
||||||
|
<accession>DUMMY123</accession>
|
||||||
|
<sequence length="1" mass="100" version="1">A</sequence>
|
||||||
|
<feature type="binding site">
|
||||||
|
<location><position position="1"/></location>
|
||||||
|
<ligand>
|
||||||
|
<name>ATP</name>
|
||||||
|
<dbReference type="ChEBI" id="CHEBI:15422"/>
|
||||||
|
</ligand>
|
||||||
|
<ligand>
|
||||||
|
<name>ADP</name>
|
||||||
|
<dbReference type="ChEBI" id="CHEBI:16761"/>
|
||||||
|
</ligand>
|
||||||
|
</feature>
|
||||||
|
</entry>
|
||||||
|
</uniprot>
|
@ -498,6 +498,30 @@ class ParserTests(SeqRecordTestBaseClass):
|
|||||||
# test Entry version
|
# test Entry version
|
||||||
self.assertEqual(seq_record.annotations["entry_version"], 158)
|
self.assertEqual(seq_record.annotations["entry_version"], 158)
|
||||||
|
|
||||||
|
def test_P62330_ligand(self):
|
||||||
|
"""Test parsing of <ligand> in UniProt XML (P62330)."""
|
||||||
|
record = SeqIO.read("SwissProt/P62330.xml", "uniprot-xml")
|
||||||
|
all_ligands = []
|
||||||
|
for f in record.features:
|
||||||
|
all_ligands.extend(f.qualifiers.get("ligands", []))
|
||||||
|
self.assertEqual(len(all_ligands), 5)
|
||||||
|
self.assertEqual(all_ligands[0]["name"], "GTP")
|
||||||
|
self.assertEqual(all_ligands[0]["db_ref"], "CHEBI:37565")
|
||||||
|
|
||||||
|
def test_multiligand_binding_site(self):
|
||||||
|
"""Test parsing of binding site with multiple ligands in UniProt XML."""
|
||||||
|
record = SeqIO.read("SwissProt/multiligand.xml", "uniprot-xml")
|
||||||
|
sites = [
|
||||||
|
f
|
||||||
|
for f in record.features
|
||||||
|
if f.type == "binding site" and "ligands" in f.qualifiers
|
||||||
|
]
|
||||||
|
self.assertTrue(sites, "No binding site with ligands found")
|
||||||
|
self.assertEqual(len(sites[0].qualifiers["ligands"]), 2)
|
||||||
|
ligand_names = {lig["name"] for lig in sites[0].qualifiers["ligands"]}
|
||||||
|
self.assertIn("ATP", ligand_names)
|
||||||
|
self.assertIn("ADP", ligand_names)
|
||||||
|
|
||||||
def compare_txt_xml(self, old, new):
|
def compare_txt_xml(self, old, new):
|
||||||
"""Compare text and XML based parser output."""
|
"""Compare text and XML based parser output."""
|
||||||
self.assertEqual(old.id, new.id)
|
self.assertEqual(old.id, new.id)
|
||||||
|
Reference in New Issue
Block a user