mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 13:43:47 +08:00
Uniprotxml parse ligand tag (#5001)
* enable handling of <ligand> tags in UniProt XML files * test parsing of <ligand> in UniProt XML * add name to CONTRIB.rst * format with black * enable capture of multiple ligands from uniprot xml file. update tests accordingly * add mock UniProt XML file with multiple ligand tags per feature * test for correct len of all ligands list and multiple ligands per feature
This commit is contained in:
@ -16,19 +16,15 @@ originally introduced by SwissProt ("swiss" format in Bio.SeqIO).
|
||||
|
||||
"""
|
||||
|
||||
import warnings
|
||||
from xml.etree import ElementTree
|
||||
from xml.parsers.expat import errors
|
||||
import warnings
|
||||
|
||||
from Bio import SeqFeature
|
||||
from Bio import BiopythonDeprecationWarning, SeqFeature
|
||||
from Bio.Seq import Seq
|
||||
from Bio.SeqRecord import SeqRecord
|
||||
|
||||
from Bio import BiopythonDeprecationWarning
|
||||
|
||||
from .Interfaces import _BytesIOSource
|
||||
from .Interfaces import SequenceIterator
|
||||
|
||||
from .Interfaces import SequenceIterator, _BytesIOSource
|
||||
|
||||
NS = "{http://uniprot.org/uniprot}"
|
||||
REFERENCE_JOURNAL = "%(name)s %(volume)s:%(first)s-%(last)s(%(pub_date)s)"
|
||||
@ -472,6 +468,19 @@ class UniprotIterator(SequenceIterator):
|
||||
feature.location = SeqFeature.SimpleLocation(
|
||||
start_position, end_position
|
||||
)
|
||||
elif feature_element.tag == NS + "ligand":
|
||||
# Support multiple ligand entries per feature
|
||||
name = None
|
||||
db_ref = None
|
||||
for child in feature_element:
|
||||
if child.tag == NS + "name":
|
||||
name = child.text.strip() if child.text else None
|
||||
elif child.tag == NS + "dbReference":
|
||||
db_ref = child.attrib.get("id")
|
||||
# Append to a list of ligands in qualifiers
|
||||
lig_list = feature.qualifiers.setdefault("ligands", [])
|
||||
lig_list.append({"name": name, "db_ref": db_ref})
|
||||
continue
|
||||
else:
|
||||
try:
|
||||
feature.qualifiers[feature_element.tag.replace(NS, "")] = (
|
||||
|
@ -155,6 +155,7 @@ please open an issue on GitHub or mention it on the mailing list.
|
||||
- Iddo Friedberg <https://github.com/idoerg>
|
||||
- Igor S. Gerasimov <https://github.com/foxtran>
|
||||
- Ilya Flyamer <https://github.com/Phlya>
|
||||
- Ira Horecka <https://github.com/irahorecka>
|
||||
- Isaac Ellmen <https://github.com/Ellmen>
|
||||
- Ivan Antonov <https://github.com/vanya-antonov>
|
||||
- Jacek Śmietański <https://github.com/dadoskawina>
|
||||
|
2343
Tests/SwissProt/P62330.xml
Normal file
2343
Tests/SwissProt/P62330.xml
Normal file
File diff suppressed because it is too large
Load Diff
18
Tests/SwissProt/multiligand.xml
Normal file
18
Tests/SwissProt/multiligand.xml
Normal file
@ -0,0 +1,18 @@
|
||||
<uniprot xmlns="http://uniprot.org/uniprot">
|
||||
<entry dataset="Swiss-Prot" created="2024-01-01" modified="2025-01-01" version="1">
|
||||
<name>TEST_PROTEIN</name>
|
||||
<accession>DUMMY123</accession>
|
||||
<sequence length="1" mass="100" version="1">A</sequence>
|
||||
<feature type="binding site">
|
||||
<location><position position="1"/></location>
|
||||
<ligand>
|
||||
<name>ATP</name>
|
||||
<dbReference type="ChEBI" id="CHEBI:15422"/>
|
||||
</ligand>
|
||||
<ligand>
|
||||
<name>ADP</name>
|
||||
<dbReference type="ChEBI" id="CHEBI:16761"/>
|
||||
</ligand>
|
||||
</feature>
|
||||
</entry>
|
||||
</uniprot>
|
@ -498,6 +498,30 @@ class ParserTests(SeqRecordTestBaseClass):
|
||||
# test Entry version
|
||||
self.assertEqual(seq_record.annotations["entry_version"], 158)
|
||||
|
||||
def test_P62330_ligand(self):
|
||||
"""Test parsing of <ligand> in UniProt XML (P62330)."""
|
||||
record = SeqIO.read("SwissProt/P62330.xml", "uniprot-xml")
|
||||
all_ligands = []
|
||||
for f in record.features:
|
||||
all_ligands.extend(f.qualifiers.get("ligands", []))
|
||||
self.assertEqual(len(all_ligands), 5)
|
||||
self.assertEqual(all_ligands[0]["name"], "GTP")
|
||||
self.assertEqual(all_ligands[0]["db_ref"], "CHEBI:37565")
|
||||
|
||||
def test_multiligand_binding_site(self):
|
||||
"""Test parsing of binding site with multiple ligands in UniProt XML."""
|
||||
record = SeqIO.read("SwissProt/multiligand.xml", "uniprot-xml")
|
||||
sites = [
|
||||
f
|
||||
for f in record.features
|
||||
if f.type == "binding site" and "ligands" in f.qualifiers
|
||||
]
|
||||
self.assertTrue(sites, "No binding site with ligands found")
|
||||
self.assertEqual(len(sites[0].qualifiers["ligands"]), 2)
|
||||
ligand_names = {lig["name"] for lig in sites[0].qualifiers["ligands"]}
|
||||
self.assertIn("ATP", ligand_names)
|
||||
self.assertIn("ADP", ligand_names)
|
||||
|
||||
def compare_txt_xml(self, old, new):
|
||||
"""Compare text and XML based parser output."""
|
||||
self.assertEqual(old.id, new.id)
|
||||
|
Reference in New Issue
Block a user