mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 21:53:47 +08:00
$ ruff check --fix --select=I \ --config=lint.isort.force-single-line=true \ --config=lint.isort.order-by-type=false \ BioSQL/ Bio/ Tests/ Scripts/ Doc/ setup.py Using ruff version 0.4.10
194 lines
6.2 KiB
Python
194 lines
6.2 KiB
Python
# Copyright 2013 by Leighton Pritchard. All rights reserved.
|
|
#
|
|
# This file is part of the Biopython distribution and governed by your
|
|
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
|
|
# Please see the LICENSE file that should have been included as part of this
|
|
# package.
|
|
|
|
"""Classes and functions to parse a KGML pathway map.
|
|
|
|
The KGML pathway map is parsed into the object structure defined in
|
|
KGML_Pathway.py in this module.
|
|
|
|
Classes:
|
|
- KGMLParser - Parses KGML file
|
|
|
|
Functions:
|
|
- read - Returns a single Pathway object, using KGMLParser internally
|
|
|
|
"""
|
|
|
|
from io import StringIO
|
|
from xml.etree import ElementTree
|
|
|
|
from Bio.KEGG.KGML.KGML_pathway import Component
|
|
from Bio.KEGG.KGML.KGML_pathway import Entry
|
|
from Bio.KEGG.KGML.KGML_pathway import Graphics
|
|
from Bio.KEGG.KGML.KGML_pathway import Pathway
|
|
from Bio.KEGG.KGML.KGML_pathway import Reaction
|
|
from Bio.KEGG.KGML.KGML_pathway import Relation
|
|
|
|
|
|
def read(handle):
|
|
"""Parse a single KEGG Pathway from given file handle.
|
|
|
|
Returns a single Pathway object. There should be one and only
|
|
one pathway in each file, but there may well be pathological
|
|
examples out there.
|
|
"""
|
|
pathways = parse(handle)
|
|
try:
|
|
pathway = next(pathways)
|
|
except StopIteration:
|
|
raise ValueError("No pathways found in handle") from None
|
|
try:
|
|
next(pathways)
|
|
raise ValueError("More than one pathway found in handle")
|
|
except StopIteration:
|
|
pass
|
|
return pathway
|
|
|
|
|
|
def parse(handle):
|
|
"""Return an iterator over Pathway elements.
|
|
|
|
Arguments:
|
|
- handle - file handle to a KGML file for parsing, or a KGML string
|
|
|
|
This is a generator for the return of multiple Pathway objects.
|
|
|
|
"""
|
|
# Check handle
|
|
try:
|
|
handle.read(0)
|
|
except AttributeError:
|
|
try:
|
|
handle = StringIO(handle)
|
|
except TypeError:
|
|
raise TypeError(
|
|
"An XML-containing handle or an XML string must be provided"
|
|
) from None
|
|
# Parse XML and return each Pathway
|
|
for event, elem in ElementTree.iterparse(handle, events=("start", "end")):
|
|
if event == "end" and elem.tag == "pathway":
|
|
yield KGMLParser(elem).parse()
|
|
elem.clear()
|
|
|
|
|
|
class KGMLParser:
|
|
"""Parses a KGML XML Pathway entry into a Pathway object.
|
|
|
|
Example: Read and parse large metabolism file
|
|
|
|
>>> from Bio.KEGG.KGML.KGML_parser import read
|
|
>>> pathway = read(open('KEGG/ko01100.xml', 'r'))
|
|
>>> print(len(pathway.entries))
|
|
3628
|
|
>>> print(len(pathway.reactions))
|
|
1672
|
|
>>> print(len(pathway.maps))
|
|
149
|
|
|
|
>>> pathway = read(open('KEGG/ko00010.xml', 'r'))
|
|
>>> print(pathway) #doctest: +NORMALIZE_WHITESPACE
|
|
Pathway: Glycolysis / Gluconeogenesis
|
|
KEGG ID: path:ko00010
|
|
Image file: http://www.kegg.jp/kegg/pathway/ko/ko00010.png
|
|
Organism: ko
|
|
Entries: 99
|
|
Entry types:
|
|
ortholog: 61
|
|
compound: 31
|
|
map: 7
|
|
|
|
"""
|
|
|
|
def __init__(self, elem):
|
|
"""Initialize the class."""
|
|
self.entry = elem
|
|
|
|
def parse(self):
|
|
"""Parse the input elements."""
|
|
|
|
def _parse_pathway(attrib):
|
|
for k, v in attrib.items():
|
|
self.pathway.__setattr__(k, v)
|
|
|
|
def _parse_entry(element):
|
|
new_entry = Entry()
|
|
for k, v in element.attrib.items():
|
|
new_entry.__setattr__(k, v)
|
|
for subelement in element:
|
|
if subelement.tag == "graphics":
|
|
_parse_graphics(subelement, new_entry)
|
|
elif subelement.tag == "component":
|
|
_parse_component(subelement, new_entry)
|
|
self.pathway.add_entry(new_entry)
|
|
|
|
def _parse_graphics(element, entry):
|
|
new_graphics = Graphics(entry)
|
|
for k, v in element.attrib.items():
|
|
new_graphics.__setattr__(k, v)
|
|
entry.add_graphics(new_graphics)
|
|
|
|
def _parse_component(element, entry):
|
|
new_component = Component(entry)
|
|
for k, v in element.attrib.items():
|
|
new_component.__setattr__(k, v)
|
|
entry.add_component(new_component)
|
|
|
|
def _parse_reaction(element):
|
|
new_reaction = Reaction()
|
|
for k, v in element.attrib.items():
|
|
new_reaction.__setattr__(k, v)
|
|
for subelement in element:
|
|
if subelement.tag == "substrate":
|
|
new_reaction.add_substrate(int(subelement.attrib["id"]))
|
|
elif subelement.tag == "product":
|
|
new_reaction.add_product(int(subelement.attrib["id"]))
|
|
self.pathway.add_reaction(new_reaction)
|
|
|
|
def _parse_relation(element):
|
|
new_relation = Relation()
|
|
new_relation.entry1 = int(element.attrib["entry1"])
|
|
new_relation.entry2 = int(element.attrib["entry2"])
|
|
new_relation.type = element.attrib["type"]
|
|
for subtype in element:
|
|
name, value = subtype.attrib["name"], subtype.attrib["value"]
|
|
if name in ("compound", "hidden compound"):
|
|
new_relation.subtypes.append((name, int(value)))
|
|
else:
|
|
new_relation.subtypes.append((name, value))
|
|
self.pathway.add_relation(new_relation)
|
|
|
|
# ==========
|
|
# Initialize Pathway
|
|
self.pathway = Pathway()
|
|
# Get information about the pathway itself
|
|
_parse_pathway(self.entry.attrib)
|
|
for element in self.entry:
|
|
if element.tag == "entry":
|
|
_parse_entry(element)
|
|
elif element.tag == "reaction":
|
|
_parse_reaction(element)
|
|
elif element.tag == "relation":
|
|
_parse_relation(element)
|
|
# Parsing of some elements not implemented - no examples yet
|
|
else:
|
|
# This should warn us of any unimplemented tags
|
|
import warnings
|
|
|
|
from Bio import BiopythonParserWarning
|
|
|
|
warnings.warn(
|
|
f"Warning: tag {element.tag} not implemented in parser",
|
|
BiopythonParserWarning,
|
|
)
|
|
return self.pathway
|
|
|
|
|
|
if __name__ == "__main__":
|
|
from Bio._utils import run_doctest
|
|
|
|
run_doctest(verbose=0)
|