mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 13:43:47 +08:00
$ ruff check --fix --select=I \ --config=lint.isort.force-single-line=true \ --config=lint.isort.order-by-type=false \ BioSQL/ Bio/ Tests/ Scripts/ Doc/ setup.py Using ruff version 0.4.10
340 lines
11 KiB
Python
340 lines
11 KiB
Python
# Copyright 2001 by Tarjei Mikkelsen. All rights reserved.
|
|
# Copyright 2007 by Michiel de Hoon. All rights reserved.
|
|
# This code is part of the Biopython distribution and governed by its
|
|
# license. Please see the LICENSE file that should have been included
|
|
# as part of this package.
|
|
|
|
"""Code to work with the KEGG Enzyme database.
|
|
|
|
Functions:
|
|
- parse - Returns an iterator giving Record objects.
|
|
|
|
Classes:
|
|
- Record - Holds the information from a KEGG Enzyme record.
|
|
"""
|
|
|
|
from Bio.KEGG import _default_wrap
|
|
from Bio.KEGG import _struct_wrap
|
|
from Bio.KEGG import _wrap_kegg
|
|
from Bio.KEGG import _write_kegg
|
|
|
|
# Set up line wrapping rules (see Bio.KEGG._wrap_kegg)
|
|
rxn_wrap = [
|
|
0,
|
|
"",
|
|
(" + ", "", 1, 1),
|
|
(" = ", "", 1, 1),
|
|
(" ", "$", 1, 1),
|
|
("-", "$", 1, 1),
|
|
]
|
|
name_wrap = [0, "", (" ", "$", 1, 1), ("-", "$", 1, 1)]
|
|
id_wrap = _default_wrap
|
|
struct_wrap = _struct_wrap
|
|
|
|
|
|
class Record:
|
|
"""Holds info from a KEGG Enzyme record.
|
|
|
|
Attributes:
|
|
- entry The EC number (without the 'EC ').
|
|
- name A list of the enzyme names.
|
|
- classname A list of the classification terms.
|
|
- sysname The systematic name of the enzyme.
|
|
- reaction A list of the reaction description strings.
|
|
- substrate A list of the substrates.
|
|
- product A list of the products.
|
|
- inhibitor A list of the inhibitors.
|
|
- cofactor A list of the cofactors.
|
|
- effector A list of the effectors.
|
|
- comment A list of the comment strings.
|
|
- pathway A list of 3-tuples: (database, id, pathway)
|
|
- genes A list of 2-tuples: (organism, list of gene ids)
|
|
- disease A list of 3-tuples: (database, id, disease)
|
|
- structures A list of 2-tuples: (database, list of struct ids)
|
|
- dblinks A list of 2-tuples: (database, list of db ids)
|
|
|
|
"""
|
|
|
|
def __init__(self):
|
|
"""Initialize a new Record."""
|
|
self.entry = ""
|
|
self.name = []
|
|
self.classname = []
|
|
self.sysname = []
|
|
self.reaction = []
|
|
self.substrate = []
|
|
self.product = []
|
|
self.inhibitor = []
|
|
self.cofactor = []
|
|
self.effector = []
|
|
self.comment = []
|
|
self.pathway = []
|
|
self.genes = []
|
|
self.disease = []
|
|
self.structures = []
|
|
self.dblinks = []
|
|
|
|
def __str__(self):
|
|
"""Return a string representation of this Record."""
|
|
return (
|
|
self._entry()
|
|
+ self._name()
|
|
+ self._classname()
|
|
+ self._sysname()
|
|
+ self._reaction()
|
|
+ self._substrate()
|
|
+ self._product()
|
|
+ self._inhibitor()
|
|
+ self._cofactor()
|
|
+ self._effector()
|
|
+ self._comment()
|
|
+ self._pathway()
|
|
+ self._genes()
|
|
+ self._disease()
|
|
+ self._structures()
|
|
+ self._dblinks()
|
|
+ "///"
|
|
)
|
|
|
|
def _entry(self):
|
|
return _write_kegg("ENTRY", ["EC " + self.entry])
|
|
|
|
def _name(self):
|
|
return _write_kegg(
|
|
"NAME", [_wrap_kegg(line, wrap_rule=name_wrap) for line in self.name]
|
|
)
|
|
|
|
def _classname(self):
|
|
return _write_kegg("CLASS", self.classname)
|
|
|
|
def _sysname(self):
|
|
return _write_kegg(
|
|
"SYSNAME", [_wrap_kegg(line, wrap_rule=name_wrap) for line in self.sysname]
|
|
)
|
|
|
|
def _reaction(self):
|
|
return _write_kegg(
|
|
"REACTION", [_wrap_kegg(line, wrap_rule=rxn_wrap) for line in self.reaction]
|
|
)
|
|
|
|
def _substrate(self):
|
|
return _write_kegg(
|
|
"SUBSTRATE",
|
|
[_wrap_kegg(line, wrap_rule=name_wrap) for line in self.substrate],
|
|
)
|
|
|
|
def _product(self):
|
|
return _write_kegg(
|
|
"PRODUCT", [_wrap_kegg(line, wrap_rule=name_wrap) for line in self.product]
|
|
)
|
|
|
|
def _inhibitor(self):
|
|
return _write_kegg(
|
|
"INHIBITOR",
|
|
[_wrap_kegg(line, wrap_rule=name_wrap) for line in self.inhibitor],
|
|
)
|
|
|
|
def _cofactor(self):
|
|
return _write_kegg(
|
|
"COFACTOR",
|
|
[_wrap_kegg(line, wrap_rule=name_wrap) for line in self.cofactor],
|
|
)
|
|
|
|
def _effector(self):
|
|
return _write_kegg(
|
|
"EFFECTOR",
|
|
[_wrap_kegg(line, wrap_rule=name_wrap) for line in self.effector],
|
|
)
|
|
|
|
def _comment(self):
|
|
return _write_kegg(
|
|
"COMMENT", [_wrap_kegg(line, wrap_rule=id_wrap(0)) for line in self.comment]
|
|
)
|
|
|
|
def _pathway(self):
|
|
s = []
|
|
for entry in self.pathway:
|
|
s.append(entry[0] + ": " + entry[1] + " " + entry[2])
|
|
return _write_kegg(
|
|
"PATHWAY", [_wrap_kegg(line, wrap_rule=id_wrap(16)) for line in s]
|
|
)
|
|
|
|
def _genes(self):
|
|
s = []
|
|
for entry in self.genes:
|
|
s.append(entry[0] + ": " + " ".join(entry[1]))
|
|
return _write_kegg(
|
|
"GENES", [_wrap_kegg(line, wrap_rule=id_wrap(5)) for line in s]
|
|
)
|
|
|
|
def _disease(self):
|
|
s = []
|
|
for entry in self.disease:
|
|
s.append(entry[0] + ": " + entry[1] + " " + entry[2])
|
|
return _write_kegg(
|
|
"DISEASE", [_wrap_kegg(line, wrap_rule=id_wrap(13)) for line in s]
|
|
)
|
|
|
|
def _structures(self):
|
|
s = []
|
|
for entry in self.structures:
|
|
s.append(entry[0] + ": " + " ".join(entry[1]) + " ")
|
|
return _write_kegg(
|
|
"STRUCTURES", [_wrap_kegg(line, wrap_rule=struct_wrap(5)) for line in s]
|
|
)
|
|
|
|
def _dblinks(self):
|
|
# This is a bit of a cheat that won't work if enzyme entries
|
|
# have more than one link id per db id. For now, that's not
|
|
# the case - storing links ids in a list is only to make
|
|
# this class similar to the Compound.Record class.
|
|
s = []
|
|
for entry in self.dblinks:
|
|
s.append(entry[0] + ": " + " ".join(entry[1]))
|
|
return _write_kegg("DBLINKS", s)
|
|
|
|
|
|
def parse(handle):
|
|
"""Parse a KEGG Enzyme file, returning Record objects.
|
|
|
|
This is an iterator function, typically used in a for loop. For
|
|
example, using one of the example KEGG files in the Biopython
|
|
test suite,
|
|
|
|
>>> with open("KEGG/enzyme.sample") as handle:
|
|
... for record in parse(handle):
|
|
... print("%s %s" % (record.entry, record.name[0]))
|
|
...
|
|
1.1.1.1 alcohol dehydrogenase
|
|
1.1.1.62 17beta-estradiol 17-dehydrogenase
|
|
1.1.1.68 Transferred to 1.5.1.20
|
|
1.6.5.3 NADH:ubiquinone reductase (H+-translocating)
|
|
1.14.13.28 3,9-dihydroxypterocarpan 6a-monooxygenase
|
|
2.4.1.68 glycoprotein 6-alpha-L-fucosyltransferase
|
|
3.1.1.6 acetylesterase
|
|
2.7.2.1 acetate kinase
|
|
|
|
"""
|
|
record = Record()
|
|
for line in handle:
|
|
if line[:3] == "///":
|
|
yield record
|
|
record = Record()
|
|
continue
|
|
if line[:12] != " ":
|
|
keyword = line[:12]
|
|
data = line[12:].strip()
|
|
if keyword == "ENTRY ":
|
|
words = data.split()
|
|
record.entry = words[1]
|
|
elif keyword == "CLASS ":
|
|
record.classname.append(data)
|
|
elif keyword == "COFACTOR ":
|
|
record.cofactor.append(data)
|
|
elif keyword == "COMMENT ":
|
|
record.comment.append(data)
|
|
elif keyword == "DBLINKS ":
|
|
if ":" in data:
|
|
key, values = data.split(":")
|
|
values = values.split()
|
|
row = (key, values)
|
|
record.dblinks.append(row)
|
|
else:
|
|
row = record.dblinks[-1]
|
|
key, values = row
|
|
values.extend(data.split())
|
|
row = key, values
|
|
record.dblinks[-1] = row
|
|
elif keyword == "DISEASE ":
|
|
if ":" in data:
|
|
database, data = data.split(":")
|
|
number, name = data.split(None, 1)
|
|
row = (database, number, name)
|
|
record.disease.append(row)
|
|
else:
|
|
row = record.disease[-1]
|
|
database, number, name = row
|
|
name = name + " " + data
|
|
row = database, number, name
|
|
record.disease[-1] = row
|
|
elif keyword == "EFFECTOR ":
|
|
record.effector.append(data.strip(";"))
|
|
elif keyword == "GENES ":
|
|
if data[3:5] == ": " or data[4:6] == ": ":
|
|
key, values = data.split(":", 1)
|
|
values = [value.split("(")[0] for value in values.split()]
|
|
row = (key, values)
|
|
record.genes.append(row)
|
|
else:
|
|
row = record.genes[-1]
|
|
key, values = row
|
|
for value in data.split():
|
|
value = value.split("(")[0]
|
|
values.append(value)
|
|
row = key, values
|
|
record.genes[-1] = row
|
|
elif keyword == "INHIBITOR ":
|
|
record.inhibitor.append(data.strip(";"))
|
|
elif keyword == "NAME ":
|
|
record.name.append(data.strip(";"))
|
|
elif keyword == "PATHWAY ":
|
|
if data[:5] == "PATH:":
|
|
_, map_num, name = data.split(None, 2)
|
|
pathway = ("PATH", map_num, name)
|
|
record.pathway.append(pathway)
|
|
else:
|
|
ec_num, name = data.split(None, 1)
|
|
pathway = "PATH", ec_num, name
|
|
record.pathway.append(pathway)
|
|
elif keyword == "PRODUCT ":
|
|
record.product.append(data.strip(";"))
|
|
elif keyword == "REACTION ":
|
|
record.reaction.append(data.strip(";"))
|
|
elif keyword == "STRUCTURES ":
|
|
if data[:4] == "PDB:":
|
|
database = data[:3]
|
|
accessions = data[4:].split()
|
|
row = (database, accessions)
|
|
record.structures.append(row)
|
|
else:
|
|
row = record.structures[-1]
|
|
database, accessions = row
|
|
accessions.extend(data.split())
|
|
row = (database, accessions)
|
|
record.structures[-1] = row
|
|
elif keyword == "SUBSTRATE ":
|
|
record.substrate.append(data.strip(";"))
|
|
elif keyword == "SYSNAME ":
|
|
record.sysname.append(data.strip(";"))
|
|
|
|
|
|
def read(handle):
|
|
"""Parse a KEGG Enzyme file with exactly one entry.
|
|
|
|
If the handle contains no records, or more than one record,
|
|
an exception is raised. For example:
|
|
|
|
>>> with open("KEGG/enzyme.new") as handle:
|
|
... record = read(handle)
|
|
... print("%s %s" % (record.entry, record.name[0]))
|
|
...
|
|
6.2.1.25 benzoate---CoA ligase
|
|
"""
|
|
records = parse(handle)
|
|
try:
|
|
record = next(records)
|
|
except StopIteration:
|
|
raise ValueError("No records found in handle") from None
|
|
try:
|
|
next(records)
|
|
raise ValueError("More than one record found in handle")
|
|
except StopIteration:
|
|
pass
|
|
return record
|
|
|
|
|
|
if __name__ == "__main__":
|
|
from Bio._utils import run_doctest
|
|
|
|
run_doctest()
|