mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 13:43:47 +08:00
$ ruff check --fix --select=I \ --config=lint.isort.force-single-line=true \ --config=lint.isort.order-by-type=false \ BioSQL/ Bio/ Tests/ Scripts/ Doc/ setup.py Using ruff version 0.4.10
181 lines
5.3 KiB
Python
181 lines
5.3 KiB
Python
# Copyright 2001 by Tarjei Mikkelsen. All rights reserved.
|
|
# Copyright 2007 by Michiel de Hoon. All rights reserved.
|
|
#
|
|
# This file is part of the Biopython distribution and governed by your
|
|
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
|
|
# Please see the LICENSE file that should have been included as part of this
|
|
# package.
|
|
|
|
"""Code to work with the KEGG Ligand/Compound database.
|
|
|
|
Functions:
|
|
- parse - Returns an iterator giving Record objects.
|
|
|
|
Classes:
|
|
- Record - A representation of a KEGG Ligand/Compound.
|
|
"""
|
|
|
|
from Bio.KEGG import _default_wrap
|
|
from Bio.KEGG import _struct_wrap
|
|
from Bio.KEGG import _wrap_kegg
|
|
from Bio.KEGG import _write_kegg
|
|
|
|
# Line wrapping rules used when a Record is written out.  Each rule is handed
# to Bio.KEGG._wrap_kegg as its ``wrap_rule`` argument; the layout of a rule is
# defined there.
name_wrap = [
    0,
    "",
    (" ", "$", 1, 1),
    ("-", "$", 1, 1),
]
# id_wrap and struct_wrap are callables: Record invokes them with an integer
# to obtain the rule for a given field.
id_wrap = _default_wrap
struct_wrap = _struct_wrap
|
|
|
|
|
|
class Record:
    """Container for the fields of one KEGG Ligand/Compound entry.

    Attributes:
     - entry       The entry identifier.
     - name        A list of the compound names.
     - formula     The chemical formula for the compound
     - mass        The molecular weight for the compound
     - pathway     A list of 3-tuples: ('PATH', pathway id, pathway)
     - enzyme      A list of the EC numbers.
     - structures  A list of 2-tuples: (database, list of struct ids)
     - dblinks     A list of 2-tuples: (database, list of link ids)

    ``str(record)`` concatenates the formatted fields in the order entry,
    name, formula, mass, pathway, enzyme, structures, dblinks and ends with
    the ``///`` terminator.
    """

    def __init__(self):
        """Create an empty record: blank strings and empty lists."""
        self.entry = ""
        self.name = []
        self.formula = ""
        self.mass = ""
        self.pathway = []
        self.enzyme = []
        self.structures = []
        self.dblinks = []

    def __str__(self):
        """Return the record formatted as text, terminated by ``///``."""
        sections = [
            self._entry(),
            self._name(),
            self._formula(),
            self._mass(),
            self._pathway(),
            self._enzyme(),
            self._structures(),
            self._dblinks(),
            "///",
        ]
        return "".join(sections)

    def _entry(self):
        # Single-item field: the identifier is passed through unwrapped.
        items = [self.entry]
        return _write_kegg("ENTRY", items)

    def _name(self):
        # Every compound name is wrapped with the name rule.
        wrapped = [_wrap_kegg(text, wrap_rule=name_wrap) for text in self.name]
        return _write_kegg("NAME", wrapped)

    def _formula(self):
        items = [self.formula]
        return _write_kegg("FORMULA", items)

    def _mass(self):
        items = [self.mass]
        return _write_kegg("MASS", items)

    def _pathway(self):
        # Only the first two members of each pathway tuple are written.
        joined = [item[0] + " " + item[1] for item in self.pathway]
        wrapped = [_wrap_kegg(text, wrap_rule=id_wrap(16)) for text in joined]
        return _write_kegg("PATHWAY", wrapped)

    def _enzyme(self):
        # EC numbers share the wrapping rule used for names.
        wrapped = [_wrap_kegg(text, wrap_rule=name_wrap) for text in self.enzyme]
        return _write_kegg("ENZYME", wrapped)

    def _structures(self):
        # "<database>: <id> <id> ... " with a trailing separator after the ids.
        joined = [item[0] + ": " + " ".join(item[1]) + " " for item in self.structures]
        wrapped = [_wrap_kegg(text, wrap_rule=struct_wrap(5)) for text in joined]
        return _write_kegg("STRUCTURES", wrapped)

    def _dblinks(self):
        # "<database>: <id> <id> ..."
        joined = [item[0] + ": " + " ".join(item[1]) for item in self.dblinks]
        wrapped = [_wrap_kegg(text, wrap_rule=id_wrap(9)) for text in joined]
        return _write_kegg("DBLINKS", wrapped)
|
|
|
|
|
|
def parse(handle):
    """Parse a KEGG Ligand/Compound file, returning Record objects.

    This is an iterator function, typically used in a for loop.  For
    example, using one of the example KEGG files in the Biopython
    test suite,

    >>> with open("KEGG/compound.sample") as handle:
    ...     for record in parse(handle):
    ...         print("%s %s" % (record.entry, record.name[0]))
    ...
    C00023 Iron
    C00017 Protein
    C00099 beta-Alanine
    C00294 Inosine
    C00298 Trypsin
    C00348 all-trans-Undecaprenyl phosphate
    C00349 2-Methyl-3-oxopropanoate
    C01386 NH2Mec

    Arguments:
     - handle - an iterable of text lines (for example an open file).

    Each line is read as a 12 character keyword column followed by the
    data.  A line whose keyword column is blank continues the field of the
    preceding keyword.  A line starting with ``///`` ends the current
    record, which is then yielded; lines after the last ``///`` are
    therefore never yielded.
    """
    record = Record()
    # Keyword of the field being read.  It starts empty so that a stray
    # continuation line before any keyword is ignored instead of raising.
    keyword = ""
    for line in handle:
        if line[:3] == "///":
            yield record
            record = Record()
            keyword = ""
            continue

        # Compare keywords without their column padding, so matching does not
        # depend on the exact number of pad characters.  Only trailing
        # whitespace is removed: an indented sub-keyword stays distinct from a
        # top-level keyword of the same name.
        field = line[:12].rstrip()
        if field:
            keyword = field
        data = line[12:].strip()
        if not data:
            # Nothing to store (blank line or keyword without a value).
            continue

        if keyword == "ENTRY":
            record.entry = data.split()[0]
        elif keyword == "NAME":
            record.name.append(data.strip(";"))
        elif keyword == "ENZYME":
            # EC numbers are laid out in fixed columns of 16 characters.
            for start in range(0, len(data), 16):
                record.enzyme.append(data[start : start + 16].strip())
        elif keyword == "PATHWAY":
            # Split on the first run of whitespace only: the pathway id is the
            # first word, and the pathway name may itself contain spaces.
            pieces = data.split(None, 1)
            pathway_id = pieces[0]
            pathway_name = pieces[1] if len(pieces) > 1 else ""
            record.pathway.append(("PATH", pathway_id, pathway_name))
        elif keyword == "FORMULA":
            record.formula = data
        elif keyword in ("MASS", "EXACT_MASS"):
            record.mass = data
        elif keyword == "DBLINKS":
            if ":" in data:
                # Split at the first colon only, so identifiers that contain
                # a colon themselves do not break the unpacking.
                database, _, identifiers = data.partition(":")
                record.dblinks.append((database, identifiers.split()))
            else:
                # Continuation of the previous database's identifier list;
                # the list is extended in place.
                record.dblinks[-1][1].extend(data.split())
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point.  The helper is imported here rather than at module
    # level, so it is only loaded when the file is executed directly.
    # NOTE(review): presumably runs the doctests embedded in this module's
    # docstrings - confirm against Bio._utils.run_doctest.
    from Bio._utils import run_doctest

    run_doctest()
|