Files
biopython/Bio/KEGG/Compound/__init__.py
ruff-isort de0bb21fb3 Apply isort (forcing single lines, not sorting by type) via ruff
$ ruff check --fix --select=I \
  --config=lint.isort.force-single-line=true \
  --config=lint.isort.order-by-type=false \
  BioSQL/ Bio/ Tests/ Scripts/ Doc/ setup.py

Using ruff version 0.4.10
2024-06-26 15:31:39 +09:00

181 lines
5.3 KiB
Python

# Copyright 2001 by Tarjei Mikkelsen. All rights reserved.
# Copyright 2007 by Michiel de Hoon. All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Code to work with the KEGG Ligand/Compound database.
Functions:
- parse - Returns an iterator giving Record objects.
Classes:
- Record - A representation of a KEGG Ligand/Compound.
"""
from Bio.KEGG import _default_wrap
from Bio.KEGG import _struct_wrap
from Bio.KEGG import _wrap_kegg
from Bio.KEGG import _write_kegg
# Set up line wrapping rules (see Bio.KEGG._wrap_kegg)
name_wrap = [0, "", (" ", "$", 1, 1), ("-", "$", 1, 1)]
id_wrap = _default_wrap
struct_wrap = _struct_wrap
class Record:
"""Holds info from a KEGG Ligand/Compound record.
Attributes:
- entry The entry identifier.
- name A list of the compound names.
- formula The chemical formula for the compound
- mass The molecular weight for the compound
- pathway A list of 3-tuples: ('PATH', pathway id, pathway)
- enzyme A list of the EC numbers.
- structures A list of 2-tuples: (database, list of struct ids)
- dblinks A list of 2-tuples: (database, list of link ids)
"""
def __init__(self):
"""Initialize as new record."""
self.entry = ""
self.name = []
self.formula = ""
self.mass = ""
self.pathway = []
self.enzyme = []
self.structures = []
self.dblinks = []
def __str__(self):
"""Return a string representation of this Record."""
return (
self._entry()
+ self._name()
+ self._formula()
+ self._mass()
+ self._pathway()
+ self._enzyme()
+ self._structures()
+ self._dblinks()
+ "///"
)
def _entry(self):
return _write_kegg("ENTRY", [self.entry])
def _name(self):
return _write_kegg(
"NAME", [_wrap_kegg(line, wrap_rule=name_wrap) for line in self.name]
)
def _formula(self):
return _write_kegg("FORMULA", [self.formula])
def _mass(self):
return _write_kegg("MASS", [self.mass])
def _pathway(self):
s = []
for entry in self.pathway:
s.append(entry[0] + " " + entry[1])
return _write_kegg(
"PATHWAY", [_wrap_kegg(line, wrap_rule=id_wrap(16)) for line in s]
)
def _enzyme(self):
return _write_kegg(
"ENZYME", [_wrap_kegg(line, wrap_rule=name_wrap) for line in self.enzyme]
)
def _structures(self):
s = []
for entry in self.structures:
s.append(entry[0] + ": " + " ".join(entry[1]) + " ")
return _write_kegg(
"STRUCTURES", [_wrap_kegg(line, wrap_rule=struct_wrap(5)) for line in s]
)
def _dblinks(self):
s = []
for entry in self.dblinks:
s.append(entry[0] + ": " + " ".join(entry[1]))
return _write_kegg(
"DBLINKS", [_wrap_kegg(line, wrap_rule=id_wrap(9)) for line in s]
)
def parse(handle):
"""Parse a KEGG Ligan/Compound file, returning Record objects.
This is an iterator function, typically used in a for loop. For
example, using one of the example KEGG files in the Biopython
test suite,
>>> with open("KEGG/compound.sample") as handle:
... for record in parse(handle):
... print("%s %s" % (record.entry, record.name[0]))
...
C00023 Iron
C00017 Protein
C00099 beta-Alanine
C00294 Inosine
C00298 Trypsin
C00348 all-trans-Undecaprenyl phosphate
C00349 2-Methyl-3-oxopropanoate
C01386 NH2Mec
"""
record = Record()
for line in handle:
if line[:3] == "///":
yield record
record = Record()
continue
if line[:12] != " ":
keyword = line[:12]
data = line[12:].strip()
if keyword == "ENTRY ":
words = data.split()
record.entry = words[0]
elif keyword == "NAME ":
data = data.strip(";")
record.name.append(data)
elif keyword == "ENZYME ":
while data:
column = data[:16]
data = data[16:]
enzyme = column.strip()
record.enzyme.append(enzyme)
elif keyword == "PATHWAY ":
map, name = data.split(" ")
pathway = ("PATH", map, name)
record.pathway.append(pathway)
elif keyword == "FORMULA ":
record.formula = data
elif keyword in ("MASS ", "EXACT_MASS "):
record.mass = data
elif keyword == "DBLINKS ":
if ":" in data:
key, values = data.split(":")
values = values.split()
row = (key, values)
record.dblinks.append(row)
else:
row = record.dblinks[-1]
key, values = row
values.extend(data.split())
row = key, values
record.dblinks[-1] = row
if __name__ == "__main__":
from Bio._utils import run_doctest
run_doctest()