Files
biopython/Bio/SwissProt/KeyWList.py
mdehoon 5ffdce852b remove spurious print statements (#4198)
* remove spurious print statements

* remove unneeded import

* add comment
2022-12-13 18:22:11 +09:00

91 lines
3.4 KiB
Python

# Copyright 1999 by Jeffrey Chang. All rights reserved.
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Code to parse the keywlist.txt file from SwissProt/UniProt.
See:
 - https://www.uniprot.org/docs/keywlist.txt

Classes:
 - Record            Stores the information about one keyword or one category
                     in the keywlist.txt file.

Functions:
 - parse             Parses the keywlist.txt file and returns an iterator to
                     the records it contains.

"""
class Record(dict):
    """Hold the fields of a single keyword or category entry.

    A record is a plain Python dictionary keyed by the two-letter line
    codes used in keywlist.txt::

        ---------  ---------------------------  ------------------------------
        Line code  Content                      Occurrence in an entry
        ---------  ---------------------------  ------------------------------
        ID         Identifier (keyword)         Once; starts a keyword entry
        IC         Identifier (category)        Once; starts a category entry
        AC         Accession (KW-xxxx)          Once
        DE         Definition                   Once or more
        SY         Synonyms                     Optional; once or more
        GO         Gene ontology (GO) mapping   Optional; once or more
        HI         Hierarchy                    Optional; once or more
        WW         Relevant WWW site            Optional; once or more
        CA         Category                     Once per keyword entry; absent
                                                in category entries

    A freshly created record maps each of the repeatable codes (DE, SY, GO,
    HI, WW) to its own empty list; the remaining codes are absent until
    they are assigned.
    """

    def __init__(self):
        """Create the record with an empty list for every repeatable code."""
        super().__init__()
        # A generator of pairs (rather than dict.fromkeys) so that every
        # code gets its own list object instead of one shared list.
        self.update((code, []) for code in ("DE", "SY", "GO", "HI", "WW"))
def parse(handle):
    """Parse the keyword list from file handle.

    Returns a generator object which yields keyword entries as
    Bio.SwissProt.KeyWList.Record() object.

    Arguments:
     - handle - an iterable of text lines, typically an open keywlist.txt
       file. Any iterable (for example a list of lines) is accepted; it is
       read in a single pass.

    Every data line consists of a two-letter line code, three spaces and
    the value starting at column 5. An entry is terminated by a line that
    starts with ``//``; at that point the collected DE and SY lines are
    each joined with single spaces into one string and the record is
    yielded. GO, HI and WW stay lists. Lines that lack the three-space
    separator (blank lines, for instance) are skipped.

    Raises ValueError if a data line carries a line code other than
    ID, IC, AC, CA, DE, SY, GO, HI or WW.
    """
    # Pin one iterator: the three loops below must continue from where the
    # previous one stopped, which a re-iterable such as a list would not do.
    lines = iter(handle)
    record = Record()
    # First, skip the header - look for the line that starts the first entry
    for line in lines:
        key = line[:2]
        if key in ("ID", "IC") and line[2:5] == "   ":
            record[key] = line[5:].strip()
            break
    # Now parse the records
    for line in lines:
        if line.startswith("-------------------------------------"):
            # We have reached the footer
            break
        key = line[:2]
        if key == "//":
            # End of entry: collapse the free-text fields and hand it out
            record["DE"] = " ".join(record["DE"])
            record["SY"] = " ".join(record["SY"])
            yield record
            record = Record()
        elif line[2:5] == "   ":
            # line[2:5] is three characters wide, so the separator it is
            # compared against must be exactly three spaces.
            value = line[5:].strip()
            if key in ("ID", "IC", "AC", "CA"):
                record[key] = value
            elif key in ("DE", "SY", "GO", "HI", "WW"):
                record[key].append(value)
            else:
                raise ValueError(f"Cannot parse line '{line.strip()}'")
    # Read the footer and throw it away, so the input is fully consumed
    for _ in lines:
        pass