mirror of
https://github.com/biopython/biopython.git
synced 2025-10-21 06:23:48 +08:00
$ ruff check --fix --select=I \ --config=lint.isort.force-single-line=true \ --config=lint.isort.order-by-type=false \ BioSQL/ Bio/ Tests/ Scripts/ Doc/ setup.py Using ruff version 0.4.10
170 lines
5.1 KiB
Python
Executable File
170 lines
5.1 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
# Copyright 2001 by Gavin E. Crooks. All rights reserved.
|
|
# This code is part of the Biopython distribution and governed by its
|
|
# license. Please see the LICENSE file that should have been included
|
|
# as part of this package.
|
|
"""Extract SCOP domain ATOM and HETATOM records from PDB."""
|
|
|
|
import getopt
|
|
import sys
|
|
from urllib.request import urlretrieve as _urlretrieve
|
|
|
|
from Bio.SCOP import Cla
|
|
from Bio.SCOP import Raf
|
|
|
|
|
|
def usage():
|
|
"""Print a help message."""
|
|
print(
|
|
"""Extract a SCOP domain's ATOM and HETATOM records from the relevant PDB file.
|
|
|
|
For example:
|
|
scop_pdb.py astral-rapid-access-1.55.raf dir.cla.scop.txt_1.55 d3hbib_
|
|
|
|
A result file, d3hbib_.ent, will be generated in the working directory.
|
|
|
|
The required RAF file can be found at [http://astral.stanford.edu/raf.html],
|
|
and the SCOP CLA file at [http://scop.berkeley.edu/parse/index.html].
|
|
|
|
Note: Errors will occur if the PDB file has been altered since the creation
|
|
of the SCOP CLA and ASTRAL RAF files.
|
|
|
|
Usage: scop_pdb [-h] [-i file] [-o file] [-p pdb_url_prefix]
|
|
raf_url cla_url [sid] [sid] [sid] ...
|
|
|
|
-h -- Print this help message.
|
|
|
|
-i file -- Input file name. Each line should start with an sid (Scop domain
|
|
identifier). Blank lines, and lines starting with '#' are
|
|
ignored. If file is '-' then data is read from stdin. If not
|
|
given then sids are taken from the command line.
|
|
|
|
-o file -- Output file name. If '-' then data is written to stdout. If not
|
|
given then data is written to files named sid+'.ent'.
|
|
|
|
-p pdb_url-- A URL for PDB files. The token '%s' will be replaced with the
|
|
4 character PDB ID. If the pdb_url is not given then the latest
|
|
PDB file is retrieved directly from rcsb.org.
|
|
|
|
|
|
raf_url -- The URL or filename of an ASTRAL Rapid Access File sequence map.
|
|
See [http://astral.stanford.edu/raf.html]
|
|
|
|
cla_url -- The URL or filename of a SCOP parsable CLA file.
|
|
See [http://scop.berkeley.edu/parse/index.html]
|
|
|
|
sid -- A SCOP domain identifier. e.g. d3hbib_
|
|
"""
|
|
)
|
|
|
|
|
|
default_pdb_url = (
|
|
"http://www.rcsb.org/pdb/cgi/export.cgi/somefile.pdb?"
|
|
"format=PDB&pdbId=%s&compression=None"
|
|
)
|
|
# default_pdb_url = "file://usr/local/db/pdb/data/010331/snapshot/all/pdb%s.ent" # noqa: E501
|
|
|
|
|
|
def open_pdb(pdbid, pdb_url=None):
|
|
"""Make a local copy of an online pdb file and return a file handle."""
|
|
if pdb_url is None:
|
|
pdb_url = default_pdb_url
|
|
url = pdb_url % pdbid
|
|
fn, header = _urlretrieve(url)
|
|
return open(fn)
|
|
|
|
|
|
def main():
|
|
"""Extract a SCOP domain's ATOM and HETATOM records from a PDB file."""
|
|
try:
|
|
opts, args = getopt.getopt(
|
|
sys.argv[1:], "hp:o:i:", ["help", "usage", "pdb=", "output=", "input="]
|
|
)
|
|
except getopt.GetoptError:
|
|
# show help information and exit:
|
|
usage()
|
|
sys.exit(2)
|
|
|
|
input = None
|
|
in_handle = None
|
|
output = None
|
|
pdb_url = None
|
|
cla_url = None
|
|
raf_url = None
|
|
|
|
for o, a in opts:
|
|
if o in ("-h", "--help", "--usage"):
|
|
usage()
|
|
sys.exit()
|
|
elif o in ("-o", "--output"):
|
|
output = a
|
|
elif o in ("-i", "--input"):
|
|
input = a
|
|
elif o in ("-p", "--pdb"):
|
|
pdb_url = a
|
|
|
|
if len(args) < 2:
|
|
sys.stderr.write("Not enough arguments. Try --help for more details.\n")
|
|
sys.exit(2)
|
|
|
|
raf_url = args[0]
|
|
cla_url = args[1]
|
|
|
|
(raf_filename, headers) = _urlretrieve(raf_url)
|
|
seqMapIndex = Raf.SeqMapIndex(raf_filename)
|
|
|
|
(cla_filename, headers) = _urlretrieve(cla_url)
|
|
claIndex = Cla.Index(cla_filename)
|
|
|
|
if input is None:
|
|
sids = args[2:]
|
|
elif input == "-":
|
|
sids = sys.stdin
|
|
else:
|
|
in_handle = open(input)
|
|
sids = in_handle
|
|
|
|
try:
|
|
for sid in sids:
|
|
if not sid or sid[0:1] == "#":
|
|
continue
|
|
id = sid[0:7]
|
|
pdbid = id[1:5]
|
|
s = pdbid[0:1]
|
|
if s == "0" or s == "s":
|
|
sys.stderr.write(f"No coordinates for domain {id}\n")
|
|
continue
|
|
|
|
if output is None:
|
|
filename = id + ".ent"
|
|
out_handle = open(filename, "w+")
|
|
elif output == "-":
|
|
out_handle = sys.stdout
|
|
else:
|
|
out_handle = open(output, "w+")
|
|
|
|
try:
|
|
try:
|
|
claRec = claIndex[id]
|
|
residues = claRec.residues
|
|
seqMap = seqMapIndex.getSeqMap(residues)
|
|
pdbid = residues.pdbid
|
|
|
|
f = open_pdb(pdbid, pdb_url)
|
|
try:
|
|
seqMap.getAtoms(f, out_handle)
|
|
finally:
|
|
f.close()
|
|
except (OSError, KeyError, RuntimeError) as e:
|
|
sys.stderr.write(f"I cannot do SCOP domain {id} : {e}\n")
|
|
finally:
|
|
out_handle.close()
|
|
finally:
|
|
if in_handle is not None:
|
|
in_handle.close()
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|