mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 13:43:47 +08:00
$ ruff check --fix --select=I \ --config=lint.isort.force-single-line=true \ --config=lint.isort.order-by-type=false \ BioSQL/ Bio/ Tests/ Scripts/ Doc/ setup.py Using ruff version 0.4.10
230 lines
8.3 KiB
Python
230 lines
8.3 KiB
Python
# This code is part of the Biopython distribution and governed by its
|
|
# license. Please see the LICENSE file that should have been included
|
|
# as part of this package.
|
|
"""Unittests for Bio.Align.Applications interface for PRANK."""
|
|
|
|
import os
|
|
import sys
|
|
import unittest
|
|
import warnings
|
|
|
|
from Bio import AlignIO
|
|
from Bio import BiopythonDeprecationWarning
|
|
from Bio import MissingExternalDependencyError
|
|
from Bio import SeqIO
|
|
from Bio.Nexus.Nexus import NexusError
|
|
|
|
with warnings.catch_warnings():
|
|
warnings.simplefilter("ignore", category=BiopythonDeprecationWarning)
|
|
from Bio.Align.Applications import PrankCommandline
|
|
from Bio.Application import _escape_filename
|
|
|
|
|
|
# Try to avoid problems when the OS is in another language
os.environ["LANG"] = "C"

# Command used to launch PRANK; stays None when no executable can be located.
prank_exe = None
if sys.platform == "win32":
    # The Program Files location can vary depending on the Windows language,
    # so prefer the environment variable and fall back to the English default.
    prog_files = os.environ.get("PROGRAMFILES", r"C:\Program Files")
    # For Windows, PRANK just comes as a zip file which contains the
    # prank.exe file which the user could put anywhere. We'll try a few
    # sensible locations under Program Files... and then the full path.
    likely_dirs = [
        "",  # Current dir
        prog_files,
        os.path.join(prog_files, "Prank"),
    ] + sys.path
    for folder in likely_dirs:
        # Deliberately no os.path.isdir(folder) guard: os.path.isdir("") is
        # False, which would silently skip the current-directory entry above.
        # os.path.isfile() already returns False for folders that do not exist.
        candidate = os.path.join(folder, "prank.exe")
        if os.path.isfile(candidate):
            prank_exe = candidate
            break
else:
    from subprocess import getoutput

    # Run the bare command and inspect what the shell prints: a shell error
    # ("not found" / "not recognized") means PRANK is not on the PATH.
    output = getoutput("prank")
    if "not found" not in output and "not recognized" not in output:
        if "prank" in output.lower():
            prank_exe = "prank"
if not prank_exe:
    # Raised at import time so the whole module is reported as needing PRANK.
    raise MissingExternalDependencyError(
        "Install PRANK if you want to use the Bio.Align.Applications wrapper."
    )
|
|
|
|
|
|
class PrankApplication(unittest.TestCase):
    """Run PRANK through PrankCommandline on a small FASTA input file."""

    # Output files PRANK may write to the current directory. Depending on the
    # PRANK version the names use ".1."/".2." or ".best." (see the NEXUS test
    # below), so all three variants are cleaned up.
    _generated_files = tuple(
        f"output.{tag}.{ext}"
        for tag in ("1", "2", "best")
        for ext in ("dnd", "fas", "xml", "nex")
    )

    def setUp(self):
        """Select the FASTA file used as input by every test."""
        self.infile1 = "Fasta/fa01"

    def tearDown(self):
        """Remove generated files.

        output.1.dnd output.1.fas output.1.xml output.1.nex
        output.2.dnd output.2.fas output.2.xml output.2.nex
        output.best.dnd output.best.fas output.best.xml output.best.nex
        """
        for filename in self._generated_files:
            if os.path.isfile(filename):
                os.remove(filename)

    def test_Prank_simple(self):
        """Simple round-trip through app with infile.

        output.?.??? files written to cwd - no way to redirect
        """
        cmdline = PrankCommandline(prank_exe)
        cmdline.set_parameter("d", self.infile1)
        self.assertEqual(str(cmdline), _escape_filename(prank_exe) + " -d=Fasta/fa01")
        # The repr must evaluate back to an equivalent command line object.
        self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
        output, error = cmdline()
        self.assertEqual(error, "")
        self.assertIn("Total time", output)

    def test_Prank_simple_with_NEXUS_output(self):
        """Simple round-trip through app with infile, output in NEXUS.

        output.?.??? files written to cwd - no way to redirect
        """
        records = list(SeqIO.parse(self.infile1, "fasta"))
        # Try using keyword argument,
        cmdline = PrankCommandline(prank_exe, d=self.infile1)
        # Try using a property,
        cmdline.d = self.infile1
        cmdline.f = 17  # NEXUS format
        cmdline.set_parameter("dots", True)
        self.assertEqual(
            str(cmdline), _escape_filename(prank_exe) + " -d=Fasta/fa01 -f=17 -dots"
        )
        self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
        stdout, stderr = cmdline()
        self.assertIn("Total time", stdout)
        self.assertEqual(stderr, "")
        if os.path.isfile("output.best.nex"):
            # Prank v.130820 and perhaps earlier use ".best.*" output names
            nex_fname = "output.best.nex"
        elif os.path.isfile("output.2.nex"):
            # Older Prank versions use ".2.*" output names
            nex_fname = "output.2.nex"
        else:
            raise RuntimeError("Can't find PRANK's NEXUS output (*.nex)")
        # Only the parse is guarded, so the tolerated parser failure cannot
        # mask an unrelated error raised by the checks that follow.
        try:
            align = AlignIO.read(nex_fname, "nexus")
        except NexusError:
            # See bug 3119,
            # Bio.Nexus can't parse output from prank v100701 (1 July 2010)
            return
        for old, new in zip(records, align):
            # Old versions of Prank reduced name to 9 chars
            self.assertTrue(old.id == new.id or old.id[:9] == new.id)
            # infile1 has alignment gaps in it
            self.assertEqual(
                str(new.seq).replace("-", ""), str(old.seq).replace("-", "")
            )

    def test_Prank_complex_command_line(self):
        """Round-trip with complex command line."""
        cmdline = PrankCommandline(prank_exe)
        cmdline.set_parameter("d", self.infile1)
        # Options are set both with and without the leading dash, and both via
        # set_parameter and via properties; all must end up on the command line.
        cmdline.set_parameter("-gaprate", 0.321)
        cmdline.set_parameter("gapext", 0.6)
        cmdline.set_parameter("-dots", 1)  # i.e. True
        # Try using a property:
        cmdline.kappa = 3
        cmdline.skipins = True
        cmdline.set_parameter("-once", True)
        cmdline.realbranches = True
        self.assertEqual(
            str(cmdline),
            _escape_filename(prank_exe)
            + " -d=Fasta/fa01"
            + " -dots -gaprate=0.321 -gapext=0.6 -kappa=3"
            + " -once -skipins -realbranches",
        )
        self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
        stdout, stderr = cmdline()
        self.assertIn("Total time", stdout)
|
|
|
|
|
|
class PrankConversion(unittest.TestCase):
    """Use PRANK's -convert mode and compare the result with the input."""

    def setUp(self):
        """Select the input alignment and the output file prefix."""
        # As these reads are all 36, it can be seen as pre-aligned:
        self.input = "Quality/example.fasta"
        self.output = "temp with space"  # prefix, PRANK will pick extensions

    @staticmethod
    def _discard(filename):
        """Delete filename if it exists as a regular file."""
        if os.path.isfile(filename):
            os.remove(filename)

    def conversion(self, prank_number, prank_ext, format):
        """Get PRANK to do a conversion, and check it with SeqIO.

        Arguments:
         - prank_number - value passed to PRANK's -f option.
         - prank_ext - extension of the file PRANK is expected to write.
         - format - format name used to read that file back with AlignIO.

        """
        filename = f"{self.output}.{prank_ext}"
        # Start from a clean slate, and make sure the converted file is removed
        # afterwards even when one of the assertions below fails.
        self._discard(filename)
        self.addCleanup(self._discard, filename)
        cmdline = PrankCommandline(
            prank_exe,
            d=self.input,
            convert=True,
            f=prank_number,
            # The prefix contains spaces, so it is passed explicitly quoted.
            o=f'"{self.output}"',
        )
        self.assertEqual(
            str(cmdline),
            _escape_filename(prank_exe)
            + f' -d={self.input} -o="{self.output}" -f={prank_number} -convert',
        )
        self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
        message, error = cmdline()
        self.assertIn("PRANK", message)
        self.assertIn((f"converting '{self.input}' to '{filename}'"), message, message)
        self.assertEqual(error, "")
        self.assertTrue(os.path.isfile(filename))
        old = AlignIO.read(self.input, "fasta")
        # Hack... trim the expected names to 10 characters before comparing
        # them with what is read back from the PHYLIP file.
        if format == "phylip":
            for record in old:
                record.id = record.id[:10]
        new = AlignIO.read(filename, format)
        self.assertEqual(len(old), len(new))
        for old_r, new_r in zip(old, new):
            self.assertEqual(old_r.id, new_r.id)
            self.assertEqual(old_r.seq, new_r.seq)

    def test_convert_to_fasta(self):
        """Convert FASTA to FASTA format."""
        self.conversion(8, "fas", "fasta")

    # Prank v.100701 seems to output an invalid file here...
    # def test_convert_to_phylip32(self):
    #     """Convert FASTA to PHYLIP 3.2 format."""
    #     self.conversion(11, "phy", "phylip")

    def test_convert_to_phylip(self):
        """Convert FASTA to PHYLIP format."""
        self.conversion(12, "phy", "phylip")

    # PRANK truncated the record names in the matrix block. An error?
    # def test_convert_to_paup_nexus(self):
    #     """Convert FASTA to PAUP/NEXUS."""
    #     self.conversion(17, "nex", "nexus")

    # We don't support format 18, PAML
|
|
|
|
|
|
if __name__ == "__main__":
    # When executed as a script, run this module's tests with a text runner
    # configured for verbosity level 2.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))
|