mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 13:43:47 +08:00
$ ruff check --fix --select=I \ --config=lint.isort.force-single-line=true \ --config=lint.isort.order-by-type=false \ BioSQL/ Bio/ Tests/ Scripts/ Doc/ setup.py Using ruff version 0.4.10
212 lines
7.8 KiB
Python
212 lines
7.8 KiB
Python
# Copyright 2013 by Saket Choudhary. Based on test_Clustalw_tool.py by Peter
|
|
# Cock .
|
|
#
|
|
# This code is part of the Biopython distribution and governed by its
|
|
# license. Please see the LICENSE file that should have been included
|
|
# as part of this package.
|
|
|
|
"""Tests for calling BWA."""
|
|
|
|
import os
|
|
import sys
|
|
import unittest
|
|
import warnings
|
|
|
|
from Bio import BiopythonDeprecationWarning
|
|
from Bio import MissingExternalDependencyError
|
|
|
|
with warnings.catch_warnings():
|
|
warnings.simplefilter("ignore", category=BiopythonDeprecationWarning)
|
|
# TODO from Bio.Sequencing.Applications import BwaBwaswCommandline
|
|
from Bio.Sequencing.Applications import BwaAlignCommandline
|
|
from Bio.Sequencing.Applications import BwaIndexCommandline
|
|
from Bio.Sequencing.Applications import BwaMemCommandline
|
|
from Bio.Sequencing.Applications import BwaSampeCommandline
|
|
from Bio.Sequencing.Applications import BwaSamseCommandline
|
|
|
|
|
|
#################################################################
|
|
|
|
# Try to avoid problems when the OS is in another language
|
|
os.environ["LANG"] = "C"
|
|
|
|
bwa_exe = None
|
|
if sys.platform == "win32":
|
|
# TODO - Check the path?
|
|
try:
|
|
# This can vary depending on the Windows language.
|
|
prog_files = os.environ["PROGRAMFILES"]
|
|
except KeyError:
|
|
prog_files = r"C:\Program Files"
|
|
likely_dirs = ["bwa", "bwa-0.6.2", ""]
|
|
likely_exes = ["bwa"]
|
|
for folder in likely_dirs:
|
|
if os.path.isdir(os.path.join(prog_files, folder)):
|
|
for filename in likely_exes:
|
|
if os.path.isfile(os.path.join(prog_files, folder, filename)):
|
|
bwa_exe = os.path.join(prog_files, folder, filename)
|
|
break
|
|
if bwa_exe:
|
|
break
|
|
else:
|
|
from subprocess import getoutput
|
|
|
|
output = getoutput("bwa")
|
|
|
|
# Since "not found" may be in another language, try and be sure this is
|
|
# really the bwa tool's output
|
|
bwa_found = False
|
|
if "not found" not in output and "not recognized" not in output:
|
|
if "bwa" in output and "alignment via Burrows-Wheeler transformation" in output:
|
|
bwa_exe = "bwa"
|
|
skip_aln_tests = False
|
|
aln_output = getoutput("bwa aln")
|
|
if "unrecognized" in aln_output:
|
|
skip_aln_tests = True
|
|
print("'bwa aln' is unrecognized, skipping aln/samse/sampe tests")
|
|
|
|
if not bwa_exe:
|
|
raise MissingExternalDependencyError(
|
|
"Install bwa and correctly set"
|
|
" the file path to the program if"
|
|
" you want to use it from Biopython"
|
|
)
|
|
|
|
|
|
class BwaTestCase(unittest.TestCase):
|
|
"""Class for implementing BWA test cases."""
|
|
|
|
def setUp(self):
|
|
self.reference_file = "BWA/human_g1k_v37_truncated.fasta"
|
|
self.reference_extensions = ["amb", "ann", "bwt", "pac", "sa"]
|
|
self.infile1 = "BWA/HNSCC1_1_truncated.fastq"
|
|
self.infile2 = "BWA/HNSCC1_2_truncated.fastq"
|
|
self.saifile1 = "BWA/1.sai"
|
|
self.saifile2 = "BWA/2.sai"
|
|
self.samfile1 = "BWA/1.sam"
|
|
self.samfile2 = "BWA/2.sam"
|
|
self.samfile = "BWA/out.sam"
|
|
self.files_to_clean = [
|
|
self.saifile1,
|
|
self.saifile2,
|
|
self.samfile1,
|
|
self.samfile2,
|
|
self.samfile,
|
|
]
|
|
|
|
def tearDown(self):
|
|
for filename in self.files_to_clean:
|
|
if os.path.isfile(filename):
|
|
os.remove(filename)
|
|
for extension in self.reference_extensions:
|
|
index_file = self.reference_file + "." + extension
|
|
if os.path.exists(index_file):
|
|
os.remove(index_file)
|
|
|
|
def test_index(self):
|
|
"""Test for creating index files for the reference genome fasta file."""
|
|
cmdline = BwaIndexCommandline(bwa_exe)
|
|
cmdline.set_parameter("infile", self.reference_file)
|
|
cmdline.set_parameter("algorithm", "bwtsw")
|
|
stdout, stderr = cmdline()
|
|
for extension in self.reference_extensions:
|
|
index_file = self.reference_file + "." + extension
|
|
self.assertTrue(
|
|
os.path.exists(index_file), f"Index File {index_file} not found"
|
|
)
|
|
self.assertIn(
|
|
"Finished constructing BWT",
|
|
str(stdout) + str(stderr),
|
|
f"FASTA indexing failed:\n{cmdline}\nStdout:{stdout}\nStderr:{stderr}\n",
|
|
)
|
|
|
|
def do_aln(self, in_file, out_file):
|
|
"""Test for generating sai files given the reference and read file."""
|
|
cmdline = BwaAlignCommandline(bwa_exe)
|
|
cmdline.set_parameter("reference", self.reference_file)
|
|
cmdline.read_file = in_file
|
|
self.assertTrue(os.path.isfile(in_file))
|
|
stdout, stderr = cmdline(stdout=out_file)
|
|
self.assertNotIn(
|
|
"fail to locate the index",
|
|
str(stderr) + str(stdout),
|
|
"Error aligning sequence to reference:\n%s\nStdout:%s\nStderr:%s\n"
|
|
% (cmdline, stdout, stderr),
|
|
)
|
|
|
|
def create_fasta_index(self):
|
|
"""Test for generating index for fasta file.
|
|
|
|
BWA requires an indexed fasta for each alignment operation.
|
|
This should be called to create an index before any alignment
|
|
operation.
|
|
"""
|
|
cmdline = BwaIndexCommandline(bwa_exe)
|
|
cmdline.set_parameter("infile", self.reference_file)
|
|
cmdline.set_parameter("algorithm", "bwtsw")
|
|
stdout, stderr = cmdline()
|
|
|
|
if not skip_aln_tests:
|
|
|
|
def test_samse(self):
|
|
"""Test for single end sequencing."""
|
|
self.create_fasta_index()
|
|
self.do_aln(self.infile1, self.saifile1)
|
|
cmdline = BwaSamseCommandline(bwa_exe)
|
|
cmdline.set_parameter("reference", self.reference_file)
|
|
cmdline.set_parameter("read_file", self.infile1)
|
|
cmdline.set_parameter("sai_file", self.saifile1)
|
|
stdout, stderr = cmdline(stdout=self.samfile1)
|
|
|
|
with open(self.samfile1) as handle:
|
|
headline = handle.readline()
|
|
self.assertTrue(
|
|
headline.startswith("@SQ"),
|
|
f"Error generating sam files:\n{cmdline}\nOutput starts:{headline}",
|
|
)
|
|
|
|
def test_sampe(self):
|
|
"""Test for generating samfile by paired end sequencing."""
|
|
self.create_fasta_index()
|
|
|
|
# Generate sai files from paired end data
|
|
self.do_aln(self.infile1, self.saifile1)
|
|
self.do_aln(self.infile2, self.saifile2)
|
|
|
|
cmdline = BwaSampeCommandline(bwa_exe)
|
|
cmdline.set_parameter("reference", self.reference_file)
|
|
cmdline.set_parameter("sai_file1", self.saifile1)
|
|
cmdline.set_parameter("sai_file2", self.saifile2)
|
|
cmdline.set_parameter("read_file1", self.infile1)
|
|
cmdline.set_parameter("read_file2", self.infile2)
|
|
stdout, stderr = cmdline(stdout=self.samfile)
|
|
|
|
with open(self.samfile) as handle:
|
|
headline = handle.readline()
|
|
self.assertTrue(
|
|
headline.startswith("@SQ"),
|
|
f"Error generating sam files:\n{cmdline}\nOutput starts:{headline}",
|
|
)
|
|
|
|
def test_mem(self):
|
|
"""Test for generating samfile by paired end sequencing using BWA-MEM."""
|
|
self.create_fasta_index()
|
|
|
|
cmdline = BwaMemCommandline(bwa_exe)
|
|
cmdline.set_parameter("reference", self.reference_file)
|
|
cmdline.set_parameter("read_file1", self.infile1)
|
|
cmdline.set_parameter("read_file2", self.infile2)
|
|
stdout, stderr = cmdline(stdout=self.samfile)
|
|
|
|
with open(self.samfile) as handle:
|
|
headline = handle.readline()
|
|
self.assertTrue(
|
|
headline.startswith("@SQ"),
|
|
f"Error generating sam files:\n{cmdline}\nOutput starts:{headline}",
|
|
)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
runner = unittest.TextTestRunner(verbosity=2)
|
|
unittest.main(testRunner=runner)
|