Files
biopython/Tests/test_BWA_tool.py
ruff-isort de0bb21fb3 Apply isort (forcing single lines, not sorting by type) via ruff
$ ruff check --fix --select=I \
  --config=lint.isort.force-single-line=true \
  --config=lint.isort.order-by-type=false \
  BioSQL/ Bio/ Tests/ Scripts/ Doc/ setup.py

Using ruff version 0.4.10
2024-06-26 15:31:39 +09:00

212 lines
7.8 KiB
Python

# Copyright 2013 by Saket Choudhary. Based on test_Clustalw_tool.py by Peter
# Cock .
#
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Tests for calling BWA."""
import os
import sys
import unittest
import warnings
from Bio import BiopythonDeprecationWarning
from Bio import MissingExternalDependencyError
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=BiopythonDeprecationWarning)
# TODO from Bio.Sequencing.Applications import BwaBwaswCommandline
from Bio.Sequencing.Applications import BwaAlignCommandline
from Bio.Sequencing.Applications import BwaIndexCommandline
from Bio.Sequencing.Applications import BwaMemCommandline
from Bio.Sequencing.Applications import BwaSampeCommandline
from Bio.Sequencing.Applications import BwaSamseCommandline
#################################################################
# Try to avoid problems when the OS is in another language
os.environ["LANG"] = "C"
bwa_exe = None
if sys.platform == "win32":
# TODO - Check the path?
try:
# This can vary depending on the Windows language.
prog_files = os.environ["PROGRAMFILES"]
except KeyError:
prog_files = r"C:\Program Files"
likely_dirs = ["bwa", "bwa-0.6.2", ""]
likely_exes = ["bwa"]
for folder in likely_dirs:
if os.path.isdir(os.path.join(prog_files, folder)):
for filename in likely_exes:
if os.path.isfile(os.path.join(prog_files, folder, filename)):
bwa_exe = os.path.join(prog_files, folder, filename)
break
if bwa_exe:
break
else:
from subprocess import getoutput
output = getoutput("bwa")
# Since "not found" may be in another language, try and be sure this is
# really the bwa tool's output
bwa_found = False
if "not found" not in output and "not recognized" not in output:
if "bwa" in output and "alignment via Burrows-Wheeler transformation" in output:
bwa_exe = "bwa"
skip_aln_tests = False
aln_output = getoutput("bwa aln")
if "unrecognized" in aln_output:
skip_aln_tests = True
print("'bwa aln' is unrecognized, skipping aln/samse/sampe tests")
if not bwa_exe:
raise MissingExternalDependencyError(
"Install bwa and correctly set"
" the file path to the program if"
" you want to use it from Biopython"
)
class BwaTestCase(unittest.TestCase):
"""Class for implementing BWA test cases."""
def setUp(self):
self.reference_file = "BWA/human_g1k_v37_truncated.fasta"
self.reference_extensions = ["amb", "ann", "bwt", "pac", "sa"]
self.infile1 = "BWA/HNSCC1_1_truncated.fastq"
self.infile2 = "BWA/HNSCC1_2_truncated.fastq"
self.saifile1 = "BWA/1.sai"
self.saifile2 = "BWA/2.sai"
self.samfile1 = "BWA/1.sam"
self.samfile2 = "BWA/2.sam"
self.samfile = "BWA/out.sam"
self.files_to_clean = [
self.saifile1,
self.saifile2,
self.samfile1,
self.samfile2,
self.samfile,
]
def tearDown(self):
for filename in self.files_to_clean:
if os.path.isfile(filename):
os.remove(filename)
for extension in self.reference_extensions:
index_file = self.reference_file + "." + extension
if os.path.exists(index_file):
os.remove(index_file)
def test_index(self):
"""Test for creating index files for the reference genome fasta file."""
cmdline = BwaIndexCommandline(bwa_exe)
cmdline.set_parameter("infile", self.reference_file)
cmdline.set_parameter("algorithm", "bwtsw")
stdout, stderr = cmdline()
for extension in self.reference_extensions:
index_file = self.reference_file + "." + extension
self.assertTrue(
os.path.exists(index_file), f"Index File {index_file} not found"
)
self.assertIn(
"Finished constructing BWT",
str(stdout) + str(stderr),
f"FASTA indexing failed:\n{cmdline}\nStdout:{stdout}\nStderr:{stderr}\n",
)
def do_aln(self, in_file, out_file):
"""Test for generating sai files given the reference and read file."""
cmdline = BwaAlignCommandline(bwa_exe)
cmdline.set_parameter("reference", self.reference_file)
cmdline.read_file = in_file
self.assertTrue(os.path.isfile(in_file))
stdout, stderr = cmdline(stdout=out_file)
self.assertNotIn(
"fail to locate the index",
str(stderr) + str(stdout),
"Error aligning sequence to reference:\n%s\nStdout:%s\nStderr:%s\n"
% (cmdline, stdout, stderr),
)
def create_fasta_index(self):
"""Test for generating index for fasta file.
BWA requires an indexed fasta for each alignment operation.
This should be called to create an index before any alignment
operation.
"""
cmdline = BwaIndexCommandline(bwa_exe)
cmdline.set_parameter("infile", self.reference_file)
cmdline.set_parameter("algorithm", "bwtsw")
stdout, stderr = cmdline()
if not skip_aln_tests:
def test_samse(self):
"""Test for single end sequencing."""
self.create_fasta_index()
self.do_aln(self.infile1, self.saifile1)
cmdline = BwaSamseCommandline(bwa_exe)
cmdline.set_parameter("reference", self.reference_file)
cmdline.set_parameter("read_file", self.infile1)
cmdline.set_parameter("sai_file", self.saifile1)
stdout, stderr = cmdline(stdout=self.samfile1)
with open(self.samfile1) as handle:
headline = handle.readline()
self.assertTrue(
headline.startswith("@SQ"),
f"Error generating sam files:\n{cmdline}\nOutput starts:{headline}",
)
def test_sampe(self):
"""Test for generating samfile by paired end sequencing."""
self.create_fasta_index()
# Generate sai files from paired end data
self.do_aln(self.infile1, self.saifile1)
self.do_aln(self.infile2, self.saifile2)
cmdline = BwaSampeCommandline(bwa_exe)
cmdline.set_parameter("reference", self.reference_file)
cmdline.set_parameter("sai_file1", self.saifile1)
cmdline.set_parameter("sai_file2", self.saifile2)
cmdline.set_parameter("read_file1", self.infile1)
cmdline.set_parameter("read_file2", self.infile2)
stdout, stderr = cmdline(stdout=self.samfile)
with open(self.samfile) as handle:
headline = handle.readline()
self.assertTrue(
headline.startswith("@SQ"),
f"Error generating sam files:\n{cmdline}\nOutput starts:{headline}",
)
def test_mem(self):
"""Test for generating samfile by paired end sequencing using BWA-MEM."""
self.create_fasta_index()
cmdline = BwaMemCommandline(bwa_exe)
cmdline.set_parameter("reference", self.reference_file)
cmdline.set_parameter("read_file1", self.infile1)
cmdline.set_parameter("read_file2", self.infile2)
stdout, stderr = cmdline(stdout=self.samfile)
with open(self.samfile) as handle:
headline = handle.readline()
self.assertTrue(
headline.startswith("@SQ"),
f"Error generating sam files:\n{cmdline}\nOutput starts:{headline}",
)
if __name__ == "__main__":
runner = unittest.TextTestRunner(verbosity=2)
unittest.main(testRunner=runner)