Files
biopython/Tests/test_SearchIO_blast_tab.py
Peter Cock e9496f5e88 Automatically remove redundant commas
Used this hack from https://github.com/psf/black/issues/1288

$ pip install black==19.3b0 && black . && pip install black==19.10b && black .

I then manually reverted changes to a handful of explicit data
structures where the magic trailing comma should be retained
(indicates to black not to squash into one line).

Doing this dramatically improves the changes from trying black
version 21.7b0 (right now just four minor changes).
2021-08-27 13:42:55 +01:00

985 lines
39 KiB
Python

# Copyright 2012 by Wibowo Arindrarto. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Tests for SearchIO BlastIO parsers."""
import os
import unittest
from Bio.SearchIO import parse
from Bio.SearchIO.BlastIO.blast_tab import _LONG_SHORT_MAP as all_fields
# Test case files are in the Blast directory; get_file() joins this directory
# name with a fixture file name.
TEST_DIR = "Blast"
# Format name passed as the second argument to every parse() call in this file.
FMT = "blast-tab"
def get_file(filename):
    """Build the path of the test file *filename* inside ``TEST_DIR``."""
    path = os.path.join(TEST_DIR, filename)
    return path
class BlastTabCases(unittest.TestCase):
    """Tests for the tab-separated BLAST parser.

    The ``tab_2226_tblastn_*`` tests repeatedly check the same three queries
    (``random_s00`` without hits, ``NP_391444.1`` with three hits and
    ``gi|11464971:4-101`` with five hits).  Their expected values are stored
    once as class constants and verified through the private ``_check_*``
    helpers, so each test only spells out what is specific to its file.
    """

    # (program, target, version) expected on query results.
    _COMMENTED_HEADER = ("tblastn", "db/minirefseq_mrna", "2.2.26+")
    _UNKNOWN_HEADER = ("<unknown program>", "<unknown target>", "<unknown version>")

    # Query and hit identifiers that recur across the tests.
    _NP_QUERY = "gi|16080617|ref|NP_391444.1|"
    _GI_QUERY = "gi|11464971:4-101"
    _NP_FIRST_HIT = "gi|145479850|ref|XM_001425911.1|"
    _NP_LAST_HIT = "gi|115975252|ref|XM_001180111.1|"
    _GI_FIRST_HIT = "gi|350596019|ref|XM_003360601.2|"

    # Expected HSP attribute values, keyed by attribute name.
    # Only HSP of the first hit of the NP_391444.1 query.
    _NP_FIRST_STD = {
        "ident_pct": 34.88,
        "aln_span": 43,
        "mismatch_num": 28,
        "gapopen_num": 0,
        "query_start": 30,
        "query_end": 73,
        "hit_start": 1743,
        "hit_end": 1872,
        "evalue": 1e-05,
        "bitscore": 34.7,
    }
    # Only HSP of the last hit of the NP_391444.1 query.
    _NP_LAST_STD = {
        "ident_pct": 33.90,
        "aln_span": 59,
        "mismatch_num": 31,
        "gapopen_num": 1,
        "query_start": 43,
        "query_end": 94,
        "hit_start": 1056,
        "hit_end": 1233,
        "evalue": 1e-04,
        "bitscore": 31.6,
    }
    # First HSP of the first hit of the gi|11464971:4-101 query.
    _GI_FIRST_STD = {
        "ident_pct": 95.92,
        "aln_span": 98,
        "mismatch_num": 4,
        "gapopen_num": 0,
        "query_start": 0,
        "query_end": 98,
        "hit_start": 94,
        "hit_end": 388,
        "evalue": 2e-67,
        "bitscore": 199,
    }
    # Last HSP of the first hit of the gi|11464971:4-101 query.
    _GI_LAST_STD = {
        "ident_pct": 29.58,
        "aln_span": 71,
        "mismatch_num": 46,
        "gapopen_num": 2,
        "query_start": 29,
        "query_end": 96,
        "hit_start": 541,
        "hit_end": 754,
        "evalue": 4e-05,
        "bitscore": 32.7,
    }

    # (query sequence, hit sequence) of the two NP_391444.1 HSPs above.
    _NP_FIRST_SEQS = (
        "PDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTSDLD",
        "PKTATGTKKGTIIGLLSIHTILFILTSHALSLEVKEQT*KDID",
    )
    _NP_LAST_SEQS = (
        "GLADTHTIEVTVDNEPVSLDITEESTSDLDKFNSG--------DKVTITYEKNDEGQLL",
        "GLVPDHTLILPVGHYQSMLDLTEEVQTELDQFKSALRKYYLSKGKTCVIYERNFRTQHL",
    )

    # ------------------------------------------------------------------
    # private helpers (names do not start with "test", so unittest does
    # not collect them)
    # ------------------------------------------------------------------

    def _check_program(self, qresult, program, target, version):
        """Assert the program, target and version attributes of *qresult*."""
        self.assertEqual(program, qresult.program)
        self.assertEqual(target, qresult.target)
        self.assertEqual(version, qresult.version)

    def _check_attrs(self, obj, expected, names=None):
        """Assert that attributes of *obj* equal the values in *expected*.

        Arguments:
         - obj      - object whose attributes are read with getattr().
         - expected - dict mapping attribute name to expected value.
         - names    - optional iterable restricting the check to these keys
           of *expected*; by default every key is checked.

        """
        selected = expected if names is None else names
        for name in selected:
            # the attribute name is passed as msg so a failure identifies it
            self.assertEqual(expected[name], getattr(obj, name), name)

    def _check_hit(self, hit, hit_id, query_id, hsp_count):
        """Assert the ID, query ID and number of HSPs of *hit*."""
        self.assertEqual(hit_id, hit.id)
        self.assertEqual(query_id, hit.query_id)
        self.assertEqual(hsp_count, len(hit))

    def _check_hsp(self, hsp, hit_id, query_id, expected=None, names=None):
        """Assert the IDs of *hsp* and, optionally, further attributes.

        When *expected* is given it is checked with _check_attrs(), limited
        to *names* if that is given as well.
        """
        self.assertEqual(hit_id, hsp.hit_id)
        self.assertEqual(query_id, hsp.query_id)
        if expected is not None:
            self._check_attrs(hsp, expected, names)

    def _check_seqs(self, hsp, seqs):
        """Assert the query and hit sequences of *hsp* against a 2-tuple."""
        query_seq, hit_seq = seqs
        self.assertEqual(query_seq, hsp.query.seq)
        self.assertEqual(hit_seq, hsp.hit.seq)

    def _check_exhausted(self, qresults):
        """Assert that the *qresults* iterator has no further items.

        Combined with the explicit next() calls made by each test, this pins
        the exact number of query results in the file.
        """
        self.assertRaises(StopIteration, next, qresults)

    def _check_random_qresult(self, qresult):
        """Check the hit-less ``random_s00`` query of a commented file."""
        self._check_program(qresult, *self._COMMENTED_HEADER)
        self.assertEqual("random_s00", qresult.id)
        self.assertEqual(0, len(qresult))

    def _check_np_hits(self, qresult, names=None):
        """Check the first and last hits of the NP_391444.1 query result.

        *names* optionally restricts which HSP attributes are compared; the
        hit and HSP identifiers are always checked.
        """
        hit = qresult[0]
        self._check_hit(hit, self._NP_FIRST_HIT, self._NP_QUERY, 1)
        self._check_hsp(
            hit.hsps[0], self._NP_FIRST_HIT, self._NP_QUERY, self._NP_FIRST_STD, names
        )
        hit = qresult[-1]
        self._check_hit(hit, self._NP_LAST_HIT, self._NP_QUERY, 1)
        self._check_hsp(
            hit.hsps[0], self._NP_LAST_HIT, self._NP_QUERY, self._NP_LAST_STD, names
        )

    def _check_np_qresult(self, qresult, header=None, names=None):
        """Check the NP_391444.1 query result (three hits).

        Arguments:
         - qresult - the query result to inspect.
         - header  - optional (program, target, version) tuple to assert.
         - names   - optional subset of HSP attributes to compare.

        """
        if header is not None:
            self._check_program(qresult, *header)
        self.assertEqual(self._NP_QUERY, qresult.id)
        self.assertEqual(3, len(qresult))
        self._check_np_hits(qresult, names)

    def _check_gi_qresult(self, qresult, header=None, names=None):
        """Check the gi|11464971:4-101 query result (five hits).

        Only the first hit is inspected, through its first and last HSPs.
        Arguments are the same as for _check_np_qresult().
        """
        if header is not None:
            self._check_program(qresult, *header)
        self.assertEqual(self._GI_QUERY, qresult.id)
        self.assertEqual(5, len(qresult))
        hit = qresult[0]
        self._check_hit(hit, self._GI_FIRST_HIT, self._GI_QUERY, 2)
        self._check_hsp(
            hit.hsps[0], self._GI_FIRST_HIT, self._GI_QUERY, self._GI_FIRST_STD, names
        )
        self._check_hsp(
            hit.hsps[-1], self._GI_FIRST_HIT, self._GI_QUERY, self._GI_LAST_STD, names
        )

    # ------------------------------------------------------------------
    # BLAST 2.2.28+
    # ------------------------------------------------------------------

    def test_tab_2228_tblastn_001(self):
        """Test parsing TBLASTN 2.2.28+ tabular output (tab_2228_tblastn_001)."""
        tab_file = get_file("tab_2228_tblastn_001.txt")
        qresults = list(
            parse(
                tab_file, FMT, fields=["evalue", "sallseqid", "qseqid"], comments=True
            )
        )
        self.assertEqual(1, len(qresults))
        self.assertEqual(10, len(qresults[0].hits))
        # there is one hit with an alternative ID
        self.assertEqual(
            qresults[0]["gi|148227873|ref|NM_001095167.1|"],
            qresults[0]["gi|55250552|gb|BC086280.1|"],
        )
        # check some of the HSPs
        self.assertEqual(0.0, qresults[0][0][0].evalue)
        self.assertEqual(8e-173, qresults[0][-1][0].evalue)

    def test_tab_2228_tblastx_001(self):
        """Test parsing TBLASTX 2.2.28+ tabular output (tab_2228_tblastx_001)."""
        tab_file = get_file("tab_2228_tblastx_001.txt")
        qresults = list(
            parse(tab_file, FMT, fields=list(all_fields.values()), comments=True)
        )
        # this is a single query, with 192 hits and 243 hsps
        self.assertEqual(1, len(qresults))
        self.assertEqual(192, len(qresults[0].hits))
        self.assertEqual(243, sum(len(x) for x in qresults[0]))
        # there is one hit with an alternative ID
        self.assertEqual(
            qresults[0]["gi|31126987|gb|AY255526.2|"],
            qresults[0]["gi|31342050|ref|NM_181083.2|"],
        )
        # only checking the new fields in 2.2.28+
        hit = qresults[0][0]
        self.assertEqual(["NM_001183135", "EF059095"], hit.accession_all)
        self.assertEqual(["32630", "559292"], hit.tax_ids)
        self.assertEqual(["N/A", "N/A"], hit.sci_names)
        self.assertEqual(["N/A", "N/A"], hit.com_names)
        self.assertEqual(["N/A"], hit.blast_names)
        self.assertEqual(["N/A"], hit.super_kingdoms)
        self.assertEqual("Saccharomyces cerevisiae S288c Mon2p (MON2), mRNA", hit.title)
        self.assertEqual(
            [
                "Saccharomyces cerevisiae S288c Mon2p (MON2), mRNA",
                "Synthetic construct Saccharomyces cerevisiae clone "
                "FLH203015.01X MON2, complete sequence",
            ],
            hit.title_all,
        )
        self.assertEqual("N/A", hit.strand)
        self.assertEqual(100.0, hit.query_coverage)
        for hsp in hit[:4]:
            # shorthand ~ the values just happen to all be 99
            # in other cases, they may be different
            self.assertEqual(99.0, hsp.query_coverage)
        self.assertEqual(73.0, hit[5].query_coverage)
        self.assertEqual(12.0, hit[6].query_coverage)

    # ------------------------------------------------------------------
    # BLAST 2.2.26+
    # ------------------------------------------------------------------

    def test_tab_2226_tblastn_001(self):
        """Test parsing TBLASTN 2.2.26+ tabular output (tab_2226_tblastn_001)."""
        tab_file = get_file("tab_2226_tblastn_001.txt")
        qresults = parse(tab_file, FMT)
        # test first qresult
        self._check_np_qresult(next(qresults))
        # test last qresult
        self._check_gi_qresult(next(qresults))
        # check if we've finished iteration over qresults
        self._check_exhausted(qresults)

    def test_tab_2226_tblastn_002(self):
        """Test parsing TBLASTN 2.2.26+ tabular output (tab_2226_tblastn_002)."""
        tab_file = get_file("tab_2226_tblastn_002.txt")
        qresults = parse(tab_file, FMT)
        # the iterator must not yield any query result at all
        self._check_exhausted(qresults)

    def test_tab_2226_tblastn_003(self):
        """Test parsing TBLASTN 2.2.26+ tabular output (tab_2226_tblastn_003)."""
        tab_file = get_file("tab_2226_tblastn_003.txt")
        qresults = parse(tab_file, FMT)
        self._check_np_qresult(next(qresults))
        # check if we've finished iteration over qresults
        self._check_exhausted(qresults)

    def test_tab_2226_tblastn_004(self):
        """Test parsing TBLASTN 2.2.26+ tabular output (tab_2226_tblastn_004)."""
        tab_file = get_file("tab_2226_tblastn_004.txt")
        qresults = parse(tab_file, FMT)
        self._check_gi_qresult(next(qresults))
        # check if we've finished iteration over qresults
        self._check_exhausted(qresults)

    def test_tab_2226_tblastn_005(self):
        """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_005)."""
        tab_file = get_file("tab_2226_tblastn_005.txt")
        qresults = parse(tab_file, FMT, comments=True)
        # test first qresult
        self._check_random_qresult(next(qresults))
        # test second qresult
        self._check_np_qresult(next(qresults), header=self._COMMENTED_HEADER)
        # test last qresult
        self._check_gi_qresult(next(qresults), header=self._COMMENTED_HEADER)
        # check if we've finished iteration over qresults
        self._check_exhausted(qresults)

    def test_tab_2226_tblastn_005_comments_false(self):
        """Test parsing commented TBLASTN 2.2.26+ output without comments=True (tab_2226_tblastn_005)."""
        tab_file = get_file("tab_2226_tblastn_005.txt")
        exc_msg = (
            "Encountered unexpected character '#' at the beginning of a line. "
            "Set comments=True if the file is a commented file."
        )
        qresults = parse(tab_file, FMT)
        # NOTE: the ``msg`` keyword of assertRaises is only the text shown when
        # no exception is raised; it does not check the exception's own
        # message.  Capture the exception and compare its text explicitly.
        with self.assertRaises(ValueError) as ctx:
            next(qresults)
        self.assertIn(exc_msg, str(ctx.exception))

    def test_tab_2226_tblastn_006(self):
        """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_006)."""
        tab_file = get_file("tab_2226_tblastn_006.txt")
        qresults = parse(tab_file, FMT, comments=True)
        self._check_random_qresult(next(qresults))
        # check if we've finished iteration over qresults
        self._check_exhausted(qresults)

    def test_tab_2226_tblastn_007(self):
        """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_007)."""
        tab_file = get_file("tab_2226_tblastn_007.txt")
        qresults = parse(tab_file, FMT, comments=True)
        self._check_np_qresult(next(qresults), header=self._COMMENTED_HEADER)
        # check if we've finished iteration over qresults
        self._check_exhausted(qresults)

    def test_tab_2226_tblastn_008(self):
        """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_008)."""
        tab_file = get_file("tab_2226_tblastn_008.txt")
        qresults = parse(tab_file, FMT, comments=True)
        self._check_gi_qresult(next(qresults), header=self._COMMENTED_HEADER)
        # check if we've finished iteration over qresults
        self._check_exhausted(qresults)

    def test_tab_2226_tblastn_009(self):
        """Test parsing TBLASTN 2.2.26+ tabular output (tab_2226_tblastn_009)."""
        tab_file = get_file("tab_2226_tblastn_009.txt")
        qresults = parse(tab_file, FMT, fields=("qseqid", "sseqid"))
        # Only the two ID columns are parsed, so only IDs are checked.  HSPs
        # are fetched by indexing the hit directly (hit[0]) in this test.
        # test first qresult
        qresult = next(qresults)
        self._check_program(qresult, *self._UNKNOWN_HEADER)
        self.assertEqual(self._NP_QUERY, qresult.id)
        self.assertEqual(3, len(qresult))
        hit = qresult[0]
        self._check_hit(hit, self._NP_FIRST_HIT, self._NP_QUERY, 1)
        self._check_hsp(hit[0], self._NP_FIRST_HIT, self._NP_QUERY)
        hit = qresult[-1]
        self._check_hit(hit, self._NP_LAST_HIT, self._NP_QUERY, 1)
        self._check_hsp(hit[0], self._NP_LAST_HIT, self._NP_QUERY)
        # test last qresult
        qresult = next(qresults)
        self._check_program(qresult, *self._UNKNOWN_HEADER)
        self.assertEqual(self._GI_QUERY, qresult.id)
        self.assertEqual(5, len(qresult))
        hit = qresult[0]
        self._check_hit(hit, self._GI_FIRST_HIT, self._GI_QUERY, 2)
        self._check_hsp(hit[0], self._GI_FIRST_HIT, self._GI_QUERY)
        self._check_hsp(hit[-1], self._GI_FIRST_HIT, self._GI_QUERY)
        # check if we've finished iteration over qresults
        self._check_exhausted(qresults)

    def test_tab_2226_tblastn_010(self):
        """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_010)."""
        tab_file = get_file("tab_2226_tblastn_010.txt")
        qresults = parse(tab_file, FMT, comments=True)
        # only these two HSP attributes are asserted for this file
        names = ("evalue", "bitscore")
        # test first qresult
        self._check_random_qresult(next(qresults))
        # test second qresult
        self._check_np_qresult(
            next(qresults), header=self._COMMENTED_HEADER, names=names
        )
        # test last qresult
        self._check_gi_qresult(
            next(qresults), header=self._COMMENTED_HEADER, names=names
        )
        # check if we've finished iteration over qresults
        self._check_exhausted(qresults)

    def test_tab_2226_tblastn_011(self):
        """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_011)."""
        tab_file = get_file("tab_2226_tblastn_011.txt")
        qresults = parse(tab_file, FMT, comments=True)
        # test first qresult
        self._check_random_qresult(next(qresults))

        # test second qresult: shared checks first, then the attributes that
        # are only asserted for this file
        qresult = next(qresults)
        self._check_np_qresult(qresult, header=self._COMMENTED_HEADER)
        self.assertEqual(self._NP_QUERY, qresult.accession)
        self.assertEqual(self._NP_QUERY, qresult.accession_version)
        self.assertEqual("0", qresult.gi)
        self.assertEqual(102, qresult.seq_len)
        hit = qresult[0]
        self.assertEqual([self._NP_FIRST_HIT], hit.id_all)
        self.assertEqual(self._NP_FIRST_HIT, hit.accession)
        self.assertEqual(self._NP_FIRST_HIT, hit.accession_version)
        self.assertEqual("0", hit.gi)
        self.assertEqual("0", hit.gi_all)
        self.assertEqual(4632, hit.seq_len)
        hsp = hit.hsps[0]
        self._check_seqs(hsp, self._NP_FIRST_SEQS)
        self._check_attrs(
            hsp,
            {
                "bitscore_raw": 78,
                "ident_num": 15,
                "pos_num": 26,
                "gap_num": 0,
                "pos_pct": 60.47,
                "query_frame": 0,
                "hit_frame": 1,
            },
        )
        hit = qresult[-1]
        self.assertEqual(self._NP_LAST_HIT, hit.accession)
        self.assertEqual(self._NP_LAST_HIT, hit.accession_version)
        hsp = hit.hsps[0]
        self._check_seqs(hsp, self._NP_LAST_SEQS)
        self._check_attrs(
            hsp,
            {
                "bitscore_raw": 70.0,
                "ident_num": 20,
                "pos_num": 29,
                "gap_num": 8,
                "pos_pct": 49.15,
                "query_frame": 0,
                "hit_frame": 1,
            },
        )

        # test last qresult
        qresult = next(qresults)
        self._check_gi_qresult(qresult, header=self._COMMENTED_HEADER)
        self.assertEqual(self._GI_QUERY, qresult.accession)
        self.assertEqual(self._GI_QUERY, qresult.accession_version)
        self.assertEqual("0", qresult.gi)
        self.assertEqual(98, qresult.seq_len)
        hit = qresult[0]
        self.assertEqual([self._GI_FIRST_HIT], hit.id_all)
        self.assertEqual(self._GI_FIRST_HIT, hit.accession)
        self.assertEqual(self._GI_FIRST_HIT, hit.accession_version)
        self.assertEqual("0", hit.gi)
        self.assertEqual("0", hit.gi_all)
        self.assertEqual(772, hit.seq_len)
        hsp = hit.hsps[0]
        self._check_seqs(
            hsp,
            (
                "KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK",
                "KRIREGYLVKKGSMFNTWKPMWVILLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVFKITTTKQQDHFFQAAFLEERDGWVRDIKKAIK",
            ),
        )
        self._check_attrs(
            hsp,
            {
                "bitscore_raw": 506.0,
                "ident_num": 94,
                "pos_num": 96,
                "gap_num": 0,
                "pos_pct": 97.96,
                "query_frame": 0,
                "hit_frame": 2,
            },
        )
        hsp = hit.hsps[-1]
        self._check_seqs(
            hsp,
            (
                "IEFYKKKSDNSPKGMIPLKGSTLTS-PCQDFGKRMFVLK---ITTTKQQDHFFQAAFLEERDAWVRDIKKA",
                "LHYYDPAGGEDPLGAIHLRGCVVTSVESNTDGKNGFLWERAXXITADEVHYFLQAANPKERTEWIKAIQVA",
            ),
        )
        self._check_attrs(
            hsp,
            {
                "bitscore_raw": 73.0,
                "ident_num": 21,
                "pos_num": 33,
                "gap_num": 4,
                "pos_pct": 46.48,
                "query_frame": 0,
                "hit_frame": 2,
            },
        )
        # check if we've finished iteration over qresults
        self._check_exhausted(qresults)

    def test_tab_2226_tblastn_012(self):
        """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_012)."""
        tab_file = get_file("tab_2226_tblastn_012.txt")
        qresults = parse(tab_file, FMT, comments=True)
        # (query ID, number of hits) of the first, second and last qresult
        expected = (
            ("random_s00", 0),
            (self._NP_QUERY, 3),
            (self._GI_QUERY, 5),
        )
        for query_id, hit_count in expected:
            qresult = next(qresults)
            self._check_program(qresult, "tblastn", "refseq_rna", "2.2.26+")
            self.assertEqual(query_id, qresult.id)
            self.assertEqual("X76FDCG9016", qresult.rid)
            self.assertEqual(hit_count, len(qresult))
        # check if we've finished iteration over qresults
        self._check_exhausted(qresults)

    def test_tab_2226_tblastn_013(self):
        """Test parsing TBLASTN 2.2.26+ tabular output (tab_2226_tblastn_013)."""
        tab_file = get_file("tab_2226_tblastn_013.txt")
        qresults = parse(tab_file, FMT, fields="qseq std sseq")
        qresult = next(qresults)
        self._check_program(qresult, *self._UNKNOWN_HEADER)
        # the query result ID itself is not asserted by this test
        self.assertEqual(3, len(qresult))
        self._check_np_hits(qresult)
        self._check_seqs(qresult[0].hsps[0], self._NP_FIRST_SEQS)
        self._check_seqs(qresult[-1].hsps[0], self._NP_LAST_SEQS)
        # check if we've finished iteration over qresults
        self._check_exhausted(qresults)
if __name__ == "__main__":
    # Run with verbosity 2 when this file is executed as a script.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))