Files
biopython/Tests/test_SearchIO_blast_xml.py
ruff-isort de0bb21fb3 Apply isort (forcing single lines, not sorting by type) via ruff
$ ruff check --fix --select=I \
  --config=lint.isort.force-single-line=true \
  --config=lint.isort.order-by-type=false \
  BioSQL/ Bio/ Tests/ Scripts/ Doc/ setup.py

Using ruff version 0.4.10
2024-06-26 15:31:39 +09:00

4072 lines
169 KiB
Python

# Copyright 2012 by Wibowo Arindrarto. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Tests for SearchIO BlastIO parsers."""
import os
import unittest
import warnings
from Bio import BiopythonParserWarning
from Bio.SearchIO import parse
# Directory holding the BLAST test case files.
TEST_DIR = "Blast"
# Format name handed to SearchIO's parse() for every file in this module.
FMT = "blast-xml"
# Reference string compared against qresult.reference by the tests.
REFERENCE = (
    "Altschul, Stephen F., Thomas L. Madden, "
    "Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, "
    "Webb Miller, and David J. Lipman (1997), "
    '"Gapped BLAST and PSI-BLAST: a new generation of '
    'protein database search programs", '
    "Nucleic Acids Res. 25:3389-3402."
)
def get_file(filename):
    """Build the path of *filename* inside the test case directory."""
    path = os.path.join(TEST_DIR, filename)
    return path
class BlastnXmlCases(unittest.TestCase):
    """Tests for parsing blastn results stored as BLAST XML."""

    # Reference string expected from the 2.2.26+ blastn files.
    _GREEDY_REFERENCE = (
        "Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb Miller (2000), "
        '"A greedy algorithm for aligning DNA sequences", '
        "J Comput Biol 2000; 7(1-2):203-14."
    )

    # Query row and hit row (they are identical) of the gap-free 490-column
    # alignment between the Pou5f1 query and its NM_013633.3 hit.
    _POU5F1_ROW = "GAGGTGAAACCGTCCCTAGGTGAGCCGTCTTTCCACCAGGCCCCCGGCTCGGGGTGCCCACCTTCCCCATGGCTGGACACCTGGCTTCAGACTTCGCCTTCTCACCCCCACCAGGTGGGGGTGATGGGTCAGCAGGGCTGGAGCCGGGCTGGGTGGATCCTCGAACCTGGCTAAGCTTCCAAGGGCCTCCAGGTGGGCCTGGAATCGGACCAGGCTCAGAGGTATTGGGGATCTCCCCATGTCCGCCCGCATACGAGTTCTGCGGAGGGATGGCATACTGTGGACCTCAGGTTGGACTGGGCCTAGTCCCCCAAGTTGGCGTGGAGACTTTGCAGCCTGAGGGCCAGGCAGGAGCACGAGTGGAAAGCAACTCAGAGGGAACCTCCTCTGAGCCCTGTGCCGACCGCCCCAATGCCGTGAAGTTGGAGAAGGTGGAACCAACTCCCGAGGAGTCCCAGGACATGAAAGCCCTGCAGAAGGAGCTAGAACA"

    # Query row of the alignments that span hg19_dna columns 0-66.  The
    # shorter hg19_dna alignments checked below are gap-free, so their rows
    # are the [query_start:query_end] slices of this string.
    _HG19_ROW = "TCAAGCCATTGCACTCCAGCCTGGGCAACAAGAGCGAAACTCCGTCTC" + "A" * 18

    def _expect(self, obj, **fields):
        """Assert that each named attribute of *obj* equals the given value.

        The attributes are compared in the order the keywords are written.
        """
        for attr_name, wanted in fields.items():
            self.assertEqual(wanted, getattr(obj, attr_name))

    def _expect_rows(self, hsp, query, hit, similarity):
        """Assert the query row, hit row and similarity line of *hsp*."""
        self.assertEqual(query, hsp.query.seq)
        self.assertEqual(hit, hsp.hit.seq)
        self.assertEqual(similarity, hsp.aln_annotation["similarity"])

    def _expect_no_second_hsp(self, hit):
        """Assert that indexing *hit* at position 1 raises IndexError."""
        self.assertRaises(IndexError, hit.__getitem__, 1)

    def _expect_greedy_header(self, record, target):
        """Assert the program metadata shared by the 2.2.26+ blastn files."""
        self._expect(
            record,
            version="2.2.26+",
            reference=self._GREEDY_REFERENCE,
            param_score_match=1.0,
            param_score_mismatch=-2.0,
            param_evalue_threshold=10.0,
            param_filter="L;m;",
            param_gap_open=0.0,
            param_gap_extend=0.0,
            program="blastn",
            target=target,
        )

    def _expect_pou5f1_hit(self, hit):
        """Assert the NM_013633.3 hit and its single full-length HSP."""
        self._expect(
            hit,
            id="gi|356995852|ref|NM_013633.3|",
            description=(
                "Mus musculus POU domain, class 5, transcription factor 1 "
                "(Pou5f1), transcript variant 1, mRNA"
            ),
            seq_len=1353,
        )
        self.assertEqual(1, len(hit))
        self._expect_no_second_hsp(hit)
        hsp = hit.hsps[0]
        self._expect(
            hsp,
            bitscore=905.979,
            bitscore_raw=490,
            evalue=0,
            query_start=0,
            query_end=490,
            hit_start=0,
            hit_end=490,
            query_frame=1,
            hit_frame=1,
            ident_num=490,
            pos_num=490,
            gap_num=0,
            aln_span=490,
        )
        # every one of the 490 columns is an identity
        self._expect_rows(
            hsp,
            query=self._POU5F1_ROW,
            hit=self._POU5F1_ROW,
            similarity="|" * 490,
        )
        self._expect_no_second_hsp(hit)

    def _expect_hg19_minirefseq(self, record):
        """Assert the hg19_dna query as searched against db/minirefseq_mrna."""
        self._expect(
            record,
            id="hg19_dna",
            description=(
                "range=chr1:1207307-1207372 5'pad=0 3'pad=0 strand=+ "
                "repeatMasking=none"
            ),
            seq_len=66,
            stat_db_num=23,
            stat_db_len=67750,
            stat_eff_space=3506256.0,
            stat_kappa=0.46,
            stat_lambda=1.28,
            stat_entropy=0.85,
        )
        self.assertEqual(5, len(record))

        # first hit: two gap-free, all-identical HSPs
        hit = record[0]
        self._expect(
            hit,
            id="gi|94721341|ref|NM_001040441.1|",
            description=(
                "Homo sapiens zinc finger and BTB domain containing 8A "
                "(ZBTB8A), mRNA"
            ),
            seq_len=7333,
        )
        self.assertEqual(2, len(hit))
        hsp = hit.hsps[0]
        self._expect(
            hsp,
            bitscore=115.613,
            bitscore_raw=62,
            evalue=5.52066e-29,
            query_start=4,
            query_end=66,
            hit_start=3676,
            hit_end=3738,
            query_frame=1,
            hit_frame=1,
            ident_num=62,
            pos_num=62,
            gap_num=0,
            aln_span=62,
        )
        self._expect_rows(
            hsp,
            query=self._HG19_ROW[4:66],
            hit=self._HG19_ROW[4:66],
            similarity="|" * 62,
        )
        hsp = hit.hsps[-1]
        self._expect(
            hsp,
            bitscore=98.9927,
            bitscore_raw=53,
            evalue=5.55986e-24,
            query_start=5,
            query_end=58,
            hit_start=2823,
            hit_end=2876,
            query_frame=1,
            hit_frame=1,
            ident_num=53,
            pos_num=53,
            gap_num=0,
            aln_span=53,
        )
        self._expect_rows(
            hsp,
            query=self._HG19_ROW[5:58],
            hit=self._HG19_ROW[5:58],
            similarity="|" * 53,
        )

        # last hit: one HSP with two mismatching columns
        hit = record[-1]
        self._expect(
            hit,
            id="gi|332865372|ref|XM_003318468.1|",
            description=(
                "PREDICTED: Pan troglodytes zinc finger protein 273, "
                "transcript variant 1 (ZNF273), mRNA"
            ),
            seq_len=4430,
        )
        self.assertEqual(1, len(hit))
        hsp = hit.hsps[0]
        self._expect(
            hsp,
            bitscore=111.919,
            bitscore_raw=60,
            evalue=7.14143e-28,
            query_start=0,
            query_end=66,
            hit_start=2734,
            hit_end=2800,
            query_frame=1,
            hit_frame=-1,
            ident_num=64,
            pos_num=64,
            gap_num=0,
            aln_span=66,
        )
        self._expect_rows(
            hsp,
            query=self._HG19_ROW,
            hit="TCACGCCATTGCACTCCAGCCTGGGCAACAAGAGTGAAACTCCGTCTC" + "A" * 18,
            # blanks sit under the two mismatching columns (3 and 34)
            similarity="|||" + " " + "|" * 30 + " " + "|" * 31,
        )

    def test_xml_2212L_blastn_001(self):
        """Parse xml_2212L_blastn_001.xml: one query with two hits."""
        records = parse(get_file("xml_2212L_blastn_001.xml"), FMT)
        seen = 0

        record = next(records)
        seen += 1
        # program metadata
        self._expect(
            record,
            version="2.2.12",
            reference=REFERENCE,
            param_evalue_threshold=10.0,
            param_score_match=1,
            param_score_mismatch=-3,
            param_gap_open=5,
            param_gap_extend=2,
        )
        # values of the query itself
        self._expect(
            record,
            id="gi|1348916|gb|G26684.1|G26684",
            description="human STS STS_D11570, sequence tagged site",
            seq_len=285,
            stat_db_num=371021,
            stat_db_len=1233631384,
            stat_eff_space=0,
            stat_kappa=0.710603,
            stat_lambda=1.37406,
            stat_entropy=1.30725,
        )
        self.assertEqual(2, len(record))

        # first hit
        hit = record[0]
        self._expect(
            hit,
            id="gi|9950606|gb|AE004854.1|",
            description=(
                "Pseudomonas aeruginosa PAO1, section 415 of 529 of the "
                "complete genome"
            ),
            seq_len=11884,
        )
        self.assertEqual(1, len(hit))
        self._expect_no_second_hsp(hit)
        hsp = hit.hsps[0]
        self._expect(
            hsp,
            bitscore=38.1576,
            bitscore_raw=19,
            evalue=1.0598,
            query_start=67,
            query_end=86,
            hit_start=6011,
            hit_end=6030,
            query_frame=1,
            hit_frame=1,
            ident_num=19,
            pos_num=19,
            gap_num=0,
            aln_span=19,
        )
        self._expect_rows(
            hsp,
            query="CAGGCCAGCGACTTCTGGG",
            hit="CAGGCCAGCGACTTCTGGG",
            similarity="|" * 19,
        )
        self._expect_no_second_hsp(hit)

        # last hit
        hit = record[-1]
        self._expect(
            hit,
            id="gi|15073988|emb|AL591786.1|SME591786",
            description=(
                "Sinorhizobium meliloti 1021 complete chromosome; "
                "segment 5/12"
            ),
            seq_len=299350,
        )
        self.assertEqual(1, len(hit))
        self._expect_no_second_hsp(hit)
        hsp = hit.hsps[0]
        self._expect(
            hsp,
            bitscore=36.1753,
            bitscore_raw=18,
            evalue=4.18768,
            query_start=203,
            query_end=224,
            hit_start=83627,
            hit_end=83648,
            query_frame=1,
            hit_frame=-1,
            ident_num=20,
            pos_num=20,
            gap_num=0,
            aln_span=21,
        )
        self._expect_rows(
            hsp,
            query="TGAAAGGAAATNAAAATGGAA",
            hit="TGAAAGGAAATCAAAATGGAA",
            # the blank sits under the single N/C mismatch (column 11)
            similarity="|" * 11 + " " + "|" * 9,
        )
        self._expect_no_second_hsp(hit)

        # the iterator must be exhausted after the single query
        self.assertRaises(StopIteration, next, records)
        self.assertEqual(1, seen)

    def test_xml_2226_blastn_001(self):
        """Parse xml_2226_blastn_001.xml: three queries."""
        records = parse(get_file("xml_2226_blastn_001.xml"), FMT)
        seen = 0

        # first query: the program metadata is checked here only; no hits
        record = next(records)
        seen += 1
        self._expect_greedy_header(record, target="db/minirefseq_mrna")
        self._expect(
            record,
            id="random_s00",
            description="",
            seq_len=128,
            stat_db_num=23,
            stat_db_len=67750,
            stat_eff_space=7616765.0,
            stat_kappa=0.46,
            stat_lambda=1.28,
            stat_entropy=0.85,
        )
        self.assertEqual(0, len(record))

        # second query
        record = next(records)
        seen += 1
        self._expect(
            record,
            id="gi|356995852:1-490",
            description=(
                "Mus musculus POU domain, class 5, transcription factor 1 "
                "(Pou5f1), transcript variant 1, mRNA"
            ),
            seq_len=490,
            stat_db_num=23,
            stat_db_len=67750,
            stat_eff_space=31860807.0,
            stat_kappa=0.46,
            stat_lambda=1.28,
            stat_entropy=0.85,
        )
        self.assertEqual(5, len(record))
        self._expect_pou5f1_hit(record[0])

        # third query
        record = next(records)
        seen += 1
        self._expect_hg19_minirefseq(record)

        # the iterator must be exhausted after the third query
        self.assertRaises(StopIteration, next, records)
        self.assertEqual(3, seen)

    def test_xml_2226_blastn_002(self):
        """Parse xml_2226_blastn_002.xml: one query without hits."""
        records = parse(get_file("xml_2226_blastn_002.xml"), FMT)
        seen = 0

        record = next(records)
        seen += 1
        self._expect_greedy_header(record, target="db/minirefseq_mrna")
        self._expect(
            record,
            id="random_s00",
            description="",
            seq_len=128,
            stat_db_num=23,
            stat_db_len=67750,
            stat_eff_space=7616765.0,
            stat_kappa=0.46,
            stat_lambda=1.28,
            stat_entropy=0.85,
        )
        self.assertEqual(0, len(record))
        self.assertEqual([], list(record.hits))

        # the iterator must be exhausted after the single query
        self.assertRaises(StopIteration, next, records)
        self.assertEqual(1, seen)

    def test_xml_2226_blastn_003(self):
        """Parse xml_2226_blastn_003.xml: one query with five hits."""
        records = parse(get_file("xml_2226_blastn_003.xml"), FMT)
        seen = 0

        record = next(records)
        seen += 1
        self._expect_greedy_header(record, target="db/minirefseq_mrna")
        self._expect(
            record,
            id="gi|356995852:1-490",
            description=(
                "Mus musculus POU domain, class 5, transcription factor 1 "
                "(Pou5f1), transcript variant 1, mRNA"
            ),
            seq_len=490,
            stat_db_num=23,
            stat_db_len=67750,
            stat_eff_space=31860807.0,
            stat_kappa=0.46,
            stat_lambda=1.28,
            stat_entropy=0.85,
        )
        self.assertEqual(5, len(record))
        self._expect_pou5f1_hit(record[0])

        # the iterator must be exhausted after the single query
        self.assertRaises(StopIteration, next, records)
        self.assertEqual(1, seen)

    def test_xml_2226_blastn_004(self):
        """Parse xml_2226_blastn_004.xml: one query with five hits."""
        records = parse(get_file("xml_2226_blastn_004.xml"), FMT)
        seen = 0

        record = next(records)
        seen += 1
        self._expect_hg19_minirefseq(record)

        # the iterator must be exhausted after the single query
        self.assertRaises(StopIteration, next, records)
        self.assertEqual(1, seen)

    def test_xml_2226_blastn_005(self):
        """Parse xml_2226_blastn_005.xml: three queries against refseq_rna."""
        records = parse(get_file("xml_2226_blastn_005.xml"), FMT)
        seen = 0
        # database statistics asserted for each of the three queries
        db_stats = {
            "stat_db_num": 2933984,
            "stat_db_len": 4726730735,
            "stat_eff_space": 0,
            "stat_kappa": 0.46,
            "stat_lambda": 1.28,
            "stat_entropy": 0.85,
        }

        # first query: the program metadata is checked here only; no hits
        record = next(records)
        seen += 1
        self._expect_greedy_header(record, target="refseq_rna")
        self._expect(
            record,
            id="random_s00",
            description="",
            seq_len=128,
            **db_stats,
        )
        self.assertEqual(0, len(record))

        # second query
        record = next(records)
        seen += 1
        self._expect(
            record,
            id="gi|356995852:1-490",
            description=(
                "Mus musculus POU domain, class 5, transcription factor 1 "
                "(Pou5f1), transcript variant 1, mRNA"
            ),
            seq_len=490,
            **db_stats,
        )
        self.assertEqual(5, len(record))
        self._expect_pou5f1_hit(record[0])

        # third query
        record = next(records)
        seen += 1
        self._expect(
            record,
            id="hg19_dna",
            description=(
                "range=chr1:1207307-1207372 5'pad=0 3'pad=0 strand=+ "
                "repeatMasking=none"
            ),
            seq_len=66,
            **db_stats,
        )
        self.assertEqual(5, len(record))

        hit = record[0]
        self._expect(
            hit,
            id="gi|332237160|ref|XM_003267724.1|",
            description=(
                "PREDICTED: Nomascus leucogenys ATG14 autophagy related 14 "
                "homolog (S. cerevisiae) (ATG14), mRNA"
            ),
            seq_len=4771,
        )
        self.assertEqual(1, len(hit))
        hsp = hit.hsps[0]
        self._expect(
            hsp,
            bitscore=115.613,
            bitscore_raw=62,
            evalue=3.35972e-23,
            query_start=4,
            query_end=66,
            hit_start=2864,
            hit_end=2926,
            query_frame=1,
            hit_frame=1,
            ident_num=62,
            pos_num=62,
            gap_num=0,
            aln_span=62,
        )
        self._expect_rows(
            hsp,
            query=self._HG19_ROW[4:66],
            hit=self._HG19_ROW[4:66],
            similarity="|" * 62,
        )

        hit = record[-1]
        self._expect(
            hit,
            id="gi|332254616|ref|XM_003276378.1|",
            description=(
                "PREDICTED: Nomascus leucogenys S100P binding protein, "
                "transcript variant 2 (S100PBP), mRNA"
            ),
            seq_len=4345,
        )
        self.assertEqual(1, len(hit))
        hsp = hit.hsps[0]
        self._expect(
            hsp,
            bitscore=111.919,
            bitscore_raw=60,
            evalue=4.34607e-22,
            query_start=0,
            query_end=66,
            hit_start=2791,
            hit_end=2857,
            query_frame=1,
            hit_frame=-1,
            ident_num=64,
            pos_num=64,
            gap_num=0,
            aln_span=66,
        )
        self._expect_rows(
            hsp,
            query=self._HG19_ROW,
            hit="TCATGCCACTGCACTCCAGCCTGGGCAACAAGAGCGAAACTCCGTCTC" + "A" * 18,
            # blanks sit under the two mismatching columns (3 and 8)
            similarity="|||" + " " + "||||" + " " + "|" * 57,
        )

        # the iterator must be exhausted after the third query
        self.assertRaises(StopIteration, next, records)
        self.assertEqual(3, seen)
class BlastpXmlCases(unittest.TestCase):
def test_xml_2212L_blastp_001(self):
    """Check the single qresult parsed from xml_2212L_blastp_001.xml.

    Covers the program metadata, the query statistics, lookup of the
    first hit through each of its alternative IDs, and the first and last
    hits together with their only HSP.
    """
    xml_file = get_file("xml_2212L_blastp_001.xml")
    qresults = parse(xml_file, FMT)
    counter = 0

    qresult = next(qresults)
    counter += 1

    # program metadata
    self.assertEqual("2.2.12", qresult.version)
    self.assertEqual(REFERENCE, qresult.reference)
    self.assertEqual("BLOSUM62", qresult.param_matrix)
    self.assertEqual(10.0, qresult.param_evalue_threshold)
    self.assertEqual("L;", qresult.param_filter)
    self.assertEqual(11, qresult.param_gap_open)
    self.assertEqual(1, qresult.param_gap_extend)
    self.assertEqual("blastp", qresult.program)
    self.assertEqual("nr", qresult.target)

    # values of the query itself
    self.assertEqual("gi|49176427|ref|NP_418280.3|", qresult.id)
    self.assertEqual(
        "component of Sec-independent translocase [Escherichia coli K12]",
        qresult.description,
    )
    self.assertEqual(103, qresult.seq_len)
    self.assertEqual(2934173, qresult.stat_db_num)
    self.assertEqual(1011751523, qresult.stat_db_len)
    self.assertEqual(0, qresult.stat_eff_space)
    self.assertEqual(0.041, qresult.stat_kappa)
    self.assertEqual(0.267, qresult.stat_lambda)
    self.assertEqual(0.14, qresult.stat_entropy)
    self.assertEqual(212, len(qresult))

    # check for alternative ID results: every alternative ID must return
    # the same hit as the primary ID
    for alt_id in (
        "gi|26250604|ref|NP_756644.1|",
        "gi|30064867|ref|NP_839038.1|",
        "gi|24115132|ref|NP_709642.1|",
        "gi|24054404|gb|AAN45349.1|",
        "gi|2367310|gb|AAC76839.1|",
        "gi|30043127|gb|AAP18849.1|",
        "gi|26111035|gb|AAN83218.1|",
        "gi|3193217|gb|AAC19240.1|",
        "gi|7444818|pir||E65188",
    ):
        self.assertEqual(qresult["gi|49176427|ref|NP_418280.3|"], qresult[alt_id])

    # first hit
    hit = qresult[0]
    self.assertEqual("gi|49176427|ref|NP_418280.3|", hit.id)
    self.assertEqual(
        "component of Sec-independent translocase [Escherichia coli K12]",
        hit.description,
    )
    self.assertEqual(10, len(hit.id_all))
    self.assertEqual(10, len(hit.description_all))
    self.assertEqual(103, hit.seq_len)
    self.assertEqual(1, len(hit))
    self.assertRaises(IndexError, hit.__getitem__, 1)

    hsp = hit.hsps[0]
    self.assertEqual(185.267, hsp.bitscore)
    self.assertEqual(469, hsp.bitscore_raw)
    self.assertEqual(4.20576e-46, hsp.evalue)
    self.assertEqual(0, hsp.query_start)
    self.assertEqual(103, hsp.query_end)
    self.assertEqual(0, hsp.hit_start)
    self.assertEqual(103, hsp.hit_end)
    self.assertEqual(0, hsp.query_frame)
    self.assertEqual(0, hsp.hit_frame)
    self.assertEqual(103, hsp.ident_num)
    self.assertEqual(103, hsp.pos_num)
    self.assertEqual(0, hsp.gap_num)
    self.assertEqual(103, hsp.aln_span)
    self.assertEqual(
        "MRLCLIIIYHRGTCMGGISIWQXXXXXXXXXXXFGTKKLGSIGSDLGASIKGFKKAMSDDEPKQDKTSQDADFTAKTIADKQADTNQEQAKTEDAKRHDKEQV",
        hsp.query.seq,
    )
    self.assertEqual(
        "MRLCLIIIYHRGTCMGGISIWQLLIIAVIVVLLFGTKKLGSIGSDLGASIKGFKKAMSDDEPKQDKTSQDADFTAKTIADKQADTNQEQAKTEDAKRHDKEQV",
        hsp.hit.seq,
    )
    self.assertEqual(
        "MRLCLIIIYHRGTCMGGISIWQLLIIAVIVVLLFGTKKLGSIGSDLGASIKGFKKAMSDDEPKQDKTSQDADFTAKTIADKQADTNQEQAKTEDAKRHDKEQV",
        hsp.aln_annotation["similarity"],
    )
    self.assertRaises(IndexError, hit.__getitem__, 1)

    # parse last hit
    hit = qresult[-1]
    self.assertEqual("gi|39593039|emb|CAE64508.1|", hit.id)
    self.assertEqual(
        "Hypothetical protein CBG09238 [Caenorhabditis briggsae]", hit.description
    )
    self.assertEqual(960, hit.seq_len)
    self.assertEqual(1, len(hit))
    self.assertRaises(IndexError, hit.__getitem__, 1)

    hsp = hit.hsps[0]
    self.assertEqual(31.5722, hsp.bitscore)
    self.assertEqual(70, hsp.bitscore_raw)
    self.assertEqual(7.7721, hsp.evalue)
    self.assertEqual(54, hsp.query_start)
    self.assertEqual(102, hsp.query_end)
    self.assertEqual(409, hsp.hit_start)
    self.assertEqual(459, hsp.hit_end)
    self.assertEqual(0, hsp.query_frame)
    self.assertEqual(0, hsp.hit_frame)
    self.assertEqual(19, hsp.ident_num)
    self.assertEqual(33, hsp.pos_num)
    self.assertEqual(4, hsp.gap_num)
    self.assertEqual(51, hsp.aln_span)
    self.assertEqual(
        "KAMSDDEPKQD---KTSQDADFTAKTIADKQADTNQEQAKTEDAKRHDKEQ", hsp.query.seq
    )
    self.assertEqual(
        "KKEADDKAKKDLEAKTKKEADEKAKKEADEKA-KKEAEAKTKEAEAKTKKE", hsp.hit.seq
    )
    # The similarity line carries one character per alignment column, so it
    # must be 51 characters long like the two rows above: 19 identity
    # letters, 14 "+" and 18 blanks.  Runs of blanks are significant and
    # must not be collapsed.
    self.assertEqual(
        "K  +DD+ K+D   KT ++AD  AK  AD++A   + +AKT++A+   K++",
        hsp.aln_annotation["similarity"],
    )
    self.assertRaises(IndexError, hit.__getitem__, 1)

    # check if we've finished iteration over qresults
    self.assertRaises(StopIteration, next, qresults)
    self.assertEqual(1, counter)
def test_xml_2218_blastp_001(self):
    """Parse xml_2218_blastp_001.xml: one BLASTP 2.2.18+ query with ten hits."""

    def check(obj, **expected):
        # Compare each named attribute of ``obj`` with its expected value,
        # in the order the keyword arguments are given.
        for attr, value in expected.items():
            self.assertEqual(value, getattr(obj, attr))

    records = parse(get_file("xml_2218_blastp_001.xml"), FMT)
    seen = 0

    record = next(records)
    seen += 1

    # program metadata, search parameters and statistics of the only query
    check(
        record,
        version="2.2.18+",
        reference=REFERENCE,
        param_matrix="BLOSUM62",
        param_evalue_threshold=10.0,
        param_gap_open=11,
        param_gap_extend=1,
        program="blastp",
        target="nr",
        id="31493",
        description="unnamed protein product",
        seq_len=70,
        stat_db_num=15287,
        stat_db_len=7033566,
        stat_eff_space=0,
        stat_kappa=0.041,
        stat_lambda=0.267,
        stat_entropy=0.14,
    )
    self.assertEqual(10, len(record))

    # first hit: a single HSP
    first_hit = record[0]
    check(
        first_hit,
        id="gi|151942244|gb|EDN60600.1|",
        description="cytosolic iron-sulfur protein assembly protein [Saccharomyces cerevisiae YJM789]",
        seq_len=330,
    )
    self.assertEqual(1, len(first_hit))
    self.assertRaises(IndexError, first_hit.__getitem__, 1)

    hsp = first_hit.hsps[0]
    check(
        hsp,
        bitscore=33.113,
        bitscore_raw=74,
        evalue=0.0185319,
        query_start=14,
        query_end=62,
        hit_start=113,
        hit_end=163,
        query_frame=0,
        hit_frame=0,
        ident_num=16,
        pos_num=27,
        gap_num=2,
        aln_span=50,
    )
    self.assertEqual(
        "AWNKDRTQIAICPNNHEVHIYE--KSGAKWNKVHELKEHNGQVTGIDWAP", hsp.query.seq
    )
    self.assertEqual(
        "AWSNDGYYLATCSRDKSVWIWETDESGEEYECISVLQEHSQDVKHVIWHP", hsp.hit.seq
    )
    self.assertEqual(
        "AW+ D +A C + V I+E +SG ++ + L+EH+ V + W P",
        hsp.aln_annotation["similarity"],
    )
    self.assertRaises(IndexError, first_hit.__getitem__, 1)

    # last hit: two HSPs, only the first one is inspected
    last_hit = record[-1]
    check(
        last_hit,
        id="gi|151567870|pdb|2PM9|B",
        description="Chain B, Crystal Structure Of Yeast Sec1331 VERTEX ELEMENT OF THE Copii Vesicular Coat",
        seq_len=297,
    )
    self.assertEqual(2, len(last_hit))

    hsp = last_hit.hsps[0]
    check(
        hsp,
        bitscore=30.8018,
        bitscore_raw=68,
        evalue=0.0919731,
        query_start=20,
        query_end=62,
        hit_start=67,
        hit_end=109,
        query_frame=0,
        hit_frame=0,
        ident_num=11,
        pos_num=23,
        gap_num=0,
        aln_span=42,
    )
    self.assertEqual("TQIAICPNNHEVHIYEKSGAKWNKVHELKEHNGQVTGIDWAP", hsp.query.seq)
    self.assertEqual("TILASCSYDGKVMIWKEENGRWSQIAVHAVHSASVNSVQWAP", hsp.hit.seq)
    self.assertEqual(
        "T +A C + +V I+++ +W+++ H+ V + WAP",
        hsp.aln_annotation["similarity"],
    )

    # the iterator must be exhausted after the single record
    with self.assertRaises(StopIteration):
        next(records)
    self.assertEqual(1, seen)
def test_xml_2218_blastp_002(self):
    """Parse xml_2218_blastp_002.xml: two BLASTP 2.2.18+ queries without hits."""

    def check(obj, **expected):
        # Compare each named attribute of ``obj`` with its expected value,
        # in the order the keyword arguments are given.
        for attr, value in expected.items():
            self.assertEqual(value, getattr(obj, attr))

    # database statistics asserted with the same values for both records
    db_stats = {
        "stat_db_num": 27252,
        "stat_db_len": 13958303,
        "stat_eff_space": 0,
        "stat_kappa": 0.041,
        "stat_lambda": 0.267,
        "stat_entropy": 0.14,
    }

    records = parse(get_file("xml_2218_blastp_002.xml"), FMT)
    seen = 0

    # first record: meta variables are only checked here
    record = next(records)
    seen += 1
    check(
        record,
        version="2.2.18+",
        reference=REFERENCE,
        param_matrix="BLOSUM62",
        param_evalue_threshold=0.01,
        param_gap_open=11,
        param_gap_extend=1,
        param_filter="m L; R -d repeat/repeat_9606;",
        program="blastp",
        target="gpipe/9606/Previous/protein",
        id="gi|585505|sp|Q08386|MOPB_RHOCA",
        description="Molybdenum-pterin-binding protein mopB >gi|310278|gb|AAA71913.1| molybdenum-pterin-binding protein",
        seq_len=270,
        **db_stats,
    )
    self.assertEqual(0, len(record))

    # second record
    record = next(records)
    seen += 1
    check(
        record,
        id="gi|129628|sp|P07175.1|PARA_AGRTU",
        description="Protein parA",
        seq_len=222,
        **db_stats,
    )
    self.assertEqual(0, len(record))

    # the iterator must be exhausted after the second record
    with self.assertRaises(StopIteration):
        next(records)
    self.assertEqual(2, seen)
def test_xml_2218L_blastp_001(self):
    """Parse xml_2218L_blastp_001.xml: one BLASTP 2.2.18 query without hits."""

    def check(obj, **expected):
        # Compare each named attribute of ``obj`` with its expected value,
        # in the order the keyword arguments are given.
        for attr, value in expected.items():
            self.assertEqual(value, getattr(obj, attr))

    records = parse(get_file("xml_2218L_blastp_001.xml"), FMT)
    seen = 0

    record = next(records)
    seen += 1

    # meta variables; the reference text of this file carries '~' markers
    check(
        record,
        version="2.2.18",
        reference=(
            "~Reference: Altschul, Stephen F., "
            "Thomas L. Madden, Alejandro A. Schaffer, "
            "~Jinghui Zhang, Zheng Zhang, Webb Miller, "
            'and David J. Lipman (1997), ~"Gapped BLAST '
            "and PSI-BLAST: a new generation of protein "
            'database search~programs", '
            "Nucleic Acids Res. 25:3389-3402."
        ),
        param_matrix="BLOSUM62",
        param_evalue_threshold=1e-05,
        param_filter="F",
        param_gap_open=11,
        param_gap_extend=1,
        program="blastp",
        target="/Users/pjcock/Downloads/Software/blast-2.2.18/data/nr",
    )

    # parsed values of the only record
    check(
        record,
        id="Fake",
        description="",
        seq_len=9,
        stat_db_num=6589360,
        stat_db_len=2253133281,
        stat_eff_space=2.02782e10,
        stat_kappa=0.041,
        stat_lambda=0.267,
        stat_entropy=0.14,
    )
    self.assertEqual(0, len(record))

    # the iterator must be exhausted after the single record
    with self.assertRaises(StopIteration):
        next(records)
    self.assertEqual(1, seen)
def test_xml_2222_blastp_001(self):
    """Parse xml_2222_blastp_001.xml: first BLASTP 2.2.22+ query with ten hits."""

    def check(obj, **expected):
        # Compare each named attribute of ``obj`` with its expected value,
        # in the order the keyword arguments are given.
        for attr, value in expected.items():
            self.assertEqual(value, getattr(obj, attr))

    records = parse(get_file("xml_2222_blastp_001.xml"), FMT)
    # NOTE(review): unlike the sibling tests, this one never asserts on the
    # counter nor checks that the iterator is exhausted - confirm whether
    # that omission is intentional.
    seen = 0

    record = next(records)
    seen += 1

    # program metadata, search parameters and statistics
    check(
        record,
        version="2.2.22+",
        reference='Stephen F. Altschul, Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.',
        param_matrix="BLOSUM62",
        param_evalue_threshold=1e-06,
        param_filter="F",
        param_gap_open=11,
        param_gap_extend=1,
        program="blastp",
        target="nr",
        id="1",
        description="gi|3298468|dbj|BAA31520.1| SAMIPF",
        seq_len=107,
        stat_db_num=8994603,
        stat_db_len=-1216159329,
        stat_eff_space=76934807744,
        stat_kappa=0.041,
        stat_lambda=0.267,
        stat_entropy=0.14,
    )
    self.assertEqual(10, len(record))

    # first hit: a single HSP
    first_hit = record[0]
    check(
        first_hit,
        id="gi|3298468|dbj|BAA31520.1|",
        description="SAMIPF [Aster tripolium]",
        seq_len=107,
    )
    self.assertEqual(1, len(first_hit))

    hsp = first_hit.hsps[0]
    check(
        hsp,
        bitscore=204.912011757068,
        bitscore_raw=520,
        evalue=1.77242652875017e-51,
        query_start=0,
        query_end=107,
        hit_start=0,
        hit_end=107,
        query_frame=0,
        hit_frame=0,
        ident_num=107,
        pos_num=107,
        gap_num=0,
        aln_span=107,
    )
    self.assertEqual(
        "GGHVNPAVTFGAFVGGNITLLRGIVYIIAQLLGSTVACLLLKFVTNDMAVGVFSLSAGVGVTNALVFEIVMTFGLVYTVYATAIDPKKGSLGTIAPIAIGFIVGANI",
        hsp.query.seq,
    )
    self.assertEqual(
        "GGHVNPAVTFGAFVGGNITLLRGIVYIIAQLLGSTVACLLLKFVTNDMAVGVFSLSAGVGVTNALVFEIVMTFGLVYTVYATAIDPKKGSLGTIAPIAIGFIVGANI",
        hsp.hit.seq,
    )
    self.assertEqual(
        "GGHVNPAVTFGAFVGGNITLLRGIVYIIAQLLGSTVACLLLKFVTNDMAVGVFSLSAGVGVTNALVFEIVMTFGLVYTVYATAIDPKKGSLGTIAPIAIGFIVGANI",
        hsp.aln_annotation["similarity"],
    )
    self.assertRaises(IndexError, first_hit.__getitem__, 1)

    # last hit: a single HSP
    last_hit = record[-1]
    check(
        last_hit,
        id="gi|162809290|dbj|BAF95576.1|",
        description="tonoplast intrinsic protein [Nicotiana tabacum]",
        seq_len=251,
    )
    self.assertEqual(1, len(last_hit))
    self.assertRaises(IndexError, last_hit.__getitem__, 1)

    hsp = last_hit.hsps[0]
    check(
        hsp,
        bitscore=177.948041442853,
        bitscore_raw=450,
        evalue=2.0302699895292e-43,
        query_start=0,
        query_end=107,
        hit_start=80,
        hit_end=187,
        query_frame=0,
        hit_frame=0,
        ident_num=91,
        pos_num=95,
        gap_num=0,
        aln_span=107,
    )
    self.assertEqual(
        "GGHVNPAVTFGAFVGGNITLLRGIVYIIAQLLGSTVACLLLKFVTNDMAVGVFSLSAGVGVTNALVFEIVMTFGLVYTVYATAIDPKKGSLGTIAPIAIGFIVGANI",
        hsp.query.seq,
    )
    self.assertEqual(
        "GGHVNPAVTFGAFVGGNITLFRGILYIIAQLLGSTVACFLLEFATGGMSTGAFALSAGVSVWNAFVFEIVMTFGLVYTVYATAIDPKKGDLGVIAPIAIGFIVGANI",
        hsp.hit.seq,
    )
    self.assertEqual(
        "GGHVNPAVTFGAFVGGNITL RGI+YIIAQLLGSTVAC LL+F T M+ G F+LSAGV V NA VFEIVMTFGLVYTVYATAIDPKKG LG IAPIAIGFIVGANI",
        hsp.aln_annotation["similarity"],
    )
    self.assertRaises(IndexError, last_hit.__getitem__, 1)
# TODO: add test_xml_2218L_rpsblast_001 (PSI-BLAST output, to be handled later).
def test_xml_2226_blastp_001(self):
    """Parse xml_2226_blastp_001.xml: three BLASTP 2.2.26+ queries."""

    def check(obj, **expected):
        # Compare each named attribute of ``obj`` with its expected value,
        # in the order the keyword arguments are given.
        for attr, value in expected.items():
            self.assertEqual(value, getattr(obj, attr))

    records = parse(get_file("xml_2226_blastp_001.xml"), FMT)
    seen = 0

    # ---- first record: meta variables are only checked here ----
    record = next(records)
    seen += 1
    check(
        record,
        version="2.2.26+",
        reference=(
            "Stephen F. Altschul, Thomas L. Madden, Alejandro "
            "A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb "
            'Miller, and David J. Lipman (1997), "Gapped BLAST '
            "and PSI-BLAST: a new generation of protein "
            'database search programs", '
            "Nucleic Acids Res. 25:3389-3402."
        ),
        param_matrix="BLOSUM62",
        param_evalue_threshold=10.0,
        param_filter="F",
        param_gap_open=11,
        param_gap_extend=1,
        program="blastp",
        target="db/minirefseq_prot",
    )
    check(
        record,
        id="random_s00",
        description="",
        seq_len=32,
        stat_db_num=20,
        stat_db_len=6406,
        stat_eff_space=156650.0,
        stat_kappa=0.041,
        stat_lambda=0.267,
        stat_entropy=0.14,
    )
    self.assertEqual(0, len(record))

    # ---- second record ----
    record = next(records)
    seen += 1
    check(
        record,
        id="gi|16080617|ref|NP_391444.1|",
        description=(
            "membrane bound lipoprotein [Bacillus subtilis "
            "subsp. subtilis str. 168]"
        ),
        seq_len=102,
        stat_db_num=20,
        stat_db_len=6406,
        stat_eff_space=361344,
        stat_kappa=0.041,
        stat_lambda=0.267,
        stat_entropy=0.14,
    )
    self.assertEqual(5, len(record))

    hit = record[0]
    check(
        hit,
        id="gi|308175296|ref|YP_003922001.1|",
        description="membrane bound lipoprotein [Bacillus amyloliquefaciens DSM 7]",
        seq_len=100,
    )
    self.assertEqual(1, len(hit))
    self.assertRaises(IndexError, hit.__getitem__, 1)

    hsp = hit.hsps[0]
    check(
        hsp,
        bitscore=139.428,
        bitscore_raw=350,
        evalue=1.99275e-46,
        query_start=0,
        query_end=102,
        hit_start=0,
        hit_end=100,
        query_frame=0,
        hit_frame=0,
        ident_num=69,
        pos_num=81,
        gap_num=2,
        aln_span=102,
    )
    self.assertEqual(
        "MKKFIALLFFILLLSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTSDLDKFNSGDKVTITYEKNDEGQLLLKDIERAN",
        hsp.query.seq,
    )
    self.assertEqual(
        "MKKIFGCLFFILLLAGCGVTNEKSQGEDAG--EKLVTKEGTYVGLADTHTIEVTVDHEPVSFDITEESADDVKNLNNGEKVTVKYQKNSKGQLVLKDIEPAN",
        hsp.hit.seq,
    )
    self.assertEqual(
        "MKK LFFILLL+GCGV ++KSQGED + TKEGTYVGLADTHTIEVTVD+EPVS DITEES D+ N+G+KVT+ Y+KN +GQL+LKDIE AN",
        hsp.aln_annotation["similarity"],
    )
    self.assertRaises(IndexError, hit.__getitem__, 1)

    # ---- third record ----
    record = next(records)
    seen += 1
    check(
        record,
        id="gi|11464971:4-101",
        description="pleckstrin [Mus musculus]",
        seq_len=98,
        stat_db_num=20,
        stat_db_len=6406,
        stat_eff_space=345626,
        stat_kappa=0.041,
        stat_lambda=0.267,
        stat_entropy=0.14,
    )
    self.assertEqual(5, len(record))

    # first hit of the third record: first and last HSP
    hit = record[0]
    check(
        hit,
        id="gi|11464971|ref|NP_062422.1|",
        description="pleckstrin [Mus musculus]",
        seq_len=350,
    )
    self.assertEqual(2, len(hit))

    hsp = hit.hsps[0]
    check(
        hsp,
        bitscore=205.682,
        bitscore_raw=522,
        evalue=2.24956e-69,
        query_start=0,
        query_end=98,
        hit_start=3,
        hit_end=101,
        query_frame=0,
        hit_frame=0,
        ident_num=98,
        pos_num=98,
        gap_num=0,
        aln_span=98,
    )
    self.assertEqual(
        "KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK",
        hsp.hit.seq,
    )
    self.assertEqual(
        "KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK",
        hsp.query.seq,
    )
    self.assertEqual(
        "KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK",
        hsp.aln_annotation["similarity"],
    )

    hsp = hit.hsps[-1]
    check(
        hsp,
        bitscore=43.5134,
        bitscore_raw=101,
        evalue=2.90061e-09,
        query_start=2,
        query_end=96,
        hit_start=245,
        hit_end=345,
        query_frame=0,
        hit_frame=0,
        ident_num=29,
        pos_num=48,
        gap_num=6,
        aln_span=100,
    )
    self.assertEqual(
        "IREGYLVKKGSVFNTWKPMWVVLLEDG--IEFYKKKSDNSPKGMIPLKGSTLTS--PCQDFGK--RMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKA",
        hsp.query.seq,
    )
    self.assertEqual(
        "IKQGCLLKQGHRRKNWKVRKFILREDPAYLHYYDPAGGEDPLGAVHLRGCVVTSVESSHDVKKSDEENLFEIITADEVHYYLQAATSKERTEWIKAIQVA",
        hsp.hit.seq,
    )
    self.assertEqual(
        "I++G L+K+G WK +L ED + +Y P G + L+G +TS D K + +I T + ++ QAA +ER W++ I+ A",
        hsp.aln_annotation["similarity"],
    )

    # last hit of the third record: first HSP only
    hit = record[-1]
    check(
        hit,
        id="gi|350596020|ref|XP_003360649.2|",
        description="PREDICTED: pleckstrin-like [Sus scrofa]",
        seq_len=228,
    )
    self.assertEqual(2, len(hit))

    hsp = hit.hsps[0]
    check(
        hsp,
        bitscore=199.519,
        bitscore_raw=506,
        evalue=1.97058e-68,
        query_start=0,
        query_end=98,
        hit_start=3,
        hit_end=101,
        query_frame=0,
        hit_frame=0,
        ident_num=94,
        pos_num=96,
        gap_num=0,
        aln_span=98,
    )
    self.assertEqual(
        "KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK",
        hsp.query.seq,
    )
    self.assertEqual(
        "KRIREGYLVKKGSMFNTWKPMWVILLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVFKITTTKQQDHFFQAAFLEERDGWVRDIKKAIK",
        hsp.hit.seq,
    )
    self.assertEqual(
        "KRIREGYLVKKGS+FNTWKPMWV+LLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFV KITTTKQQDHFFQAAFLEERD WVRDIKKAIK",
        hsp.aln_annotation["similarity"],
    )

    # the iterator must be exhausted after the third record
    with self.assertRaises(StopIteration):
        next(records)
    self.assertEqual(3, seen)
def test_xml_2226_blastp_002(self):
    """Parse xml_2226_blastp_002.xml: one BLASTP 2.2.26+ query without hits."""

    def check(obj, **expected):
        # Compare each named attribute of ``obj`` with its expected value,
        # in the order the keyword arguments are given.
        for attr, value in expected.items():
            self.assertEqual(value, getattr(obj, attr))

    records = parse(get_file("xml_2226_blastp_002.xml"), FMT)
    seen = 0

    record = next(records)
    seen += 1

    # meta variables
    check(
        record,
        version="2.2.26+",
        reference=(
            "Stephen F. Altschul, Thomas L. Madden, Alejandro "
            "A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb "
            'Miller, and David J. Lipman (1997), "Gapped BLAST '
            "and PSI-BLAST: a new generation of protein "
            'database search programs", '
            "Nucleic Acids Res. 25:3389-3402."
        ),
        param_matrix="BLOSUM62",
        param_evalue_threshold=10.0,
        param_filter="F",
        param_gap_open=11,
        param_gap_extend=1,
        program="blastp",
        target="db/minirefseq_prot",
    )

    # parsed values of the only record
    check(
        record,
        id="random_s00",
        description="",
        seq_len=32,
        stat_db_num=20,
        stat_db_len=6406,
        stat_eff_space=156650.0,
        stat_kappa=0.041,
        stat_lambda=0.267,
        stat_entropy=0.14,
    )
    self.assertEqual(0, len(record))

    # the iterator must be exhausted after the single record
    with self.assertRaises(StopIteration):
        next(records)
    self.assertEqual(1, seen)
def test_xml_2226_blastp_003(self):
    """Parse xml_2226_blastp_003.xml: one BLASTP 2.2.26+ query with five hits."""

    def check(obj, **expected):
        # Compare each named attribute of ``obj`` with its expected value,
        # in the order the keyword arguments are given.
        for attr, value in expected.items():
            self.assertEqual(value, getattr(obj, attr))

    records = parse(get_file("xml_2226_blastp_003.xml"), FMT)
    seen = 0

    record = next(records)
    seen += 1

    # program metadata, search parameters and statistics
    check(
        record,
        version="2.2.26+",
        reference=(
            "Stephen F. Altschul, Thomas L. Madden, Alejandro "
            "A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb "
            'Miller, and David J. Lipman (1997), "Gapped BLAST '
            "and PSI-BLAST: a new generation of protein "
            'database search programs", '
            "Nucleic Acids Res. 25:3389-3402."
        ),
        param_matrix="BLOSUM62",
        param_evalue_threshold=10.0,
        param_filter="F",
        param_gap_open=11,
        param_gap_extend=1,
        program="blastp",
        target="db/minirefseq_prot",
        id="gi|16080617|ref|NP_391444.1|",
        description=(
            "membrane bound lipoprotein [Bacillus subtilis "
            "subsp. subtilis str. 168]"
        ),
        seq_len=102,
        stat_db_num=20,
        stat_db_len=6406,
        stat_eff_space=361344,
        stat_kappa=0.041,
        stat_lambda=0.267,
        stat_entropy=0.14,
    )
    self.assertEqual(5, len(record))

    # first hit: a single HSP
    first_hit = record[0]
    check(
        first_hit,
        id="gi|308175296|ref|YP_003922001.1|",
        description="membrane bound lipoprotein [Bacillus amyloliquefaciens DSM 7]",
        seq_len=100,
    )
    self.assertEqual(1, len(first_hit))
    self.assertRaises(IndexError, first_hit.__getitem__, 1)

    hsp = first_hit.hsps[0]
    check(
        hsp,
        bitscore=139.428,
        bitscore_raw=350,
        evalue=1.99275e-46,
        query_start=0,
        query_end=102,
        hit_start=0,
        hit_end=100,
        query_frame=0,
        hit_frame=0,
        ident_num=69,
        pos_num=81,
        gap_num=2,
        aln_span=102,
    )
    self.assertEqual(
        "MKKFIALLFFILLLSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTSDLDKFNSGDKVTITYEKNDEGQLLLKDIERAN",
        hsp.query.seq,
    )
    self.assertEqual(
        "MKKIFGCLFFILLLAGCGVTNEKSQGEDAG--EKLVTKEGTYVGLADTHTIEVTVDHEPVSFDITEESADDVKNLNNGEKVTVKYQKNSKGQLVLKDIEPAN",
        hsp.hit.seq,
    )
    self.assertEqual(
        "MKK LFFILLL+GCGV ++KSQGED + TKEGTYVGLADTHTIEVTVD+EPVS DITEES D+ N+G+KVT+ Y+KN +GQL+LKDIE AN",
        hsp.aln_annotation["similarity"],
    )
    self.assertRaises(IndexError, first_hit.__getitem__, 1)

    # the iterator must be exhausted after the single record
    with self.assertRaises(StopIteration):
        next(records)
    self.assertEqual(1, seen)
def test_xml_2226_blastp_004(self):
    """Parse xml_2226_blastp_004.xml: one BLASTP 2.2.26+ query with five hits."""

    def check(obj, **expected):
        # Compare each named attribute of ``obj`` with its expected value,
        # in the order the keyword arguments are given.
        for attr, value in expected.items():
            self.assertEqual(value, getattr(obj, attr))

    records = parse(get_file("xml_2226_blastp_004.xml"), FMT)
    seen = 0

    record = next(records)
    seen += 1

    # program metadata, search parameters and statistics
    check(
        record,
        version="2.2.26+",
        reference=(
            "Stephen F. Altschul, Thomas L. Madden, Alejandro "
            "A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb "
            'Miller, and David J. Lipman (1997), "Gapped BLAST '
            "and PSI-BLAST: a new generation of protein "
            'database search programs", '
            "Nucleic Acids Res. 25:3389-3402."
        ),
        param_matrix="BLOSUM62",
        param_evalue_threshold=10.0,
        param_filter="F",
        param_gap_open=11,
        param_gap_extend=1,
        program="blastp",
        target="db/minirefseq_prot",
        id="gi|11464971:4-101",
        description="pleckstrin [Mus musculus]",
        seq_len=98,
        stat_db_num=20,
        stat_db_len=6406,
        stat_eff_space=345626,
        stat_kappa=0.041,
        stat_lambda=0.267,
        stat_entropy=0.14,
    )
    self.assertEqual(5, len(record))

    # first hit: first and last HSP
    first_hit = record[0]
    check(
        first_hit,
        id="gi|11464971|ref|NP_062422.1|",
        description="pleckstrin [Mus musculus]",
        seq_len=350,
    )
    self.assertEqual(2, len(first_hit))

    hsp = first_hit.hsps[0]
    check(
        hsp,
        bitscore=205.682,
        bitscore_raw=522,
        evalue=2.24956e-69,
        query_start=0,
        query_end=98,
        hit_start=3,
        hit_end=101,
        query_frame=0,
        hit_frame=0,
        ident_num=98,
        pos_num=98,
        gap_num=0,
        aln_span=98,
    )
    self.assertEqual(
        "KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK",
        hsp.hit.seq,
    )
    self.assertEqual(
        "KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK",
        hsp.query.seq,
    )
    self.assertEqual(
        "KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK",
        hsp.aln_annotation["similarity"],
    )

    hsp = first_hit.hsps[-1]
    check(
        hsp,
        bitscore=43.5134,
        bitscore_raw=101,
        evalue=2.90061e-09,
        query_start=2,
        query_end=96,
        hit_start=245,
        hit_end=345,
        query_frame=0,
        hit_frame=0,
        ident_num=29,
        pos_num=48,
        gap_num=6,
        aln_span=100,
    )
    self.assertEqual(
        "IREGYLVKKGSVFNTWKPMWVVLLEDG--IEFYKKKSDNSPKGMIPLKGSTLTS--PCQDFGK--RMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKA",
        hsp.query.seq,
    )
    self.assertEqual(
        "IKQGCLLKQGHRRKNWKVRKFILREDPAYLHYYDPAGGEDPLGAVHLRGCVVTSVESSHDVKKSDEENLFEIITADEVHYYLQAATSKERTEWIKAIQVA",
        hsp.hit.seq,
    )
    self.assertEqual(
        "I++G L+K+G WK +L ED + +Y P G + L+G +TS D K + +I T + ++ QAA +ER W++ I+ A",
        hsp.aln_annotation["similarity"],
    )

    # last hit: first HSP only
    last_hit = record[-1]
    check(
        last_hit,
        id="gi|350596020|ref|XP_003360649.2|",
        description="PREDICTED: pleckstrin-like [Sus scrofa]",
        seq_len=228,
    )
    self.assertEqual(2, len(last_hit))

    hsp = last_hit.hsps[0]
    check(
        hsp,
        bitscore=199.519,
        bitscore_raw=506,
        evalue=1.97058e-68,
        query_start=0,
        query_end=98,
        hit_start=3,
        hit_end=101,
        query_frame=0,
        hit_frame=0,
        ident_num=94,
        pos_num=96,
        gap_num=0,
        aln_span=98,
    )
    self.assertEqual(
        "KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK",
        hsp.query.seq,
    )
    self.assertEqual(
        "KRIREGYLVKKGSMFNTWKPMWVILLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVFKITTTKQQDHFFQAAFLEERDGWVRDIKKAIK",
        hsp.hit.seq,
    )
    self.assertEqual(
        "KRIREGYLVKKGS+FNTWKPMWV+LLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFV KITTTKQQDHFFQAAFLEERD WVRDIKKAIK",
        hsp.aln_annotation["similarity"],
    )

    # the iterator must be exhausted after the single record
    with self.assertRaises(StopIteration):
        next(records)
    self.assertEqual(1, seen)
def test_xml_2226_blastp_005(self):
    """Parse xml_2226_blastp_005.xml: three BLASTP 2.2.26+ queries vs refseq_protein."""

    def check(obj, **expected):
        # Compare each named attribute of ``obj`` with its expected value,
        # in the order the keyword arguments are given.
        for attr, value in expected.items():
            self.assertEqual(value, getattr(obj, attr))

    # database statistics asserted with the same values for all three records
    db_stats = {
        "stat_db_num": 12646943,
        "stat_db_len": 4397139428,
        "stat_eff_space": 0,
        "stat_kappa": 0.041,
        "stat_lambda": 0.267,
        "stat_entropy": 0.14,
    }

    records = parse(get_file("xml_2226_blastp_005.xml"), FMT)
    seen = 0

    # ---- first record: meta variables are only checked here ----
    record = next(records)
    seen += 1
    check(
        record,
        version="2.2.26+",
        reference=(
            "Stephen F. Altschul, Thomas L. Madden, Alejandro "
            "A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb "
            'Miller, and David J. Lipman (1997), "Gapped BLAST '
            "and PSI-BLAST: a new generation of protein "
            'database search programs", '
            "Nucleic Acids Res. 25:3389-3402."
        ),
        param_matrix="BLOSUM62",
        param_evalue_threshold=10.0,
        param_filter="F",
        param_gap_open=11,
        param_gap_extend=1,
        program="blastp",
        target="refseq_protein",
    )
    check(
        record,
        id="random_s00",
        description="",
        seq_len=32,
        **db_stats,
    )
    self.assertEqual(0, len(record))

    # ---- second record ----
    record = next(records)
    seen += 1
    check(
        record,
        id="gi|16080617|ref|NP_391444.1|",
        description="membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168]",
        seq_len=102,
        **db_stats,
    )
    self.assertEqual(5, len(record))

    # looking a hit up by any of its alternative IDs must give the same hit
    primary_id = "gi|16080617|ref|NP_391444.1|"
    for alt_id in (
        "gi|221311516|ref|ZP_03593363.1|",
        "gi|221315843|ref|ZP_03597648.1|",
        "gi|221320757|ref|ZP_03602051.1|",
        "gi|221325043|ref|ZP_03606337.1|",
        "gi|321313111|ref|YP_004205398.1|",
    ):
        self.assertEqual(record[primary_id], record[alt_id])

    hit = record[0]
    check(
        hit,
        id="gi|16080617|ref|NP_391444.1|",
        description=(
            "membrane bound lipoprotein [Bacillus "
            "subtilis subsp. subtilis str. 168]"
        ),
    )
    self.assertEqual(6, len(hit.id_all))
    self.assertEqual(6, len(hit.description_all))
    check(hit, seq_len=102)
    self.assertEqual(1, len(hit))
    self.assertRaises(IndexError, hit.__getitem__, 1)

    hsp = hit.hsps[0]
    check(
        hsp,
        bitscore=205.297,
        bitscore_raw=521,
        evalue=1.45285e-66,
        query_start=0,
        query_end=102,
        hit_start=0,
        hit_end=102,
        query_frame=0,
        hit_frame=0,
        ident_num=102,
        pos_num=102,
        gap_num=0,
        aln_span=102,
    )
    self.assertEqual(
        "MKKFIALLFFILLLSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTSDLDKFNSGDKVTITYEKNDEGQLLLKDIERAN",
        hsp.query.seq,
    )
    self.assertEqual(
        "MKKFIALLFFILLLSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTSDLDKFNSGDKVTITYEKNDEGQLLLKDIERAN",
        hsp.hit.seq,
    )
    self.assertEqual(
        "MKKFIALLFFILLLSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTSDLDKFNSGDKVTITYEKNDEGQLLLKDIERAN",
        hsp.aln_annotation["similarity"],
    )
    self.assertRaises(IndexError, hit.__getitem__, 1)

    # ---- third record ----
    record = next(records)
    seen += 1
    check(
        record,
        id="gi|11464971:4-101",
        description="pleckstrin [Mus musculus]",
        seq_len=98,
        **db_stats,
    )
    self.assertEqual(5, len(record))

    # first hit of the third record: first and last HSP
    hit = record[0]
    check(
        hit,
        id="gi|11464971|ref|NP_062422.1|",
        description="pleckstrin [Mus musculus]",
        seq_len=350,
    )
    self.assertEqual(2, len(hit))

    hsp = hit.hsps[0]
    check(
        hsp,
        bitscore=205.682,
        bitscore_raw=522,
        evalue=1.54412e-63,
        query_start=0,
        query_end=98,
        hit_start=3,
        hit_end=101,
        query_frame=0,
        hit_frame=0,
        ident_num=98,
        pos_num=98,
        gap_num=0,
        aln_span=98,
    )
    self.assertEqual(
        "KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK",
        hsp.hit.seq,
    )
    self.assertEqual(
        "KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK",
        hsp.query.seq,
    )
    self.assertEqual(
        "KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK",
        hsp.aln_annotation["similarity"],
    )

    hsp = hit.hsps[-1]
    check(
        hsp,
        bitscore=43.5134,
        bitscore_raw=101,
        evalue=0.00199101,
        query_start=2,
        query_end=96,
        hit_start=245,
        hit_end=345,
        query_frame=0,
        hit_frame=0,
        ident_num=29,
        pos_num=48,
        gap_num=6,
        aln_span=100,
    )
    self.assertEqual(
        "IREGYLVKKGSVFNTWKPMWVVLLEDG--IEFYKKKSDNSPKGMIPLKGSTLTS--PCQDFGK--RMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKA",
        hsp.query.seq,
    )
    self.assertEqual(
        "IKQGCLLKQGHRRKNWKVRKFILREDPAYLHYYDPAGGEDPLGAVHLRGCVVTSVESSHDVKKSDEENLFEIITADEVHYYLQAATSKERTEWIKAIQVA",
        hsp.hit.seq,
    )
    self.assertEqual(
        "I++G L+K+G WK +L ED + +Y P G + L+G +TS D K + +I T + ++ QAA +ER W++ I+ A",
        hsp.aln_annotation["similarity"],
    )

    # last hit of the third record: first HSP only
    hit = record[-1]
    check(
        hit,
        id="gi|350596020|ref|XP_003360649.2|",
        description="PREDICTED: pleckstrin-like [Sus scrofa]",
        seq_len=228,
    )
    self.assertEqual(2, len(hit))

    hsp = hit.hsps[0]
    check(
        hsp,
        bitscore=199.519,
        bitscore_raw=506,
        evalue=1.35263e-62,
        query_start=0,
        query_end=98,
        hit_start=3,
        hit_end=101,
        query_frame=0,
        hit_frame=0,
        ident_num=94,
        pos_num=96,
        gap_num=0,
        aln_span=98,
    )
    self.assertEqual(
        "KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK",
        hsp.query.seq,
    )
    self.assertEqual(
        "KRIREGYLVKKGSMFNTWKPMWVILLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVFKITTTKQQDHFFQAAFLEERDGWVRDIKKAIK",
        hsp.hit.seq,
    )
    self.assertEqual(
        "KRIREGYLVKKGS+FNTWKPMWV+LLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFV KITTTKQQDHFFQAAFLEERD WVRDIKKAIK",
        hsp.aln_annotation["similarity"],
    )

    # the iterator must be exhausted after the third record
    with self.assertRaises(StopIteration):
        next(records)
    self.assertEqual(3, seen)
class BlastxXmlCases(unittest.TestCase):
def test_xml_2212L_blastx_001(self):
# Parse xml_2212L_blastx_001.xml and check the only qresult it yields:
# run-level metadata, database statistics, and the first HSP of the first
# and last hits.
xml_file = get_file("xml_2212L_blastx_001.xml")
qresults = parse(xml_file, FMT)
# counter records how many qresults have been pulled from the iterator
counter = 0
qresult = next(qresults)
counter += 1
# run-level metadata and search parameters
self.assertEqual("2.2.12", qresult.version)
self.assertEqual(REFERENCE, qresult.reference)
self.assertEqual("BLOSUM62", qresult.param_matrix)
self.assertEqual(10.0, qresult.param_evalue_threshold)
self.assertEqual("L;", qresult.param_filter)
self.assertEqual(11, qresult.param_gap_open)
self.assertEqual(1, qresult.param_gap_extend)
self.assertEqual("blastx", qresult.program)
self.assertEqual("nr", qresult.target)
# test parsed values of the first qresult
self.assertEqual("gi|1347369|gb|G25137.1|G25137", qresult.id)
self.assertEqual(
"human STS EST48004, sequence tagged site", qresult.description
)
self.assertEqual(556, qresult.seq_len)
# database / statistics values attached to the qresult
self.assertEqual(2934173, qresult.stat_db_num)
self.assertEqual(1011751523, qresult.stat_db_len)
self.assertEqual(0, qresult.stat_eff_space)
self.assertEqual(0.041, qresult.stat_kappa)
self.assertEqual(0.267, qresult.stat_lambda)
self.assertEqual(0.14, qresult.stat_entropy)
# test parsed values of the first hit
hit = qresult[0]
self.assertEqual("gi|12654095|gb|AAH00859.1|", hit.id)
self.assertEqual(
"Unknown (protein for IMAGE:3459481) [Homo sapiens]", hit.description
)
self.assertEqual(319, hit.seq_len)
self.assertEqual(1, len(hit))
# the single HSP of the first hit: scores, coordinates, frames, counts
hsp = hit.hsps[0]
self.assertEqual(247.284, hsp.bitscore)
self.assertEqual(630, hsp.bitscore_raw)
self.assertEqual(1.69599e-64, hsp.evalue)
self.assertEqual(0, hsp.query_start)
self.assertEqual(399, hsp.query_end)
self.assertEqual(155, hsp.hit_start)
self.assertEqual(288, hsp.hit_end)
self.assertEqual(1, hsp.query_frame)
self.assertEqual(0, hsp.hit_frame)
self.assertEqual(122, hsp.ident_num)
self.assertEqual(123, hsp.pos_num)
self.assertEqual(0, hsp.gap_num)
self.assertEqual(133, hsp.aln_span)
# aligned query sequence, aligned hit sequence, then the similarity line
self.assertEqual(
"DLQLLIKAVNLFPAGTNSRWEVIANYMNIHSSSGVKRTAKDVIGKAKSLQKLDPHQKDDINKKAFDKFKKEHGVVPQADNATPSERFXGPYTDFTPXTTEXQKLXEQALNTYPVNTXERWXXIAVAVPGRXKE",
hsp.query.seq,
)
self.assertEqual(
"DLQLLIKAVNLFPAGTNSRWEVIANYMNIHSSSGVKRTAKDVIGKAKSLQKLDPHQKDDINKKAFDKFKKEHGVVPQADNATPSERFEGPYTDFTPWTTEEQKLLEQALKTYPVNTPERWEKIAEAVPGRTKK",
hsp.hit.seq,
)
self.assertEqual(
"DLQLLIKAVNLFPAGTNSRWEVIANYMNIHSSSGVKRTAKDVIGKAKSLQKLDPHQKDDINKKAFDKFKKEHGVVPQADNATPSERF GPYTDFTP TTE QKL EQAL TYPVNT ERW IA AVPGR K+",
hsp.aln_annotation["similarity"],
)
# test parsed values of last hit
hit = qresult[-1]
self.assertEqual("gi|72081091|ref|XP_800619.1|", hit.id)
self.assertEqual(
"PREDICTED: hypothetical protein XP_795526 [Strongylocentrotus purpuratus]",
hit.description,
)
self.assertEqual(337, hit.seq_len)
# first HSP of the last hit; len(hit) and hsp.aln_span are not asserted
# for this hit
hsp = hit.hsps[0]
self.assertEqual(32.3426, hsp.bitscore)
self.assertEqual(72, hsp.bitscore_raw)
self.assertEqual(8.57476, hsp.evalue)
self.assertEqual(39, hsp.query_start)
self.assertEqual(231, hsp.query_end)
self.assertEqual(105, hsp.hit_start)
self.assertEqual(172, hsp.hit_end)
self.assertEqual(1, hsp.query_frame)
self.assertEqual(0, hsp.hit_frame)
self.assertEqual(21, hsp.ident_num)
self.assertEqual(37, hsp.pos_num)
self.assertEqual(3, hsp.gap_num)
# aligned query sequence, aligned hit sequence, then the similarity line
self.assertEqual(
"AGTNSRWEVIANYMNI--HSSSGVKRT-AKDVIGKAKSLQKLDPHQKDDINKKAFDKFKKEHGVVPQ",
hsp.query.seq,
)
self.assertEqual(
"SSSNSSSKASASSSNVGASSSSGTKKSDSKSSNESSKSKRDKEDHKEGSINRSKDEKVSKEHRVVKE",
hsp.hit.seq,
)
self.assertEqual(
"+ +NS + A+ N+ SSSG K++ +K +KS + + H++ IN+ +K KEH VV +",
hsp.aln_annotation["similarity"],
)
# check if we've finished iteration over qresults
self.assertRaises(StopIteration, next, qresults)
# exactly one qresult was consumed above
self.assertEqual(1, counter)
def test_xml_2222_blastx_001(self):
    """Check the first qresult parsed from xml_2222_blastx_001.xml.

    Only the first qresult, its first hit and that hit's single HSP are
    examined. The iterator is not drained, so this test asserts nothing
    about how many qresults the file holds; for that reason no running
    count of consumed qresults is kept.
    """
    xml_file = get_file("xml_2222_blastx_001.xml")
    qresults = parse(xml_file, FMT)

    qresult = next(qresults)

    # run-level metadata and search parameters
    self.assertEqual("2.2.22+", qresult.version)
    self.assertEqual(
        'Stephen F. Altschul, Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.',
        qresult.reference,
    )
    self.assertEqual("BLOSUM62", qresult.param_matrix)
    self.assertEqual(0.0001, qresult.param_evalue_threshold)
    self.assertEqual("L;", qresult.param_filter)
    self.assertEqual(11, qresult.param_gap_open)
    self.assertEqual(1, qresult.param_gap_extend)
    self.assertEqual("blastx", qresult.program)
    self.assertEqual("nr", qresult.target)

    # test parsed values of the first qresult
    self.assertEqual("1", qresult.id)
    self.assertEqual(
        "gi|4104054|gb|AH007193.1|SEG_CVIGS Centaurea vallesiaca 18S ribosomal RNA gene, partial sequence",
        qresult.description,
    )
    self.assertEqual(1002, qresult.seq_len)
    self.assertEqual(8994603, qresult.stat_db_num)
    # NOTE(review): the expected database length is negative; presumably
    # the fixture itself records an overflowed value - confirm against
    # the XML file.
    self.assertEqual(-1216159329, qresult.stat_db_len)
    self.assertEqual(367397307882, qresult.stat_eff_space)
    self.assertEqual(0.041, qresult.stat_kappa)
    self.assertEqual(0.267, qresult.stat_lambda)
    self.assertEqual(0.14, qresult.stat_entropy)

    # test parsed values of the first hit
    hit = qresult[0]
    self.assertEqual("gi|149390769|gb|ABR25402.1|", hit.id)
    self.assertEqual(
        "unknown [Oryza sativa (indica cultivar-group)]", hit.description
    )
    self.assertEqual(26, hit.seq_len)
    self.assertEqual(1, len(hit))

    # the single HSP of the first hit: scores, coordinates, frames, counts
    hsp = hit.hsps[0]
    self.assertEqual(54.2989775733826, hsp.bitscore)
    self.assertEqual(129, hsp.bitscore_raw)
    self.assertEqual(1.83262460293058e-05, hsp.evalue)
    self.assertEqual(910, hsp.query_start)
    self.assertEqual(988, hsp.query_end)
    self.assertEqual(0, hsp.hit_start)
    self.assertEqual(26, hsp.hit_end)
    self.assertEqual(2, hsp.query_frame)
    self.assertEqual(0, hsp.hit_frame)
    self.assertEqual(24, hsp.ident_num)
    self.assertEqual(25, hsp.pos_num)
    self.assertEqual(0, hsp.gap_num)
    self.assertEqual(26, hsp.aln_span)

    # aligned query sequence, aligned hit sequence, then the similarity line
    self.assertEqual("HMLVSKIKPCMCKYEQIQTVKLRMAH", hsp.query.seq)
    self.assertEqual("HMLVSKIKPCMCKYELIRTVKLRMAH", hsp.hit.seq)
    self.assertEqual("HMLVSKIKPCMCKYE I+TVKLRMAH", hsp.aln_annotation["similarity"])
def test_xml_2226_blastx_001(self):
# Parse xml_2226_blastx_001.xml and check the two qresults it yields: a
# hitless "random_s00" query followed by "hg19_dna" with five hits.
xml_file = get_file("xml_2226_blastx_001.xml")
qresults = parse(xml_file, FMT)
# counter records how many qresults have been pulled from the iterator
counter = 0
# test each qresult's attributes
qresult = next(qresults)
counter += 1
# test meta variables, only for the first one
self.assertEqual("2.2.26+", qresult.version)
self.assertEqual(
"Stephen F. Altschul, Thomas L. Madden, Alejandro "
"A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb "
'Miller, and David J. Lipman (1997), "Gapped BLAST '
"and PSI-BLAST: a new generation of protein "
'database search programs", '
"Nucleic Acids Res. 25:3389-3402.",
qresult.reference,
)
self.assertEqual("BLOSUM62", qresult.param_matrix)
self.assertEqual(10.0, qresult.param_evalue_threshold)
self.assertEqual("L;", qresult.param_filter)
self.assertEqual(11, qresult.param_gap_open)
self.assertEqual(1, qresult.param_gap_extend)
self.assertEqual("blastx", qresult.program)
self.assertEqual("db/minirefseq_prot", qresult.target)
# test parsed values of the first qresult
self.assertEqual("random_s00", qresult.id)
self.assertEqual("", qresult.description)
self.assertEqual(128, qresult.seq_len)
self.assertEqual(20, qresult.stat_db_num)
self.assertEqual(6406, qresult.stat_db_len)
self.assertEqual(0, qresult.stat_eff_space)
# kappa, lambda and entropy are all expected to be -1 for this qresult
self.assertEqual(-1, qresult.stat_kappa)
self.assertEqual(-1, qresult.stat_lambda)
self.assertEqual(-1, qresult.stat_entropy)
# the first qresult holds no hits
self.assertEqual(0, len(qresult))
# test parsed values of the second qresult
qresult = next(qresults)
counter += 1
self.assertEqual("hg19_dna", qresult.id)
self.assertEqual(
"range=chr1:1207057-1207541 5'pad=0 3'pad=0 strand=+ repeatMasking=none",
qresult.description,
)
self.assertEqual(485, qresult.seq_len)
self.assertEqual(20, qresult.stat_db_num)
self.assertEqual(6406, qresult.stat_db_len)
self.assertEqual(662354, qresult.stat_eff_space)
self.assertEqual(0.041, qresult.stat_kappa)
self.assertEqual(0.267, qresult.stat_lambda)
self.assertEqual(0.14, qresult.stat_entropy)
self.assertEqual(5, len(qresult))
# first hit of the second qresult
hit = qresult[0]
self.assertEqual("gi|332258565|ref|XP_003278367.1|", hit.id)
self.assertEqual(
"PREDICTED: UPF0764 protein C16orf89-like [Nomascus leucogenys]",
hit.description,
)
self.assertEqual(132, hit.seq_len)
self.assertEqual(2, len(hit))
# first HSP of the first hit: scores, coordinates, frames, counts
hsp = hit.hsps[0]
self.assertEqual(121.709, hsp.bitscore)
self.assertEqual(304, hsp.bitscore_raw)
self.assertEqual(2.9522e-38, hsp.evalue)
self.assertEqual(15, hsp.query_start)
self.assertEqual(300, hsp.query_end)
self.assertEqual(24, hsp.hit_start)
self.assertEqual(119, hsp.hit_end)
self.assertEqual(-3, hsp.query_frame)
self.assertEqual(0, hsp.hit_frame)
self.assertEqual(69, hsp.ident_num)
self.assertEqual(74, hsp.pos_num)
self.assertEqual(0, hsp.gap_num)
self.assertEqual(95, hsp.aln_span)
# aligned query sequence, aligned hit sequence, then the similarity line
self.assertEqual(
"LRRSFALVAQAGVQWLDLGXXXXXXPGFK*FSCLSHPSSWDYRHMPPCLINFVFLVETGFYHVGQAGLEPPISGNLPAWASQSVGITGVSHHAQP",
hsp.query.seq,
)
self.assertEqual(
"LRRSFALVAQTRVQWYNLGSPQPPPPGFKRFSCLSLLSSWEYRHVPPHLANFLFLVEMGFLHVGQAGLELVTSGDPPTLTSQSAGIIGVSHCAQP",
hsp.hit.seq,
)
self.assertEqual(
"LRRSFALVAQ VQW +LG PQPPPPGFK FSCLS SSW+YRH+PP L NF+FLVE GF HVGQAGLE SG+ P SQS GI GVSH AQP",
hsp.aln_annotation["similarity"],
)
# last HSP of the first hit
hsp = hit.hsps[-1]
self.assertEqual(51.6026, hsp.bitscore)
self.assertEqual(122, hsp.bitscore_raw)
self.assertEqual(2.73605e-12, hsp.evalue)
self.assertEqual(243, hsp.query_start)
self.assertEqual(459, hsp.query_end)
self.assertEqual(31, hsp.hit_start)
self.assertEqual(98, hsp.hit_end)
self.assertEqual(-3, hsp.query_frame)
self.assertEqual(0, hsp.hit_frame)
self.assertEqual(34, hsp.ident_num)
self.assertEqual(41, hsp.pos_num)
self.assertEqual(5, hsp.gap_num)
self.assertEqual(72, hsp.aln_span)
self.assertEqual(
"VGPARVQ*HDLSSLQPPAPEFK*FSHLSLQSSWDCRCPPPHPANXXXXXXXXFLRRSFALVAQAGVQWLDLG",
hsp.query.seq,
)
self.assertEqual(
"VAQTRVQWYNLGSPQPPPPGFKRFSCLSLLSSWEYRHVPPHLAN-----FLFLVEMGFLHVGQAGLELVTSG",
hsp.hit.seq,
)
self.assertEqual(
"V RVQ ++L S QPP P FK FS LSL SSW+ R PPH AN F F + F V QAG++ + G",
hsp.aln_annotation["similarity"],
)
# last hit of the second qresult; only its first HSP is checked
hit = qresult[-1]
self.assertEqual("gi|33188429|ref|NP_872601.1|", hit.id)
self.assertEqual(
"histone demethylase UTY isoform 1 [Homo sapiens]", hit.description
)
self.assertEqual(1079, hit.seq_len)
self.assertEqual(6, len(hit))
hsp = hit.hsps[0]
self.assertEqual(104.375, hsp.bitscore)
self.assertEqual(259, hsp.bitscore_raw)
self.assertEqual(6.31914e-29, hsp.evalue)
self.assertEqual(18, hsp.query_start)
self.assertEqual(291, hsp.query_end)
self.assertEqual(988, hsp.hit_start)
self.assertEqual(1079, hsp.hit_end)
self.assertEqual(-3, hsp.query_frame)
self.assertEqual(0, hsp.hit_frame)
self.assertEqual(59, hsp.ident_num)
self.assertEqual(66, hsp.pos_num)
self.assertEqual(0, hsp.gap_num)
self.assertEqual(91, hsp.aln_span)
self.assertEqual(
"SFALVAQAGVQWLDLGXXXXXXPGFK*FSCLSHPSSWDYRHMPPCLINFVFLVETGFYHVGQAGLEPPISGNLPAWASQSVGITGVSHHAQ",
hsp.query.seq,
)
self.assertEqual(
"SFQESLRAGMQWCDLSSLQPPPPGFKRFSHLSLPNSWNYRHLPSCPTNFCIFVETGFHHVGQACLELLTSGGLLASASQSAGITGVSHHAR",
hsp.hit.seq,
)
self.assertEqual(
"SF +AG+QW DL QPPPPGFK FS LS P+SW+YRH+P C NF VETGF+HVGQA LE SG L A ASQS GITGVSHHA+",
hsp.aln_annotation["similarity"],
)
# check if we've finished iteration over qresults
self.assertRaises(StopIteration, next, qresults)
# exactly two qresults were consumed above
self.assertEqual(2, counter)
def test_xml_2226_blastx_002(self):
    """Verify the lone, hitless qresult read from xml_2226_blastx_002.xml."""
    records = parse(get_file("xml_2226_blastx_002.xml"), FMT)
    seen = 0
    record = next(records)
    seen += 1

    citation = (
        "Stephen F. Altschul, Thomas L. Madden, Alejandro "
        "A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb "
        'Miller, and David J. Lipman (1997), "Gapped BLAST '
        "and PSI-BLAST: a new generation of protein database "
        'search programs", Nucleic Acids Res. 25:3389-3402.'
    )
    # (attribute name, expected value) pairs, verified in this exact order
    expectations = (
        ("version", "2.2.26+"),
        ("reference", citation),
        ("param_matrix", "BLOSUM62"),
        ("param_evalue_threshold", 10.0),
        ("param_filter", "L;"),
        ("param_gap_open", 11),
        ("param_gap_extend", 1),
        ("program", "blastx"),
        ("target", "db/minirefseq_prot"),
        ("id", "random_s00"),
        ("description", ""),
        ("seq_len", 128),
        ("stat_db_num", 20),
        ("stat_db_len", 6406),
        ("stat_eff_space", 0),
        ("stat_kappa", -1),
        ("stat_lambda", -1),
        ("stat_entropy", -1),
    )
    for attribute, expected in expectations:
        self.assertEqual(expected, getattr(record, attribute))

    # this query produced no hits
    self.assertEqual(0, len(record))

    # the iterator must be exhausted after the single qresult
    self.assertRaises(StopIteration, next, records)
    self.assertEqual(1, seen)
def test_xml_2226_blastx_003(self):
# Parse xml_2226_blastx_003.xml and check the single "hg19_dna" qresult it
# yields: run-level metadata, statistics, both checked HSPs of the first
# hit and the first HSP of the last hit.
xml_file = get_file("xml_2226_blastx_003.xml")
qresults = parse(xml_file, FMT)
# counter records how many qresults have been pulled from the iterator
counter = 0
qresult = next(qresults)
counter += 1
# run-level metadata and search parameters
self.assertEqual("2.2.26+", qresult.version)
self.assertEqual(
"Stephen F. Altschul, Thomas L. Madden, Alejandro "
"A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb "
'Miller, and David J. Lipman (1997), "Gapped BLAST '
"and PSI-BLAST: a new generation of protein database "
'search programs", Nucleic Acids Res. 25:3389-3402.',
qresult.reference,
)
self.assertEqual("BLOSUM62", qresult.param_matrix)
self.assertEqual(10.0, qresult.param_evalue_threshold)
self.assertEqual("L;", qresult.param_filter)
self.assertEqual(11, qresult.param_gap_open)
self.assertEqual(1, qresult.param_gap_extend)
self.assertEqual("blastx", qresult.program)
self.assertEqual("db/minirefseq_prot", qresult.target)
# identity and statistics of the qresult
self.assertEqual("hg19_dna", qresult.id)
self.assertEqual(
"range=chr1:1207057-1207541 5'pad=0 3'pad=0 strand=+ repeatMasking=none",
qresult.description,
)
self.assertEqual(485, qresult.seq_len)
self.assertEqual(20, qresult.stat_db_num)
self.assertEqual(6406, qresult.stat_db_len)
self.assertEqual(662354, qresult.stat_eff_space)
self.assertEqual(0.041, qresult.stat_kappa)
self.assertEqual(0.267, qresult.stat_lambda)
self.assertEqual(0.14, qresult.stat_entropy)
self.assertEqual(5, len(qresult))
# first hit
hit = qresult[0]
self.assertEqual("gi|332258565|ref|XP_003278367.1|", hit.id)
self.assertEqual(
"PREDICTED: UPF0764 protein C16orf89-like [Nomascus leucogenys]",
hit.description,
)
self.assertEqual(132, hit.seq_len)
self.assertEqual(2, len(hit))
# first HSP of the first hit: scores, coordinates, frames, counts
hsp = hit.hsps[0]
self.assertEqual(121.709, hsp.bitscore)
self.assertEqual(304, hsp.bitscore_raw)
self.assertEqual(2.9522e-38, hsp.evalue)
self.assertEqual(15, hsp.query_start)
self.assertEqual(300, hsp.query_end)
self.assertEqual(24, hsp.hit_start)
self.assertEqual(119, hsp.hit_end)
self.assertEqual(-3, hsp.query_frame)
self.assertEqual(0, hsp.hit_frame)
self.assertEqual(69, hsp.ident_num)
self.assertEqual(74, hsp.pos_num)
self.assertEqual(0, hsp.gap_num)
self.assertEqual(95, hsp.aln_span)
# aligned query sequence, aligned hit sequence, then the similarity line
self.assertEqual(
"LRRSFALVAQAGVQWLDLGXXXXXXPGFK*FSCLSHPSSWDYRHMPPCLINFVFLVETGFYHVGQAGLEPPISGNLPAWASQSVGITGVSHHAQP",
hsp.query.seq,
)
self.assertEqual(
"LRRSFALVAQTRVQWYNLGSPQPPPPGFKRFSCLSLLSSWEYRHVPPHLANFLFLVEMGFLHVGQAGLELVTSGDPPTLTSQSAGIIGVSHCAQP",
hsp.hit.seq,
)
self.assertEqual(
"LRRSFALVAQ VQW +LG PQPPPPGFK FSCLS SSW+YRH+PP L NF+FLVE GF HVGQAGLE SG+ P SQS GI GVSH AQP",
hsp.aln_annotation["similarity"],
)
# last HSP of the first hit
hsp = hit.hsps[-1]
self.assertEqual(51.6026, hsp.bitscore)
self.assertEqual(122, hsp.bitscore_raw)
self.assertEqual(2.73605e-12, hsp.evalue)
self.assertEqual(243, hsp.query_start)
self.assertEqual(459, hsp.query_end)
self.assertEqual(31, hsp.hit_start)
self.assertEqual(98, hsp.hit_end)
self.assertEqual(-3, hsp.query_frame)
self.assertEqual(0, hsp.hit_frame)
self.assertEqual(34, hsp.ident_num)
self.assertEqual(41, hsp.pos_num)
self.assertEqual(5, hsp.gap_num)
self.assertEqual(72, hsp.aln_span)
self.assertEqual(
"VGPARVQ*HDLSSLQPPAPEFK*FSHLSLQSSWDCRCPPPHPANXXXXXXXXFLRRSFALVAQAGVQWLDLG",
hsp.query.seq,
)
self.assertEqual(
"VAQTRVQWYNLGSPQPPPPGFKRFSCLSLLSSWEYRHVPPHLAN-----FLFLVEMGFLHVGQAGLELVTSG",
hsp.hit.seq,
)
self.assertEqual(
"V RVQ ++L S QPP P FK FS LSL SSW+ R PPH AN F F + F V QAG++ + G",
hsp.aln_annotation["similarity"],
)
# last hit; only its first HSP is checked
hit = qresult[-1]
self.assertEqual("gi|33188429|ref|NP_872601.1|", hit.id)
self.assertEqual(
"histone demethylase UTY isoform 1 [Homo sapiens]", hit.description
)
self.assertEqual(1079, hit.seq_len)
self.assertEqual(6, len(hit))
hsp = hit.hsps[0]
self.assertEqual(104.375, hsp.bitscore)
self.assertEqual(259, hsp.bitscore_raw)
self.assertEqual(6.31914e-29, hsp.evalue)
self.assertEqual(18, hsp.query_start)
self.assertEqual(291, hsp.query_end)
self.assertEqual(988, hsp.hit_start)
self.assertEqual(1079, hsp.hit_end)
self.assertEqual(-3, hsp.query_frame)
self.assertEqual(0, hsp.hit_frame)
self.assertEqual(59, hsp.ident_num)
self.assertEqual(66, hsp.pos_num)
self.assertEqual(0, hsp.gap_num)
self.assertEqual(91, hsp.aln_span)
self.assertEqual(
"SFALVAQAGVQWLDLGXXXXXXPGFK*FSCLSHPSSWDYRHMPPCLINFVFLVETGFYHVGQAGLEPPISGNLPAWASQSVGITGVSHHAQ",
hsp.query.seq,
)
self.assertEqual(
"SFQESLRAGMQWCDLSSLQPPPPGFKRFSHLSLPNSWNYRHLPSCPTNFCIFVETGFHHVGQACLELLTSGGLLASASQSAGITGVSHHAR",
hsp.hit.seq,
)
self.assertEqual(
"SF +AG+QW DL QPPPPGFK FS LS P+SW+YRH+P C NF VETGF+HVGQA LE SG L A ASQS GITGVSHHA+",
hsp.aln_annotation["similarity"],
)
# check if we've finished iteration over qresults
self.assertRaises(StopIteration, next, qresults)
# exactly one qresult was consumed above
self.assertEqual(1, counter)
def test_xml_2226_blastx_004(self):
# Parse xml_2226_blastx_004.xml (target "refseq_protein") and check the two
# qresults it yields: a hitless "random_s00" query followed by "hg19_dna"
# with five hits.
xml_file = get_file("xml_2226_blastx_004.xml")
qresults = parse(xml_file, FMT)
# counter records how many qresults have been pulled from the iterator
counter = 0
# test each qresult's attributes
qresult = next(qresults)
counter += 1
# test meta variables, only for the first one
self.assertEqual("2.2.26+", qresult.version)
self.assertEqual(
"Stephen F. Altschul, Thomas L. Madden, Alejandro "
"A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb "
'Miller, and David J. Lipman (1997), "Gapped BLAST '
"and PSI-BLAST: a new generation of protein database "
'search programs", Nucleic Acids Res. 25:3389-3402.',
qresult.reference,
)
self.assertEqual("BLOSUM62", qresult.param_matrix)
self.assertEqual(10.0, qresult.param_evalue_threshold)
self.assertEqual("L;", qresult.param_filter)
self.assertEqual(11, qresult.param_gap_open)
self.assertEqual(1, qresult.param_gap_extend)
self.assertEqual("blastx", qresult.program)
self.assertEqual("refseq_protein", qresult.target)
# test parsed values of the first qresult
self.assertEqual("random_s00", qresult.id)
self.assertEqual("", qresult.description)
self.assertEqual(128, qresult.seq_len)
self.assertEqual(12646943, qresult.stat_db_num)
self.assertEqual(4397139428, qresult.stat_db_len)
self.assertEqual(0, qresult.stat_eff_space)
self.assertEqual(0.041, qresult.stat_kappa)
self.assertEqual(0.267, qresult.stat_lambda)
self.assertEqual(0.14, qresult.stat_entropy)
# the first qresult holds no hits
self.assertEqual(0, len(qresult))
# test parsed values of the second qresult
qresult = next(qresults)
counter += 1
self.assertEqual("hg19_dna", qresult.id)
self.assertEqual(
"range=chr1:1207057-1207541 5'pad=0 3'pad=0 strand=+ repeatMasking=none",
qresult.description,
)
self.assertEqual(485, qresult.seq_len)
self.assertEqual(12646943, qresult.stat_db_num)
self.assertEqual(4397139428, qresult.stat_db_len)
self.assertEqual(0, qresult.stat_eff_space)
self.assertEqual(0.041, qresult.stat_kappa)
self.assertEqual(0.267, qresult.stat_lambda)
self.assertEqual(0.14, qresult.stat_entropy)
self.assertEqual(5, len(qresult))
# first hit of the second qresult
hit = qresult[0]
self.assertEqual("gi|332258565|ref|XP_003278367.1|", hit.id)
self.assertEqual(
"PREDICTED: UPF0764 protein C16orf89-like [Nomascus leucogenys]",
hit.description,
)
self.assertEqual(132, hit.seq_len)
self.assertEqual(2, len(hit))
# first HSP of the first hit: scores, coordinates, frames, counts
hsp = hit.hsps[0]
self.assertEqual(121.709, hsp.bitscore)
self.assertEqual(304, hsp.bitscore_raw)
self.assertEqual(2.02642e-32, hsp.evalue)
self.assertEqual(15, hsp.query_start)
self.assertEqual(300, hsp.query_end)
self.assertEqual(24, hsp.hit_start)
self.assertEqual(119, hsp.hit_end)
self.assertEqual(-3, hsp.query_frame)
self.assertEqual(0, hsp.hit_frame)
self.assertEqual(69, hsp.ident_num)
self.assertEqual(74, hsp.pos_num)
self.assertEqual(0, hsp.gap_num)
self.assertEqual(95, hsp.aln_span)
# aligned query sequence, aligned hit sequence, then the similarity line
self.assertEqual(
"LRRSFALVAQAGVQWLDLGXXXXXXPGFK*FSCLSHPSSWDYRHMPPCLINFVFLVETGFYHVGQAGLEPPISGNLPAWASQSVGITGVSHHAQP",
hsp.query.seq,
)
self.assertEqual(
"LRRSFALVAQTRVQWYNLGSPQPPPPGFKRFSCLSLLSSWEYRHVPPHLANFLFLVEMGFLHVGQAGLELVTSGDPPTLTSQSAGIIGVSHCAQP",
hsp.hit.seq,
)
self.assertEqual(
"LRRSFALVAQ VQW +LG PQPPPPGFK FSCLS SSW+YRH+PP L NF+FLVE GF HVGQAGLE SG+ P SQS GI GVSH AQP",
hsp.aln_annotation["similarity"],
)
# last HSP of the first hit
hsp = hit.hsps[-1]
self.assertEqual(51.6026, hsp.bitscore)
self.assertEqual(122, hsp.bitscore_raw)
self.assertEqual(1.87805e-06, hsp.evalue)
self.assertEqual(243, hsp.query_start)
self.assertEqual(459, hsp.query_end)
self.assertEqual(31, hsp.hit_start)
self.assertEqual(98, hsp.hit_end)
self.assertEqual(-3, hsp.query_frame)
self.assertEqual(0, hsp.hit_frame)
self.assertEqual(34, hsp.ident_num)
self.assertEqual(41, hsp.pos_num)
self.assertEqual(5, hsp.gap_num)
self.assertEqual(72, hsp.aln_span)
self.assertEqual(
"VGPARVQ*HDLSSLQPPAPEFK*FSHLSLQSSWDCRCPPPHPANXXXXXXXXFLRRSFALVAQAGVQWLDLG",
hsp.query.seq,
)
self.assertEqual(
"VAQTRVQWYNLGSPQPPPPGFKRFSCLSLLSSWEYRHVPPHLAN-----FLFLVEMGFLHVGQAGLELVTSG",
hsp.hit.seq,
)
self.assertEqual(
"V RVQ ++L S QPP P FK FS LSL SSW+ R PPH AN F F + F V QAG++ + G",
hsp.aln_annotation["similarity"],
)
# last hit of the second qresult; only its first HSP is checked
hit = qresult[-1]
self.assertEqual("gi|332815399|ref|XP_003309509.1|", hit.id)
self.assertEqual(
"PREDICTED: histone demethylase UTY-like [Pan troglodytes]", hit.description
)
self.assertEqual(101, hit.seq_len)
self.assertEqual(2, len(hit))
hsp = hit.hsps[0]
self.assertEqual(97.0561, hsp.bitscore)
self.assertEqual(240, hsp.bitscore_raw)
self.assertEqual(2.76414e-23, hsp.evalue)
self.assertEqual(6, hsp.query_start)
self.assertEqual(279, hsp.query_end)
self.assertEqual(9, hsp.hit_start)
self.assertEqual(100, hsp.hit_end)
self.assertEqual(-3, hsp.query_frame)
self.assertEqual(0, hsp.hit_frame)
self.assertEqual(56, hsp.ident_num)
self.assertEqual(62, hsp.pos_num)
self.assertEqual(0, hsp.gap_num)
self.assertEqual(91, hsp.aln_span)
self.assertEqual(
"VAQAGVQWLDLGXXXXXXPGFK*FSCLSHPSSWDYRHMPPCLINFVFLVETGFYHVGQAGLEPPISGNLPAWASQSVGITGVSHHAQPLCE",
hsp.query.seq,
)
self.assertEqual(
"VPHAGVQWHNLSSLQPPPSRFKPFSYLSLLSSWDQRRPPPCLVTFVFLIETGFRHVGQAGLKLLTSGDPSASASQSAGIRGVSHCTWPECQ",
hsp.hit.seq,
)
self.assertEqual(
"V AGVQW +L QPPP FK FS LS SSWD R PPCL+ FVFL+ETGF HVGQAGL+ SG+ A ASQS GI GVSH P C+",
hsp.aln_annotation["similarity"],
)
# check if we've finished iteration over qresults
self.assertRaises(StopIteration, next, qresults)
# exactly two qresults were consumed above
self.assertEqual(2, counter)
class TblastnXmlCases(unittest.TestCase):
def test_xml_2212L_tblastn_001(self):
# Parse xml_2212L_tblastn_001.xml and check the single qresult it yields:
# run-level metadata, statistics, and the first HSP of the first and last
# of its 100 hits.
xml_file = get_file("xml_2212L_tblastn_001.xml")
qresults = parse(xml_file, FMT)
# counter records how many qresults have been pulled from the iterator
counter = 0
qresult = next(qresults)
counter += 1
# run-level metadata and search parameters
self.assertEqual("2.2.12", qresult.version)
self.assertEqual(REFERENCE, qresult.reference)
self.assertEqual("BLOSUM62", qresult.param_matrix)
self.assertEqual(0.001, qresult.param_evalue_threshold)
self.assertEqual(11, qresult.param_gap_open)
self.assertEqual(1, qresult.param_gap_extend)
self.assertEqual("L;", qresult.param_filter)
self.assertEqual("tblastn", qresult.program)
self.assertEqual("nr", qresult.target)
# test parsed values of qresult
self.assertEqual("gi|729325|sp|P39483|DHG2_BACME", qresult.id)
self.assertEqual("Glucose 1-dehydrogenase II (GLCDH-II)", qresult.description)
self.assertEqual(261, qresult.seq_len)
self.assertEqual(251887, qresult.stat_db_num)
self.assertEqual(438542399, qresult.stat_db_len)
self.assertEqual(0, qresult.stat_eff_space)
self.assertEqual(0.041, qresult.stat_kappa)
self.assertEqual(0.267, qresult.stat_lambda)
self.assertEqual(0.14, qresult.stat_entropy)
self.assertEqual(100, len(qresult))
# first hit
hit = qresult[0]
self.assertEqual("gi|58264321|ref|XM_569317.1|", hit.id)
self.assertEqual(
"Filobasidiella neoformans glucose 1-dehydrogenase, putative (CNB05760) mRNA, complete cds",
hit.description,
)
self.assertEqual(904, hit.seq_len)
self.assertEqual(1, len(hit))
# the hit holds one HSP, so index 1 must raise IndexError
self.assertRaises(IndexError, hit.__getitem__, 1)
# the single HSP of the first hit: scores, coordinates, frames, counts
hsp = hit.hsps[0]
self.assertEqual(148.288, hsp.bitscore)
self.assertEqual(373, hsp.bitscore_raw)
self.assertEqual(1.46834e-35, hsp.evalue)
self.assertEqual(4, hsp.query_start)
self.assertEqual(250, hsp.query_end)
self.assertEqual(15, hsp.hit_start)
self.assertEqual(762, hsp.hit_end)
self.assertEqual(0, hsp.query_frame)
self.assertEqual(1, hsp.hit_frame)
self.assertEqual(84, hsp.ident_num)
self.assertEqual(143, hsp.pos_num)
self.assertEqual(9, hsp.gap_num)
self.assertEqual(252, hsp.aln_span)
# aligned query sequence, aligned hit sequence, then the similarity line
self.assertEqual(
"LKDKVVVVTGGSKGLGRAMAVRFGQEQSKVVVNYRSNXXXXXXXXXXXXXXXXGGQAIIVRGDVTKEEDVVNLVETAVKEFGSLDVMINNAGVENPVPSH---ELSLENWNQVIDTNLTGAFLGSREAIKYFVENDIKG-NVINMSSVHEMIPWPLFVHYAASKGGMKLMTETLALEYAPKGIRVNNIGPGAIDTPINAEKFADPEQRADVESMIPMGYIGKPEEIASVAAFLASSQASYVTGITLFADGGM",
hsp.query.seq,
)
self.assertEqual(
"LQGKVVAITGCSTGIGRAIAIGAAKNGANVVLHHLGDSTASDIAQVQEECKQAGAKTVVVPGDIAEAKTANEIVSAAVSSFSRIDVLISNAGI---CPFHSFLDLPHPLWKRVQDVNLNGSFYVVQAVANQMAKQEPKGGSIVAVSSISALMGGGEQCHYTPTKAGIKSLMESCAIALGPMGIRCNSVLPGTIETNINKEDLSNPEKRADQIRRVPLGRLGKPEDLVGPTLFFASDLSNYCTGASVLVDGGM",
hsp.hit.seq,
)
self.assertEqual(
"L+ KVV +TG S G+GRA+A+ + + VV+++ + + + + E +AG + ++V GD+ + + +V AV F +DV+I+NAG+ P H +L W +V D NL G+F + + + KG +++ +SS+ ++ HY +K G+K + E+ A+ P GIR N++ PG I+T IN E ++PE+RAD +P+G +GKPE++ F AS ++Y TG ++ DGGM",
hsp.aln_annotation["similarity"],
)
# parse last hit
hit = qresult[-1]
self.assertEqual("gi|450259|gb|L27825.1|EMEVERA1AA", hit.id)
self.assertEqual(
"Emericella nidulans (verA) gene, complete cds, ORF 1 gene, complete cds, and ORF 2 gene, 5' end",
hit.description,
)
self.assertEqual(4310, hit.seq_len)
self.assertEqual(2, len(hit))
# first HSP of the last hit; its second HSP is not checked
hsp = hit.hsps[0]
self.assertEqual(91.2781, hsp.bitscore)
self.assertEqual(225, hsp.bitscore_raw)
self.assertEqual(1.31998e-20, hsp.evalue)
self.assertEqual(4, hsp.query_start)
self.assertEqual(204, hsp.query_end)
self.assertEqual(578, hsp.hit_start)
self.assertEqual(1253, hsp.hit_end)
self.assertEqual(0, hsp.query_frame)
self.assertEqual(3, hsp.hit_frame)
self.assertEqual(76, hsp.ident_num)
self.assertEqual(113, hsp.pos_num)
self.assertEqual(31, hsp.gap_num)
self.assertEqual(228, hsp.aln_span)
self.assertEqual(
"LKDKVVVVTGGSKGLGRAMAVRFGQEQSKVVVNYRSNXXXXXXXXXXXXXXGGQAIIVRGDVTKEEDVVNLVETAVKEFGSLDVMINNAGV-----------ENPVPS-HELSLE-----NWNQVIDTNLTGAFLGSREAIKYFVENDIKGNVINMSSVHEMIPW-PLFVHYAASKGGMKLMTETLALEYAPKGIRVNNIGPGAIDTPI----------NAEKFADPE",
hsp.query.seq,
)
self.assertEqual(
"LDGKVALVTGAGRGIGAAIAVALGQPGAKVVVNYANSREAAEKVVDEIKSNAQSAISIQADVGDPDAVTKLMDQAVEHFGYLDIVSSNAGIVSFGHVKDVTPDVCVPSPYESPVEL*PQQEFDRVFRVNTRGQFFVAREAYRHLREG---GRIILTSSNTASVKGVPRHAVYSGSKGAIDTFVRCLAIDCGDKKITVNAVAPGAIKTDMFLSVSREYIPNGETFTDEQ",
hsp.hit.seq,
)
self.assertEqual(
"L KV +VTG +G+G A+AV GQ +KVVVNY ++ E A +V EI+ AI ++ DV + V L++ AV+ FG LD++ +NAG+ + VPS +E +E +++V N G F +REA ++ E G +I SS + P Y+ SKG + LA++ K I VN + PGAI T + N E F D +",
hsp.aln_annotation["similarity"],
)
# check if we've finished iteration over qresults
self.assertRaises(StopIteration, next, qresults)
# exactly one qresult was consumed above
self.assertEqual(1, counter)
def test_xml_2226_tblastn_001(self):
# Parse xml_2226_tblastn_001.xml and check the three qresults it yields:
# a hitless "random_s00" query, a query with three hits, and a query with
# five hits.
xml_file = get_file("xml_2226_tblastn_001.xml")
qresults = parse(xml_file, FMT)
# counter records how many qresults have been pulled from the iterator
counter = 0
# test each qresult's attributes
qresult = next(qresults)
counter += 1
# test meta variables, only for the first one
self.assertEqual("2.2.26+", qresult.version)
self.assertEqual(
"Stephen F. Altschul, Thomas L. Madden, Alejandro "
"A. Schäffer, Jinghui Zhang, Zheng Zhang, "
"Webb Miller, and David J. Lipman (1997), "
'"Gapped BLAST and PSI-BLAST: a new generation of '
'protein database search programs", '
"Nucleic Acids Res. 25:3389-3402.",
qresult.reference,
)
self.assertEqual("BLOSUM62", qresult.param_matrix)
self.assertEqual(10.0, qresult.param_evalue_threshold)
self.assertEqual("L;", qresult.param_filter)
self.assertEqual(11, qresult.param_gap_open)
self.assertEqual(1, qresult.param_gap_extend)
self.assertEqual("tblastn", qresult.program)
self.assertEqual("db/minirefseq_mrna", qresult.target)
# test parsed values of the first qresult
self.assertEqual("random_s00", qresult.id)
self.assertEqual("", qresult.description)
self.assertEqual(32, qresult.seq_len)
self.assertEqual(23, qresult.stat_db_num)
self.assertEqual(67750, qresult.stat_db_len)
self.assertEqual(0, qresult.stat_eff_space)
# kappa, lambda and entropy are all expected to be -1 for this qresult
self.assertEqual(-1, qresult.stat_kappa)
self.assertEqual(-1, qresult.stat_lambda)
self.assertEqual(-1, qresult.stat_entropy)
# the first qresult holds no hits
self.assertEqual(0, len(qresult))
# test parsed values of the second qresult
qresult = next(qresults)
counter += 1
self.assertEqual("gi|16080617|ref|NP_391444.1|", qresult.id)
self.assertEqual(
"membrane bound lipoprotein [Bacillus subtilis "
"subsp. subtilis str. 168]",
qresult.description,
)
self.assertEqual(102, qresult.seq_len)
self.assertEqual(23, qresult.stat_db_num)
self.assertEqual(67750, qresult.stat_db_len)
self.assertEqual(1205400.0, qresult.stat_eff_space)
self.assertEqual(0.041, qresult.stat_kappa)
self.assertEqual(0.267, qresult.stat_lambda)
self.assertEqual(0.14, qresult.stat_entropy)
self.assertEqual(3, len(qresult))
# first hit of the second qresult (the only hit checked for it)
hit = qresult[0]
self.assertEqual("gi|145479850|ref|XM_001425911.1|", hit.id)
self.assertEqual(
"Paramecium tetraurelia hypothetical protein (GSPATT00004923001) partial mRNA",
hit.description,
)
self.assertEqual(4632, hit.seq_len)
self.assertEqual(1, len(hit))
# the single HSP of that hit: scores, coordinates, frames, counts
hsp = hit.hsps[0]
self.assertEqual(34.6538, hsp.bitscore)
self.assertEqual(78, hsp.bitscore_raw)
self.assertEqual(1.08241e-05, hsp.evalue)
self.assertEqual(30, hsp.query_start)
self.assertEqual(73, hsp.query_end)
self.assertEqual(1743, hsp.hit_start)
self.assertEqual(1872, hsp.hit_end)
self.assertEqual(0, hsp.query_frame)
self.assertEqual(1, hsp.hit_frame)
self.assertEqual(15, hsp.ident_num)
self.assertEqual(26, hsp.pos_num)
self.assertEqual(0, hsp.gap_num)
self.assertEqual(43, hsp.aln_span)
# aligned query sequence, aligned hit sequence, then the similarity line
self.assertEqual("PDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTSDLD", hsp.query.seq)
self.assertEqual("PKTATGTKKGTIIGLLSIHTILFILTSHALSLEVKEQT*KDID", hsp.hit.seq)
self.assertEqual(
"P + TK+GT +GL HTI + + +SL++ E++ D+D",
hsp.aln_annotation["similarity"],
)
# the hit holds one HSP, so index 1 must raise IndexError
self.assertRaises(IndexError, hit.__getitem__, 1)
# test parsed values of the third qresult
qresult = next(qresults)
counter += 1
self.assertEqual("gi|11464971:4-101", qresult.id)
self.assertEqual("pleckstrin [Mus musculus]", qresult.description)
self.assertEqual(98, qresult.seq_len)
self.assertEqual(23, qresult.stat_db_num)
self.assertEqual(67750, qresult.stat_db_len)
self.assertEqual(1119300.0, qresult.stat_eff_space)
self.assertEqual(0.041, qresult.stat_kappa)
self.assertEqual(0.267, qresult.stat_lambda)
self.assertEqual(0.14, qresult.stat_entropy)
self.assertEqual(5, len(qresult))
# first hit of the third qresult
hit = qresult[0]
self.assertEqual("gi|350596019|ref|XM_003360601.2|", hit.id)
self.assertEqual(
"PREDICTED: Sus scrofa pleckstrin-like (LOC100626968), mRNA",
hit.description,
)
self.assertEqual(772, hit.seq_len)
self.assertEqual(2, len(hit))
# first HSP of that hit
hsp = hit.hsps[0]
self.assertEqual(199.519, hsp.bitscore)
self.assertEqual(506, hsp.bitscore_raw)
self.assertEqual(1.57249e-67, hsp.evalue)
self.assertEqual(0, hsp.query_start)
self.assertEqual(98, hsp.query_end)
self.assertEqual(94, hsp.hit_start)
self.assertEqual(388, hsp.hit_end)
self.assertEqual(0, hsp.query_frame)
self.assertEqual(2, hsp.hit_frame)
self.assertEqual(94, hsp.ident_num)
self.assertEqual(96, hsp.pos_num)
self.assertEqual(0, hsp.gap_num)
self.assertEqual(98, hsp.aln_span)
# note: for this HSP the hit sequence is asserted before the query sequence
self.assertEqual(
"KRIREGYLVKKGSMFNTWKPMWVILLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVFKITTTKQQDHFFQAAFLEERDGWVRDIKKAIK",
hsp.hit.seq,
)
self.assertEqual(
"KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK",
hsp.query.seq,
)
self.assertEqual(
"KRIREGYLVKKGS+FNTWKPMWV+LLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFV KITTTKQQDHFFQAAFLEERD WVRDIKKAIK",
hsp.aln_annotation["similarity"],
)
# last HSP of the first hit
hsp = hit.hsps[-1]
self.assertEqual(32.7278, hsp.bitscore)
self.assertEqual(73, hsp.bitscore_raw)
self.assertEqual(4.07518e-05, hsp.evalue)
self.assertEqual(29, hsp.query_start)
self.assertEqual(96, hsp.query_end)
self.assertEqual(541, hsp.hit_start)
self.assertEqual(754, hsp.hit_end)
self.assertEqual(0, hsp.query_frame)
self.assertEqual(2, hsp.hit_frame)
self.assertEqual(21, hsp.ident_num)
self.assertEqual(33, hsp.pos_num)
self.assertEqual(4, hsp.gap_num)
self.assertEqual(71, hsp.aln_span)
self.assertEqual(
"IEFYKKKSDNSPKGMIPLKGSTLTS-PCQDFGKRMFVLK---ITTTKQQDHFFQAAFLEERDAWVRDIKKA",
hsp.query.seq,
)
self.assertEqual(
"LHYYDPAGGEDPLGAIHLRGCVVTSVESNTDGKNGFLWERAXXITADEVHYFLQAANPKERTEWIKAIQVA",
hsp.hit.seq,
)
self.assertEqual(
"+ +Y P G I L+G +TS GK F+ + T + +F QAA +ER W++ I+ A",
hsp.aln_annotation["similarity"],
)
# last hit of the third qresult and its single HSP
hit = qresult[-1]
self.assertEqual("gi|365982352|ref|XM_003667962.1|", hit.id)
self.assertEqual(
"Naumovozyma dairenensis CBS 421 hypothetical protein (NDAI0A06120), mRNA",
hit.description,
)
self.assertEqual(4932, hit.seq_len)
self.assertEqual(1, len(hit))
hsp = hit.hsps[0]
self.assertEqual(19.631, hsp.bitscore)
self.assertEqual(39, hsp.bitscore_raw)
self.assertEqual(1.65923, hsp.evalue)
self.assertEqual(11, hsp.query_start)
self.assertEqual(54, hsp.query_end)
self.assertEqual(3180, hsp.hit_start)
self.assertEqual(3336, hsp.hit_end)
self.assertEqual(0, hsp.query_frame)
self.assertEqual(1, hsp.hit_frame)
self.assertEqual(16, hsp.ident_num)
self.assertEqual(23, hsp.pos_num)
self.assertEqual(9, hsp.gap_num)
self.assertEqual(52, hsp.aln_span)
self.assertEqual(
"GSVFNTWKPMWVVLL---------EDGIEFYKKKSDNSPKGMIPLKGSTLTS", hsp.query.seq
)
self.assertEqual(
"GSCFPTWDLIFIEVLNPFLKEKLWEADNEEISKFVDLTLKGLVDLYPSHFTS", hsp.hit.seq
)
self.assertEqual(
"GS F TW +++ +L E E K D + KG++ L S TS",
hsp.aln_annotation["similarity"],
)
# check if we've finished iteration over qresults
self.assertRaises(StopIteration, next, qresults)
# exactly three qresults were consumed above
self.assertEqual(3, counter)
def test_xml_2226_tblastn_002(self):
    # Single-query tblastn 2.2.26+ XML report: verifies the program header,
    # the search parameters and the statistics of a query result that
    # contains no hits.

    def expect(obj, **attrs):
        # Assert, in the order given, that each named attribute of obj
        # equals its expected value.
        for name, value in attrs.items():
            self.assertEqual(value, getattr(obj, name))

    records = parse(get_file("xml_2226_tblastn_002.xml"), FMT)
    seen = 0

    record = next(records)
    seen += 1
    expect(
        record,
        # program header and search parameters
        version="2.2.26+",
        reference=(
            "Stephen F. Altschul, Thomas L. Madden, Alejandro "
            "A. Schäffer, Jinghui Zhang, Zheng Zhang, "
            "Webb Miller, and David J. Lipman (1997), "
            '"Gapped BLAST and PSI-BLAST: a new generation of '
            'protein database search programs", '
            "Nucleic Acids Res. 25:3389-3402."
        ),
        param_matrix="BLOSUM62",
        param_evalue_threshold=10.0,
        param_filter="L;",
        param_gap_open=11,
        param_gap_extend=1,
        program="tblastn",
        target="db/minirefseq_mrna",
        # the query itself
        id="random_s00",
        description="",
        seq_len=32,
        # search statistics
        stat_db_num=23,
        stat_db_len=67750,
        stat_eff_space=0,
        stat_kappa=-1,
        stat_lambda=-1,
        stat_entropy=-1,
    )
    self.assertEqual(0, len(record))

    # the iterator must be exhausted after that single query result
    with self.assertRaises(StopIteration):
        next(records)
    self.assertEqual(1, seen)
def test_xml_2226_tblastn_003(self):
    # Single-query tblastn 2.2.26+ XML report: verifies the query result,
    # its first hit and that hit's only HSP.

    def expect(obj, **attrs):
        # Assert, in the order given, that each named attribute of obj
        # equals its expected value.
        for name, value in attrs.items():
            self.assertEqual(value, getattr(obj, name))

    records = parse(get_file("xml_2226_tblastn_003.xml"), FMT)
    seen = 0

    record = next(records)
    seen += 1
    expect(
        record,
        # program header and search parameters
        version="2.2.26+",
        reference=(
            "Stephen F. Altschul, Thomas L. Madden, Alejandro "
            "A. Schäffer, Jinghui Zhang, Zheng Zhang, "
            "Webb Miller, and David J. Lipman (1997), "
            '"Gapped BLAST and PSI-BLAST: a new generation of '
            'protein database search programs", '
            "Nucleic Acids Res. 25:3389-3402."
        ),
        param_matrix="BLOSUM62",
        param_evalue_threshold=10.0,
        param_filter="L;",
        param_gap_open=11,
        param_gap_extend=1,
        program="tblastn",
        target="db/minirefseq_mrna",
        # the query itself
        id="gi|16080617|ref|NP_391444.1|",
        description=(
            "membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168]"
        ),
        seq_len=102,
        # search statistics
        stat_db_num=23,
        stat_db_len=67750,
        stat_eff_space=1205400.0,
        stat_kappa=0.041,
        stat_lambda=0.267,
        stat_entropy=0.14,
    )
    self.assertEqual(3, len(record))

    # first hit
    first_hit = record[0]
    expect(
        first_hit,
        id="gi|145479850|ref|XM_001425911.1|",
        description=(
            "Paramecium tetraurelia hypothetical protein (GSPATT00004923001) partial mRNA"
        ),
        seq_len=4632,
    )
    self.assertEqual(1, len(first_hit))

    # its single HSP
    segment = first_hit.hsps[0]
    expect(
        segment,
        bitscore=34.6538,
        bitscore_raw=78,
        evalue=1.08241e-05,
        query_start=30,
        query_end=73,
        hit_start=1743,
        hit_end=1872,
        query_frame=0,
        hit_frame=1,
        ident_num=15,
        pos_num=26,
        gap_num=0,
        aln_span=43,
    )
    self.assertEqual(
        "PDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTSDLD", segment.query.seq
    )
    self.assertEqual("PKTATGTKKGTIIGLLSIHTILFILTSHALSLEVKEQT*KDID", segment.hit.seq)
    self.assertEqual(
        "P + TK+GT +GL HTI + + +SL++ E++ D+D",
        segment.aln_annotation["similarity"],
    )
    # indexing past the only HSP must fail
    self.assertRaises(IndexError, first_hit.__getitem__, 1)

    # the iterator must be exhausted after that single query result
    with self.assertRaises(StopIteration):
        next(records)
    self.assertEqual(1, seen)
def test_xml_2226_tblastn_004(self):
    # Single-query tblastn 2.2.26+ XML report: verifies the query result,
    # the first and last HSP of its first hit, and the only HSP of its
    # last hit.

    def expect(obj, **attrs):
        # Assert, in the order given, that each named attribute of obj
        # equals its expected value.
        for name, value in attrs.items():
            self.assertEqual(value, getattr(obj, name))

    records = parse(get_file("xml_2226_tblastn_004.xml"), FMT)
    seen = 0

    record = next(records)
    seen += 1
    expect(
        record,
        # program header and search parameters
        version="2.2.26+",
        reference=(
            "Stephen F. Altschul, Thomas L. Madden, Alejandro "
            "A. Schäffer, Jinghui Zhang, Zheng Zhang, "
            "Webb Miller, and David J. Lipman (1997), "
            '"Gapped BLAST and PSI-BLAST: a new generation of '
            'protein database search programs", '
            "Nucleic Acids Res. 25:3389-3402."
        ),
        param_matrix="BLOSUM62",
        param_evalue_threshold=10.0,
        param_filter="L;",
        param_gap_open=11,
        param_gap_extend=1,
        program="tblastn",
        target="db/minirefseq_mrna",
        # the query itself
        id="gi|11464971:4-101",
        description="pleckstrin [Mus musculus]",
        seq_len=98,
        # search statistics
        stat_db_num=23,
        stat_db_len=67750,
        stat_eff_space=1119300.0,
        stat_kappa=0.041,
        stat_lambda=0.267,
        stat_entropy=0.14,
    )
    self.assertEqual(5, len(record))

    # first hit
    current_hit = record[0]
    expect(
        current_hit,
        id="gi|350596019|ref|XM_003360601.2|",
        description="PREDICTED: Sus scrofa pleckstrin-like (LOC100626968), mRNA",
        seq_len=772,
    )
    self.assertEqual(2, len(current_hit))

    # first hit, first HSP
    segment = current_hit.hsps[0]
    expect(
        segment,
        bitscore=199.519,
        bitscore_raw=506,
        evalue=1.57249e-67,
        query_start=0,
        query_end=98,
        hit_start=94,
        hit_end=388,
        query_frame=0,
        hit_frame=2,
        ident_num=94,
        pos_num=96,
        gap_num=0,
        aln_span=98,
    )
    self.assertEqual(
        "KRIREGYLVKKGSMFNTWKPMWVILLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVFKITTTKQQDHFFQAAFLEERDGWVRDIKKAIK",
        segment.hit.seq,
    )
    self.assertEqual(
        "KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK",
        segment.query.seq,
    )
    self.assertEqual(
        "KRIREGYLVKKGS+FNTWKPMWV+LLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFV KITTTKQQDHFFQAAFLEERD WVRDIKKAIK",
        segment.aln_annotation["similarity"],
    )

    # first hit, last HSP
    segment = current_hit.hsps[-1]
    expect(
        segment,
        bitscore=32.7278,
        bitscore_raw=73,
        evalue=4.07518e-05,
        query_start=29,
        query_end=96,
        hit_start=541,
        hit_end=754,
        query_frame=0,
        hit_frame=2,
        ident_num=21,
        pos_num=33,
        gap_num=4,
        aln_span=71,
    )
    self.assertEqual(
        "IEFYKKKSDNSPKGMIPLKGSTLTS-PCQDFGKRMFVLK---ITTTKQQDHFFQAAFLEERDAWVRDIKKA",
        segment.query.seq,
    )
    self.assertEqual(
        "LHYYDPAGGEDPLGAIHLRGCVVTSVESNTDGKNGFLWERAXXITADEVHYFLQAANPKERTEWIKAIQVA",
        segment.hit.seq,
    )
    self.assertEqual(
        "+ +Y P G I L+G +TS GK F+ + T + +F QAA +ER W++ I+ A",
        segment.aln_annotation["similarity"],
    )

    # last hit
    current_hit = record[-1]
    expect(
        current_hit,
        id="gi|365982352|ref|XM_003667962.1|",
        description=(
            "Naumovozyma dairenensis CBS 421 hypothetical protein (NDAI0A06120), mRNA"
        ),
        seq_len=4932,
    )
    self.assertEqual(1, len(current_hit))

    # last hit, only HSP
    segment = current_hit.hsps[0]
    expect(
        segment,
        bitscore=19.631,
        bitscore_raw=39,
        evalue=1.65923,
        query_start=11,
        query_end=54,
        hit_start=3180,
        hit_end=3336,
        query_frame=0,
        hit_frame=1,
        ident_num=16,
        pos_num=23,
        gap_num=9,
        aln_span=52,
    )
    self.assertEqual(
        "GSVFNTWKPMWVVLL---------EDGIEFYKKKSDNSPKGMIPLKGSTLTS", segment.query.seq
    )
    self.assertEqual(
        "GSCFPTWDLIFIEVLNPFLKEKLWEADNEEISKFVDLTLKGLVDLYPSHFTS", segment.hit.seq
    )
    self.assertEqual(
        "GS F TW +++ +L E E K D + KG++ L S TS",
        segment.aln_annotation["similarity"],
    )

    # the iterator must be exhausted after that single query result
    with self.assertRaises(StopIteration):
        next(records)
    self.assertEqual(1, seen)
def test_xml_2226_tblastn_005(self):
    # Three-query tblastn 2.2.26+ XML report: the program header is checked
    # on the first query result only; every query result is checked for its
    # own identity and statistics, and selected hits/HSPs are spot-checked.

    def expect(obj, **attrs):
        # Assert, in the order given, that each named attribute of obj
        # equals its expected value.
        for name, value in attrs.items():
            self.assertEqual(value, getattr(obj, name))

    # statistics asserted identically for all three query results
    db_stats = {
        "stat_db_num": 2933984,
        "stat_db_len": 4726730735,
        "stat_eff_space": 0,
        "stat_kappa": 0.041,
        "stat_lambda": 0.267,
        "stat_entropy": 0.14,
    }

    records = parse(get_file("xml_2226_tblastn_005.xml"), FMT)
    seen = 0

    # ---- first query result (no hits) ----
    record = next(records)
    seen += 1
    expect(
        record,
        # meta variables, only tested on the first query result
        version="2.2.26+",
        reference=(
            "Stephen F. Altschul, Thomas L. Madden, Alejandro "
            "A. Schäffer, Jinghui Zhang, Zheng Zhang, "
            "Webb Miller, and David J. Lipman (1997), "
            '"Gapped BLAST and PSI-BLAST: a new generation of '
            'protein database search programs", '
            "Nucleic Acids Res. 25:3389-3402."
        ),
        param_matrix="BLOSUM62",
        param_evalue_threshold=10.0,
        param_filter="L;",
        param_gap_open=11,
        param_gap_extend=1,
        program="tblastn",
        target="refseq_rna",
        # parsed values of the first query result
        id="random_s00",
        description="",
        seq_len=32,
        **db_stats,
    )
    self.assertEqual(0, len(record))

    # ---- second query result ----
    record = next(records)
    seen += 1
    expect(
        record,
        id="gi|16080617|ref|NP_391444.1|",
        description=(
            "membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168]"
        ),
        seq_len=102,
        **db_stats,
    )
    self.assertEqual(3, len(record))

    # second query result, first hit
    current_hit = record[0]
    expect(
        current_hit,
        id="gi|145479850|ref|XM_001425911.1|",
        description=(
            "Paramecium tetraurelia hypothetical protein (GSPATT00004923001) partial mRNA"
        ),
        seq_len=4632,
    )
    self.assertEqual(1, len(current_hit))

    segment = current_hit.hsps[0]
    expect(
        segment,
        bitscore=34.6538,
        bitscore_raw=78,
        evalue=0.755176,
        query_start=30,
        query_end=73,
        hit_start=1743,
        hit_end=1872,
        query_frame=0,
        hit_frame=1,
        ident_num=15,
        pos_num=26,
        gap_num=0,
        aln_span=43,
    )
    self.assertEqual(
        "PDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTSDLD", segment.query.seq
    )
    self.assertEqual("PKTATGTKKGTIIGLLSIHTILFILTSHALSLEVKEQT*KDID", segment.hit.seq)
    self.assertEqual(
        "P + TK+GT +GL HTI + + +SL++ E++ D+D",
        segment.aln_annotation["similarity"],
    )
    # indexing past the only HSP must fail
    self.assertRaises(IndexError, current_hit.__getitem__, 1)

    # ---- third query result ----
    record = next(records)
    seen += 1
    expect(
        record,
        id="gi|11464971:4-101",
        description="pleckstrin [Mus musculus]",
        seq_len=98,
        **db_stats,
    )
    self.assertEqual(5, len(record))

    # third query result, first hit
    current_hit = record[0]
    expect(
        current_hit,
        id="gi|354480463|ref|XM_003502378.1|",
        description=(
            "PREDICTED: Cricetulus griseus pleckstrin-like (LOC100773128), mRNA"
        ),
        seq_len=1119,
    )
    self.assertEqual(2, len(current_hit))

    # first hit, first HSP
    segment = current_hit.hsps[0]
    expect(
        segment,
        bitscore=205.297,
        bitscore_raw=521,
        evalue=1.46172e-63,
        query_start=0,
        query_end=98,
        hit_start=75,
        hit_end=369,
        query_frame=0,
        hit_frame=1,
        ident_num=98,
        pos_num=98,
        gap_num=0,
        aln_span=98,
    )
    self.assertEqual(
        "KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK",
        segment.hit.seq,
    )
    self.assertEqual(
        "KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK",
        segment.query.seq,
    )
    self.assertEqual(
        "KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK",
        segment.aln_annotation["similarity"],
    )

    # first hit, last HSP
    segment = current_hit.hsps[-1]
    expect(
        segment,
        bitscore=43.8986,
        bitscore_raw=102,
        evalue=0.00054161,
        query_start=2,
        query_end=96,
        hit_start=801,
        hit_end=1101,
        query_frame=0,
        hit_frame=1,
        ident_num=30,
        pos_num=50,
        gap_num=6,
        aln_span=100,
    )
    self.assertEqual(
        "IREGYLVKKGSVFNTWKPMWVVLLEDG--IEFYKKKSDNSPKGMIPLKGSTLTSPCQDF-GKRM---FVLKITTTKQQDHFFQAAFLEERDAWVRDIKKA",
        segment.query.seq,
    )
    self.assertEqual(
        "IKQGCLLKQGHRRKNWKVRKFILREDPAYLHYYDPAGGEDPLGAIHLRGCVVTSVESNHDGKKSDDENLFEIITADEVHYYLQAAAPKERTEWIKAIQVA",
        segment.hit.seq,
    )
    self.assertEqual(
        "I++G L+K+G WK +L ED + +Y P G I L+G +TS + GK+ + +I T + ++ QAA +ER W++ I+ A",
        segment.aln_annotation["similarity"],
    )

    # third query result, last hit
    current_hit = record[-1]
    expect(
        current_hit,
        id="gi|390474391|ref|XM_002757683.2|",
        description="PREDICTED: Callithrix jacchus pleckstrin (PLEK), mRNA",
        seq_len=1402,
    )
    self.assertEqual(2, len(current_hit))

    # last hit, first HSP
    segment = current_hit.hsps[0]
    expect(
        segment,
        bitscore=202.986,
        bitscore_raw=515,
        evalue=1.27031e-61,
        query_start=0,
        query_end=98,
        hit_start=160,
        hit_end=454,
        query_frame=0,
        hit_frame=2,
        ident_num=96,
        pos_num=97,
        gap_num=0,
        aln_span=98,
    )
    self.assertEqual(
        "KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK",
        segment.query.seq,
    )
    self.assertEqual(
        "KRIREGYLVKKGSMFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVFKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK",
        segment.hit.seq,
    )
    self.assertEqual(
        "KRIREGYLVKKGS+FNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFV KITTTKQQDHFFQAAFLEERDAWVRDIKKAIK",
        segment.aln_annotation["similarity"],
    )

    # check if we've finished iteration over the query results
    with self.assertRaises(StopIteration):
        next(records)
    self.assertEqual(3, seen)
class TblastxXmlCases(unittest.TestCase):
def test_xml_2212L_tblastx_001(self):
    # Single-query tblastx 2.2.12 XML report: verifies the query result and
    # the first HSP of both its first and its last hit.

    def expect(obj, **attrs):
        # Assert, in the order given, that each named attribute of obj
        # equals its expected value.
        for name, value in attrs.items():
            self.assertEqual(value, getattr(obj, name))

    records = parse(get_file("xml_2212L_tblastx_001.xml"), FMT)
    seen = 0

    # ---- first (and only) query result ----
    record = next(records)
    seen += 1
    expect(
        record,
        # meta variables, only tested on the first query result
        version="2.2.12",
        reference=REFERENCE,
        param_matrix="BLOSUM80",
        param_evalue_threshold=1,
        param_filter="L;",
        param_gap_open=10,
        param_gap_extend=1,
        program="tblastx",
        target="nr",
        # parsed values of the first query result
        id="gi|1348853|gb|G26621.1|G26621",
        description="human STS STS_D12006, sequence tagged site",
        seq_len=615,
        stat_db_num=3533718,
        # NOTE(review): the expected database length is negative; it is
        # unclear why -- possibly a bug on the BLAST side (unconfirmed).
        stat_db_len=-1496331888,
        stat_eff_space=0,
        stat_kappa=0.177051,
        stat_lambda=0.342969,
        stat_entropy=0.656794,
    )
    self.assertEqual(10, len(record))

    # first hit
    current_hit = record[0]
    expect(
        current_hit,
        id="gi|18072170|gb|AC010333.7|",
        description="Homo sapiens chromosome 16 clone CTD-3037G24, complete sequence",
        seq_len=159870,
    )
    self.assertEqual(13, len(current_hit))

    # first hit, first HSP
    segment = current_hit.hsps[0]
    expect(
        segment,
        bitscore=329.561,
        bitscore_raw=661.0,
        evalue=5.29552e-90,
        query_start=1,
        query_end=355,
        hit_start=44323,
        hit_end=44677,
        query_frame=-3,
        hit_frame=-3,
        ident_num=117,
        pos_num=117,
        gap_num=0,
        aln_span=118,
    )
    self.assertEqual(
        "ECXFIMLYIFPARIWST*VICPPEQWL*RRKLSSGQKLLRRCGKTGYIKNNAGLK*PMRFCQGILH*CI*SKSGPIQRMWLKAPFWPFLFLLRALHTFPLFLSKWTK*RVS*VEGHSD",
        segment.query.seq,
    )
    self.assertEqual(
        "ECCFIMLYIFPARIWST*VICPPEQWL*RRKLSSGQKLLRRCGKTGYIKNNAGLK*PMRFCQGILH*CI*SKSGPIQRMWLKAPFWPFLFLLRALHTFPLFLSKWTK*RVS*VEGHSD",
        segment.hit.seq,
    )
    self.assertEqual(
        "EC FIMLYIFPARIWST*VICPPEQWL*RRKLSSGQKLLRRCGKTGYIKNNAGLK*PMRFCQGILH*CI*SKSGPIQRMWLKAPFWPFLFLLRALHTFPLFLSKWTK*RVS*VEGHSD",
        segment.aln_annotation["similarity"],
    )

    # last hit
    current_hit = record[-1]
    expect(
        current_hit,
        id="gi|4309961|gb|AC005993.2|AC005993",
        description="Homo sapiens PAC clone RP6-114E22 from 14, complete sequence",
        seq_len=143943,
    )
    self.assertEqual(1, len(current_hit))

    # last hit, only HSP
    segment = current_hit.hsps[0]
    expect(
        segment,
        bitscore=41.0922,
        bitscore_raw=78,
        evalue=0.716571,
        query_start=166,
        query_end=250,
        hit_start=43679,
        hit_end=43763,
        query_frame=2,
        hit_frame=3,
        ident_num=13,
        pos_num=19,
        gap_num=0,
        aln_span=28,
    )
    self.assertEqual("PLTKAHRLFQTSIVFYVTCFTASSQQLL", segment.query.seq)
    self.assertEqual("PLNKYHTIFQISLCFYLFCYNMAQKQLL", segment.hit.seq)
    self.assertEqual(
        "PL K H +FQ S+ FY+ C+ + +QLL", segment.aln_annotation["similarity"]
    )

    # check if we've finished iteration over the query results
    with self.assertRaises(StopIteration):
        next(records)
    self.assertEqual(1, seen)
def test_xml_2226_tblastx_001(self):
    # Two-query tblastx 2.2.26+ XML report: the first query result has no
    # hits; for the second one the first and last hits are spot-checked.

    def expect(obj, **attrs):
        # Assert, in the order given, that each named attribute of obj
        # equals its expected value.
        for name, value in attrs.items():
            self.assertEqual(value, getattr(obj, name))

    records = parse(get_file("xml_2226_tblastx_001.xml"), FMT)
    seen = 0

    # ---- first query result (no hits) ----
    record = next(records)
    seen += 1
    expect(
        record,
        # meta variables, only tested on the first query result
        version="2.2.26+",
        reference=(
            "Stephen F. Altschul, Thomas L. Madden, Alejandro "
            "A. Schäffer, Jinghui Zhang, Zheng Zhang, "
            "Webb Miller, and David J. Lipman (1997), "
            '"Gapped BLAST and PSI-BLAST: a new generation of '
            'protein database search programs", '
            "Nucleic Acids Res. 25:3389-3402."
        ),
        param_matrix="BLOSUM62",
        param_evalue_threshold=10.0,
        param_filter="L;",
        param_gap_open=11,
        param_gap_extend=1,
        program="tblastx",
        target="db/minirefseq_mrna",
        # parsed values of the first query result
        id="random_s00",
        description="",
        seq_len=128,
        stat_db_num=23,
        stat_db_len=67750,
        stat_eff_space=0,
        stat_kappa=-1,
        stat_lambda=-1,
        stat_entropy=-1,
    )
    self.assertEqual(0, len(record))

    # ---- second query result ----
    record = next(records)
    seen += 1
    expect(
        record,
        id="gi|296147483:1-350",
        description="Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds",
        seq_len=350,
        stat_db_num=23,
        stat_db_len=67750,
        stat_eff_space=1954618.0,
        stat_kappa=0.133956144488482,
        stat_lambda=0.317605957635731,
        stat_entropy=0.401214524497119,
    )
    self.assertEqual(5, len(record))

    # first hit
    current_hit = record[0]
    expect(
        current_hit,
        id="gi|296147483|ref|NM_001183135.1|",
        description="Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds",
        seq_len=4911,
    )
    self.assertEqual(8, len(current_hit))

    # first hit, first HSP
    segment = current_hit.hsps[0]
    expect(
        segment,
        bitscore=289.739,
        bitscore_raw=626,
        evalue=2.35531e-81,
        query_start=1,
        query_end=349,
        hit_start=1,
        hit_end=349,
        query_frame=2,
        hit_frame=2,
        ident_num=116,
        pos_num=116,
        gap_num=0,
        aln_span=116,
    )
    self.assertEqual(
        "WP*TLEGLTPCKGNLKQNCVLYLPNRKEEIQPFAMLVINPLRY*KEYIVLRS*KDIRISHSLSCWLANQGMLK*RPWQCNAYRDCQPFHLFLEAGCLKFWMPSLRLLISRWRFN*K",
        segment.hit.seq,
    )
    self.assertEqual(
        "WP*TLEGLTPCKGNLKQNCVLYLPNRKEEIQPFAMLVINPLRY*KEYIVLRS*KDIRISHSLSCWLANQGMLK*RPWQCNAYRDCQPFHLFLEAGCLKFWMPSLRLLISRWRFN*K",
        segment.query.seq,
    )
    self.assertEqual(
        "WP*TLEGLTPCKGNLKQNCVLYLPNRKEEIQPFAMLVINPLRY*KEYIVLRS*KDIRISHSLSCWLANQGMLK*RPWQCNAYRDCQPFHLFLEAGCLKFWMPSLRLLISRWRFN*K",
        segment.aln_annotation["similarity"],
    )

    # first hit, last HSP
    segment = current_hit.hsps[-1]
    expect(
        segment,
        bitscore=18.9375,
        bitscore_raw=35,
        evalue=7.78658,
        query_start=292,
        query_end=325,
        hit_start=340,
        hit_end=373,
        query_frame=2,
        hit_frame=-3,
        ident_num=6,
        pos_num=9,
        gap_num=0,
        aln_span=11,
    )
    self.assertEqual("KFWMPSLRLLI", segment.query.seq)
    self.assertEqual("KKWVPPVKLLI", segment.hit.seq)
    self.assertEqual("K W+P ++LLI", segment.aln_annotation["similarity"])

    # last hit
    current_hit = record[-1]
    expect(
        current_hit,
        id="gi|254579534|ref|XM_002495708.1|",
        description=(
            "Zygosaccharomyces rouxii hypothetical protein (ZYRO0C02266g) mRNA, complete cds"
        ),
        seq_len=4866,
    )
    self.assertEqual(6, len(current_hit))

    # last hit, first HSP
    segment = current_hit.hsps[0]
    expect(
        segment,
        bitscore=141.279,
        bitscore_raw=302,
        evalue=1.15566e-36,
        query_start=96,
        query_end=348,
        hit_start=96,
        hit_end=348,
        query_frame=1,
        hit_frame=1,
        ident_num=57,
        pos_num=72,
        gap_num=0,
        aln_span=84,
    )
    self.assertEqual(
        "IRHASDKSIEILKRVHSFEELERHPDFALPFVLACQSRNAKMTTLAMQCLQGLSTVPSIPRSRLSEILDAFIEATHLAMEIQLK",
        segment.query.seq,
    )
    self.assertEqual(
        "IRNASDKSIEILKVVHSYEELSRHPDFIVPLVMSCASKNAKLTTISMQCFQKLATVPCIPVDKLSDVLDAFIEANQLAMDIKLK",
        segment.hit.seq,
    )
    self.assertEqual(
        "IR+ASDKSIEILK VHS+EEL RHPDF +P V++C S+NAK+TT++MQC Q L+TVP IP +LS++LDAFIEA LAM+I+LK",
        segment.aln_annotation["similarity"],
    )

    # check if we've finished iteration over the query results
    with self.assertRaises(StopIteration):
        next(records)
    self.assertEqual(2, seen)
def test_xml_2226_tblastx_002(self):
    # Single-query tblastx 2.2.26+ XML report: verifies the program header,
    # the search parameters and the statistics of a query result that
    # contains no hits.

    def expect(obj, **attrs):
        # Assert, in the order given, that each named attribute of obj
        # equals its expected value.
        for name, value in attrs.items():
            self.assertEqual(value, getattr(obj, name))

    records = parse(get_file("xml_2226_tblastx_002.xml"), FMT)
    seen = 0

    record = next(records)
    seen += 1
    expect(
        record,
        # program header and search parameters
        version="2.2.26+",
        reference=(
            "Stephen F. Altschul, Thomas L. Madden, Alejandro "
            "A. Schäffer, Jinghui Zhang, Zheng Zhang, "
            "Webb Miller, and David J. Lipman (1997), "
            '"Gapped BLAST and PSI-BLAST: a new generation of '
            'protein database search programs", '
            "Nucleic Acids Res. 25:3389-3402."
        ),
        param_matrix="BLOSUM62",
        param_evalue_threshold=10.0,
        param_filter="L;",
        param_gap_open=11,
        param_gap_extend=1,
        program="tblastx",
        target="db/minirefseq_mrna",
        # the query itself
        id="random_s00",
        description="",
        seq_len=128,
        # search statistics
        stat_db_num=23,
        stat_db_len=67750,
        stat_eff_space=0,
        stat_kappa=-1,
        stat_lambda=-1,
        stat_entropy=-1,
    )
    self.assertEqual(0, len(record))

    # the iterator must be exhausted after that single query result
    with self.assertRaises(StopIteration):
        next(records)
    self.assertEqual(1, seen)
def test_xml_2226_tblastx_003(self):
    # Single-query tblastx 2.2.26+ XML report: verifies the query result,
    # the first and last HSP of its first hit, and the first HSP of its
    # last hit.

    def expect(obj, **attrs):
        # Assert, in the order given, that each named attribute of obj
        # equals its expected value.
        for name, value in attrs.items():
            self.assertEqual(value, getattr(obj, name))

    records = parse(get_file("xml_2226_tblastx_003.xml"), FMT)
    seen = 0

    record = next(records)
    seen += 1
    expect(
        record,
        # program header and search parameters
        version="2.2.26+",
        reference=(
            "Stephen F. Altschul, Thomas L. Madden, Alejandro "
            "A. Schäffer, Jinghui Zhang, Zheng Zhang, "
            "Webb Miller, and David J. Lipman (1997), "
            '"Gapped BLAST and PSI-BLAST: a new generation of '
            'protein database search programs", '
            "Nucleic Acids Res. 25:3389-3402."
        ),
        param_matrix="BLOSUM62",
        param_evalue_threshold=10.0,
        param_filter="L;",
        param_gap_open=11,
        param_gap_extend=1,
        program="tblastx",
        target="db/minirefseq_mrna",
        # the query itself
        id="gi|296147483:1-350",
        description="Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds",
        seq_len=350,
        # search statistics
        stat_db_num=23,
        stat_db_len=67750,
        stat_eff_space=1954618.0,
        stat_kappa=0.133956144488482,
        stat_lambda=0.317605957635731,
        stat_entropy=0.401214524497119,
    )
    self.assertEqual(5, len(record))

    # first hit
    current_hit = record[0]
    expect(
        current_hit,
        id="gi|296147483|ref|NM_001183135.1|",
        description="Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds",
        seq_len=4911,
    )
    self.assertEqual(8, len(current_hit))

    # first hit, first HSP
    segment = current_hit.hsps[0]
    expect(
        segment,
        bitscore=289.739,
        bitscore_raw=626,
        evalue=2.35531e-81,
        query_start=1,
        query_end=349,
        hit_start=1,
        hit_end=349,
        query_frame=2,
        hit_frame=2,
        ident_num=116,
        pos_num=116,
        gap_num=0,
        aln_span=116,
    )
    self.assertEqual(
        "WP*TLEGLTPCKGNLKQNCVLYLPNRKEEIQPFAMLVINPLRY*KEYIVLRS*KDIRISHSLSCWLANQGMLK*RPWQCNAYRDCQPFHLFLEAGCLKFWMPSLRLLISRWRFN*K",
        segment.hit.seq,
    )
    self.assertEqual(
        "WP*TLEGLTPCKGNLKQNCVLYLPNRKEEIQPFAMLVINPLRY*KEYIVLRS*KDIRISHSLSCWLANQGMLK*RPWQCNAYRDCQPFHLFLEAGCLKFWMPSLRLLISRWRFN*K",
        segment.query.seq,
    )
    self.assertEqual(
        "WP*TLEGLTPCKGNLKQNCVLYLPNRKEEIQPFAMLVINPLRY*KEYIVLRS*KDIRISHSLSCWLANQGMLK*RPWQCNAYRDCQPFHLFLEAGCLKFWMPSLRLLISRWRFN*K",
        segment.aln_annotation["similarity"],
    )

    # first hit, last HSP
    segment = current_hit.hsps[-1]
    expect(
        segment,
        bitscore=18.9375,
        bitscore_raw=35,
        evalue=7.78658,
        query_start=292,
        query_end=325,
        hit_start=340,
        hit_end=373,
        query_frame=2,
        hit_frame=-3,
        ident_num=6,
        pos_num=9,
        gap_num=0,
        aln_span=11,
    )
    self.assertEqual("KFWMPSLRLLI", segment.query.seq)
    self.assertEqual("KKWVPPVKLLI", segment.hit.seq)
    self.assertEqual("K W+P ++LLI", segment.aln_annotation["similarity"])

    # last hit
    current_hit = record[-1]
    expect(
        current_hit,
        id="gi|254579534|ref|XM_002495708.1|",
        description=(
            "Zygosaccharomyces rouxii hypothetical protein (ZYRO0C02266g) mRNA, complete cds"
        ),
        seq_len=4866,
    )
    self.assertEqual(6, len(current_hit))

    # last hit, first HSP
    segment = current_hit.hsps[0]
    expect(
        segment,
        bitscore=141.279,
        bitscore_raw=302,
        evalue=1.15566e-36,
        query_start=96,
        query_end=348,
        hit_start=96,
        hit_end=348,
        query_frame=1,
        hit_frame=1,
        ident_num=57,
        pos_num=72,
        gap_num=0,
        aln_span=84,
    )
    self.assertEqual(
        "IRHASDKSIEILKRVHSFEELERHPDFALPFVLACQSRNAKMTTLAMQCLQGLSTVPSIPRSRLSEILDAFIEATHLAMEIQLK",
        segment.query.seq,
    )
    self.assertEqual(
        "IRNASDKSIEILKVVHSYEELSRHPDFIVPLVMSCASKNAKLTTISMQCFQKLATVPCIPVDKLSDVLDAFIEANQLAMDIKLK",
        segment.hit.seq,
    )
    self.assertEqual(
        "IR+ASDKSIEILK VHS+EEL RHPDF +P V++C S+NAK+TT++MQC Q L+TVP IP +LS++LDAFIEA LAM+I+LK",
        segment.aln_annotation["similarity"],
    )

    # check if we've finished iteration over the query results
    with self.assertRaises(StopIteration):
        next(records)
    self.assertEqual(1, seen)
def test_xml_2226_tblastx_004(self):
    # Walk both query results of xml_2226_tblastx_004.xml and compare the
    # parsed values with the expected ones, in the order they are stored.

    def check_attrs(obj, expectations):
        # Assert every (attribute name, expected value) pair, in order.
        for attr_name, expected in expectations:
            self.assertEqual(expected, getattr(obj, attr_name))

    # scalar HSP attributes, listed in the order they are verified
    hsp_fields = (
        "bitscore",
        "bitscore_raw",
        "evalue",
        "query_start",
        "query_end",
        "hit_start",
        "hit_end",
        "query_frame",
        "hit_frame",
        "ident_num",
        "pos_num",
        "gap_num",
        "aln_span",
    )
    # database statistics, expected to be the same for both query results
    db_stats = [
        ("stat_db_num", 2933984),
        ("stat_db_len", 4726730735),
        ("stat_eff_space", 0),
        ("stat_kappa", 0),
        ("stat_lambda", 0),
        ("stat_entropy", 0),
    ]
    expected_reference = (
        "Stephen F. Altschul, Thomas L. Madden, Alejandro "
        "A. Schäffer, Jinghui Zhang, Zheng Zhang, "
        "Webb Miller, and David J. Lipman (1997), "
        '"Gapped BLAST and PSI-BLAST: a new generation of '
        'protein database search programs", '
        "Nucleic Acids Res. 25:3389-3402."
    )

    records = parse(get_file("xml_2226_tblastx_004.xml"), FMT)
    seen = 0

    # first query result: meta variables (only checked here), then its
    # own parsed values; it carries no hits
    record = next(records)
    seen += 1
    check_attrs(
        record,
        [
            ("version", "2.2.26+"),
            ("reference", expected_reference),
            ("param_matrix", "BLOSUM62"),
            ("param_evalue_threshold", 10.0),
            ("param_filter", "L;"),
            ("param_gap_open", 11),
            ("param_gap_extend", 1),
            ("program", "tblastx"),
            ("target", "refseq_rna"),
            ("id", "random_s00"),
            ("description", ""),
            ("seq_len", 128),
        ]
        + db_stats,
    )
    self.assertEqual(0, len(record))

    # second query result
    record = next(records)
    seen += 1
    check_attrs(
        record,
        [
            ("id", "gi|296147483:1-350"),
            (
                "description",
                "Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds",
            ),
            ("seq_len", 350),
        ]
        + db_stats,
    )
    self.assertEqual(5, len(record))
    # looking a hit up by its alternative ID must give the same hit
    self.assertEqual(
        record["gi|296147483|ref|NM_001183135.1|"],
        record["gi|116616412|gb|EF059095.1|"],
    )

    # first hit of the second query result
    hit = record[0]
    self.assertEqual("gi|296147483|ref|NM_001183135.1|", hit.id)
    self.assertEqual(
        "Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds",
        hit.description,
    )
    self.assertEqual("gi|116616412|gb|EF059095.1|", hit.id_all[1])
    self.assertEqual(
        "Synthetic construct Saccharomyces cerevisiae "
        "clone FLH203015.01X MON2, complete sequence",
        hit.description_all[1],
    )
    self.assertEqual(4911, hit.seq_len)
    self.assertEqual(7, len(hit))

    # first HSP of that hit: hit, query and similarity lines are all
    # expected to hold the same text
    hsp = hit.hsps[0]
    check_attrs(
        hsp,
        zip(
            hsp_fields,
            (289.739, 626, 1.05874e-76, 1, 349, 1, 349, 2, 2, 116, 116, 0, 116),
        ),
    )
    identical_seq = "WP*TLEGLTPCKGNLKQNCVLYLPNRKEEIQPFAMLVINPLRY*KEYIVLRS*KDIRISHSLSCWLANQGMLK*RPWQCNAYRDCQPFHLFLEAGCLKFWMPSLRLLISRWRFN*K"
    self.assertEqual(identical_seq, hsp.hit.seq)
    self.assertEqual(identical_seq, hsp.query.seq)
    self.assertEqual(identical_seq, hsp.aln_annotation["similarity"])

    # last HSP of that hit
    hsp = hit.hsps[-1]
    check_attrs(
        hsp,
        zip(
            hsp_fields,
            (36.3494, 73, 9.12288e-54, 0, 42, 0, 42, 1, 1, 14, 14, 0, 14),
        ),
    )
    self.assertEqual("MAMNTGGFDSMQRQ", hsp.query.seq)
    self.assertEqual("MAMNTGGFDSMQRQ", hsp.hit.seq)
    self.assertEqual("MAMNTGGFDSMQRQ", hsp.aln_annotation["similarity"])

    # last hit of the second query result
    hit = record[-1]
    self.assertEqual("gi|254579534|ref|XM_002495708.1|", hit.id)
    self.assertEqual(
        "Zygosaccharomyces rouxii hypothetical protein "
        "(ZYRO0C02266g) mRNA, complete cds",
        hit.description,
    )
    self.assertEqual(4866, hit.seq_len)
    self.assertEqual(4, len(hit))

    # first HSP of the last hit
    hsp = hit.hsps[0]
    check_attrs(
        hsp,
        zip(
            hsp_fields,
            (141.279, 302, 5.19486e-32, 96, 348, 96, 348, 1, 1, 57, 72, 0, 84),
        ),
    )
    self.assertEqual(
        "IRHASDKSIEILKRVHSFEELERHPDFALPFVLACQSRNAKMTTLAMQCLQGLSTVPSIPRSRLSEILDAFIEATHLAMEIQLK",
        hsp.query.seq,
    )
    self.assertEqual(
        "IRNASDKSIEILKVVHSYEELSRHPDFIVPLVMSCASKNAKLTTISMQCFQKLATVPCIPVDKLSDVLDAFIEANQLAMDIKLK",
        hsp.hit.seq,
    )
    self.assertEqual(
        "IR+ASDKSIEILK VHS+EEL RHPDF +P V++C S+NAK+TT++MQC Q L+TVP IP +LS++LDAFIEA LAM+I+LK",
        hsp.aln_annotation["similarity"],
    )

    # the iterator must be exhausted after exactly two query results
    self.assertRaises(StopIteration, next, records)
    self.assertEqual(2, seen)
class BlastXmlSpecialCases(unittest.TestCase):
def test_xml_2226_blastn_006(self):
xml_file = get_file("xml_2226_blastn_006.xml")
qresults = parse(xml_file, FMT)
exp_warning = 1
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always", BiopythonParserWarning)
qresult = next(qresults)
self.assertEqual(
exp_warning,
len(w),
f"Expected {exp_warning} warning(s), got {len(w)}",
)
self.assertEqual(qresult.blast_id, "Query_1")
hit1 = qresult[0]
hit2 = qresult[1]
self.assertEqual("gnl|BL_ORD_ID|18", hit1.blast_id)
self.assertEqual("gi|347972582|ref|XM_309352.4|", hit1.id)
self.assertEqual(
"Anopheles gambiae str. PEST AGAP011294-PA (DEFI_ANOGA) mRNA, complete cds",
hit1.description,
)
self.assertEqual("gnl|BL_ORD_ID|17", hit2.blast_id)
self.assertEqual("gnl|BL_ORD_ID|17", hit2.id)
self.assertEqual(
"gi|347972582|ref|XM_309352.4| Anopheles gambiae str. PEST AGAP011294-PA (DEFI_ANOGA) mRNA, complete cds",
hit2.description,
)
def test_xml_2226_blastn_006_use_raw_hit_ids(self):
xml_file = get_file("xml_2226_blastn_006.xml")
qresults = parse(xml_file, FMT, use_raw_hit_ids=True)
exp_warning = 0
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always", BiopythonParserWarning)
qresult = next(qresults)
self.assertEqual(
exp_warning,
len(w),
f"Expected {exp_warning} warning(s), got {len(w)}",
)
self.assertEqual(qresult.blast_id, "Query_1")
hit1 = qresult[0]
hit2 = qresult[1]
self.assertEqual("gnl|BL_ORD_ID|18", hit1.blast_id)
self.assertEqual("gnl|BL_ORD_ID|18", hit1.id)
self.assertEqual(
"gi|347972582|ref|XM_309352.4| Anopheles gambiae str. PEST AGAP011294-PA (DEFI_ANOGA) mRNA, complete cds",
hit1.description,
)
self.assertEqual("gnl|BL_ORD_ID|17", hit2.blast_id)
self.assertEqual("gnl|BL_ORD_ID|17", hit2.id)
self.assertEqual(
"gi|347972582|ref|XM_309352.4| Anopheles gambiae str. PEST AGAP011294-PA (DEFI_ANOGA) mRNA, complete cds",
hit2.description,
)
def test_xml_2226_blastn_006_use_raw_query_ids(self):
xml_file = get_file("xml_2226_blastn_006.xml")
qresults = parse(xml_file, FMT, use_raw_query_ids=True)
exp_warning = 1
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always", BiopythonParserWarning)
qresult = next(qresults)
self.assertEqual(
exp_warning,
len(w),
f"Expected {exp_warning} warning(s), got {len(w)}",
)
self.assertEqual(qresult.id, "Query_1")
self.assertEqual(
qresult.description,
"gi|347972582|ref|XM_309352.4| Anopheles gambiae str. PEST AGAP011294-PA (DEFI_ANOGA) mRNA, complete cds",
)
self.assertEqual(qresult.blast_id, "Query_1")
if __name__ == "__main__":
    # Run every test case of this module, reporting each test by name.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))