Add psiblast test; handle PSIBLAST XML2 output in the BLAST XML parser and writer (#4889)

* update

* update

---------

Co-authored-by: Michiel de Hoon <mdehoon@madpc2s-MacBook-Pro.local>
This commit is contained in:
mdehoon
2024-11-21 17:29:30 +09:00
committed by GitHub
parent 68dc8b6463
commit 6ebae6a66d
5 changed files with 1210 additions and 574 deletions

View File

@ -30,6 +30,8 @@ from Bio.SeqFeature import SeqFeature
from Bio.SeqFeature import SimpleLocation
from Bio.SeqRecord import SeqRecord
import numpy as np
class DTDHandler:
"""Parser for the BLAST XML DTD file."""
@ -132,7 +134,6 @@ class SchemaHandler:
"message",
"subjects",
"bl2seq",
"iter-num",
):
pass # TBD
else:
@ -351,6 +352,9 @@ class XMLHandler:
self._characters = ""
def _start_iteration(self, name, attributes):
if self._program == "psiblast" and name == "http://www.ncbi.nlm.nih.gov Search":
# PSIBLAST XML2 nests <Search> inside <Iteration>; skip <Search> here so that only one record is started per iteration
return
record = Record()
self._record = record
@ -714,6 +718,9 @@ class XMLHandler:
def _end_iteration(self, name):
assert self._characters.strip() == ""
self._characters = ""
if self._program == "psiblast" and name == "http://www.ncbi.nlm.nih.gov Search":
# PSIBLAST XML2 nests <Search> inside <Iteration>; skip <Search> here so that only one record is stored per iteration
return
self._records._cache.append(self._record)
del self._record
@ -926,106 +933,117 @@ class XMLHandler:
hsp = self._hsp
del self._hsp
program = self._program
align_len = hsp.align_len
query = self._record.query
if query is None:
query = self._records.query
query_id = query.id
query_description = query.description
query_length = len(query.seq)
query_seq_aligned = hsp.qseq.encode()
assert len(query_seq_aligned) == align_len
target_seq_aligned = hsp.hseq.encode()
assert len(target_seq_aligned) == align_len
(target_seq_data, query_seq_data), coordinates = (
Alignment.parse_printed_alignment([target_seq_aligned, query_seq_aligned])
)
query = SeqRecord(None, query_id, description=query_description)
query_start = hsp.query_from - 1
query_end = hsp.query_to
if program in ("blastx", "tblastx"):
assert query_end - query_start == 3 * len(query_seq_data)
location = SimpleLocation(0, len(query_seq_data))
coded_by = f"{query_id}:{hsp.query_from}..{hsp.query_to}"
query_frame = hsp.query_frame
if query_frame > 0:
assert query_start % 3 == query_frame - 1
elif query_frame < 0:
assert (query_length - query_end) % 3 == -query_frame - 1
coded_by = f"complement({coded_by})"
qualifiers = {"coded_by": coded_by}
feature = SeqFeature(location, type="CDS", qualifiers=qualifiers)
query.features.append(feature)
else:
coordinates[1, :] += query_start
assert query_end - query_start == len(query_seq_data)
query_seq_data = {query_start: query_seq_data}
if program == "blastn":
try:
query_strand = hsp.query_strand
except AttributeError:
# v1 XML
pass
else:
# v2 XML
assert query_strand == "Plus"
query.seq = Seq(query_seq_data, query_length)
target = self._alignments.target
target_id = target.id
target_name = target.name
target_description = target.description
target_length = len(target.seq)
target = SeqRecord(None, target_id, target_name, description=target_description)
if program in ("blastn", "megablast"):
try:
target_strand = hsp.hit_strand
except AttributeError:
# v1 XML
target_frame = hsp.hit_frame
if target_frame == +1:
target_strand = "Plus"
elif target_frame == -1:
target_strand = "Minus"
if target_strand == "Plus":
query_seq_aligned = hsp.qseq.encode()
target_seq_aligned = hsp.hseq.encode()
try:
align_len = hsp.align_len
except AttributeError: # PSIBLAST XML2
assert len(query_seq_aligned) == 0
assert len(target_seq_aligned) == 0
query_seq_data = None
target.seq = Seq(None, target_length)
coordinates = np.empty((2, 0), dtype=int)
else:
assert len(query_seq_aligned) == align_len
assert len(target_seq_aligned) == align_len
(
target_seq_data,
query_seq_data,
), coordinates = Alignment.parse_printed_alignment(
[target_seq_aligned, query_seq_aligned]
)
query_start = hsp.query_from - 1
query_end = hsp.query_to
if program in ("blastx", "tblastx"):
assert query_end - query_start == 3 * len(query_seq_data)
location = SimpleLocation(0, len(query_seq_data))
coded_by = f"{query_id}:{hsp.query_from}..{hsp.query_to}"
query_frame = hsp.query_frame
if query_frame > 0:
assert query_start % 3 == query_frame - 1
elif query_frame < 0:
assert (query_length - query_end) % 3 == -query_frame - 1
coded_by = f"complement({coded_by})"
qualifiers = {"coded_by": coded_by}
feature = SeqFeature(location, type="CDS", qualifiers=qualifiers)
query.features.append(feature)
else:
coordinates[1, :] += query_start
assert query_end - query_start == len(query_seq_data)
query_seq_data = {query_start: query_seq_data}
if program == "blastn":
try:
query_strand = hsp.query_strand
except AttributeError:
# v1 XML
pass
else:
# v2 XML
assert query_strand == "Plus"
if program in ("blastn", "megablast"):
try:
target_strand = hsp.hit_strand
except AttributeError:
# v1 XML
target_frame = hsp.hit_frame
if target_frame == +1:
target_strand = "Plus"
elif target_frame == -1:
target_strand = "Minus"
if target_strand == "Plus":
target_start = hsp.hit_from - 1
target_end = hsp.hit_to
coordinates[0, :] += target_start
assert target_end - target_start == len(target_seq_data)
target_seq_data = {target_start: target_seq_data}
target.seq = Seq(target_seq_data, target_length)
elif target_strand == "Minus":
target_start = hsp.hit_to - 1
target_end = hsp.hit_from
coordinates[0, :] = target_end - coordinates[0, :]
assert target_end - target_start == len(target_seq_data)
target_seq_data = {target_length - target_end: target_seq_data}
seq = Seq(target_seq_data, target_length)
target.seq = seq.reverse_complement()
elif program in ("blastp", "blastx", "rpsblast", "psiblast"):
target_start = hsp.hit_from - 1
target_end = hsp.hit_to
coordinates[0, :] += target_start
assert target_end - target_start == len(target_seq_data)
target_seq_data = {target_start: target_seq_data}
target.seq = Seq(target_seq_data, target_length)
elif target_strand == "Minus":
target_start = hsp.hit_to - 1
target_end = hsp.hit_from
coordinates[0, :] = target_end - coordinates[0, :]
assert target_end - target_start == len(target_seq_data)
target_seq_data = {target_length - target_end: target_seq_data}
seq = Seq(target_seq_data, target_length)
target.seq = seq.reverse_complement()
elif program in ("blastp", "blastx", "rpsblast", "psiblast"):
target_start = hsp.hit_from - 1
target_end = hsp.hit_to
coordinates[0, :] += target_start
assert target_end - target_start == len(target_seq_data)
target_seq_data = {target_start: target_seq_data}
target.seq = Seq(target_seq_data, target_length)
elif program in ("tblastn", "tblastx"):
target_start = hsp.hit_from - 1
target_end = hsp.hit_to
assert target_end - target_start == 3 * len(target_seq_data)
location = SimpleLocation(0, target_length)
coded_by = f"{target_id}:{hsp.hit_from}..{hsp.hit_to}"
target_frame = hsp.hit_frame
if target_frame >= 0:
assert target_start % 3 == target_frame - 1
elif target_frame < 0:
assert (target_length - target_end) % 3 == -target_frame - 1
coded_by = f"complement({coded_by})"
qualifiers = {"coded_by": coded_by}
feature = SeqFeature(location, type="CDS", qualifiers=qualifiers)
target.features.append(feature)
target.seq = Seq(target_seq_data, target_length)
else:
raise RuntimeError("Unexpected program name '%s'" % program)
elif program in ("tblastn", "tblastx"):
target_start = hsp.hit_from - 1
target_end = hsp.hit_to
assert target_end - target_start == 3 * len(target_seq_data)
location = SimpleLocation(0, target_length)
coded_by = f"{target_id}:{hsp.hit_from}..{hsp.hit_to}"
target_frame = hsp.hit_frame
if target_frame >= 0:
assert target_start % 3 == target_frame - 1
elif target_frame < 0:
assert (target_length - target_end) % 3 == -target_frame - 1
coded_by = f"complement({coded_by})"
qualifiers = {"coded_by": coded_by}
feature = SeqFeature(location, type="CDS", qualifiers=qualifiers)
target.features.append(feature)
target.seq = Seq(target_seq_data, target_length)
else:
raise RuntimeError("Unexpected program name '%s'" % program)
query.seq = Seq(query_seq_data, query_length)
sequences = [target, query]
alignment = HSP(sequences, coordinates)
alignment.num = hsp.num
@ -1044,7 +1062,11 @@ class XMLHandler:
except AttributeError:
# missing in legacy megablast
pass
annotations["midline"] = hsp.midline
try:
annotations["midline"] = hsp.midline
except AttributeError:
# missing in psiblast for XML2
pass
alignment.annotations = annotations
self._alignments.append(alignment)

View File

@ -70,7 +70,6 @@ class BaseXMLWriter(ABC):
def _write_params(self, param):
self._start_param()
self.stream.write(b" <Parameters>\n")
value = param.get("matrix")
if value is not None:
self._write_parameters_matrix(value.encode())
@ -111,7 +110,6 @@ class BaseXMLWriter(ABC):
value = param.get("bl2seq-mode")
if value is not None:
self._write_parameters_bl2seq_mode(value.encode())
self.stream.write(b" </Parameters>\n")
self._end_param()
def _write_records(self, records):
@ -125,20 +123,16 @@ class BaseXMLWriter(ABC):
def _write_record(self, record):
stream = self.stream
self._start_iteration()
try:
num = record.num
except AttributeError: # XML2
pass
else:
self._write_iteration_num(num)
self._start_iteration(record)
query = record.query
if query is None:
query_length = None
else:
query_length = len(query.seq)
self._write_iteration_query_id(query.id.encode())
self._write_iteration_query_def(query.description.encode())
description = query.description
if description != "<unknown description>":
self._write_iteration_query_def(description.encode())
self._write_iteration_query_len(query_length)
for feature in query.features:
if feature.type == "masking":
@ -198,76 +192,88 @@ class BaseXMLWriter(ABC):
query = hsp.query
target = hsp.target
coordinates = hsp.coordinates
hit_from, query_from = coordinates[:, 0]
hit_to, query_to = coordinates[:, -1]
if program in ("blastn", "megablast"):
if hit_from <= hit_to:
hit_frame = 1
if coordinates.shape[1] > 0:
hit_from, query_from = coordinates[:, 0]
hit_to, query_to = coordinates[:, -1]
if program in ("blastn", "megablast"):
if hit_from <= hit_to:
hit_frame = 1
hit_from += 1
else:
hit_frame = -1
hit_to += 1
elif program in ("blastp", "blastx", "rpsblast", "psiblast"):
hit_from += 1
else:
hit_frame = -1
hit_to += 1
elif program in ("blastp", "blastx", "rpsblast"):
hit_from += 1
hit_frame = 0
elif program in ("tblastn", "tblastx"):
feature = target.features[0]
coded_by = feature.qualifiers["coded_by"]
if coded_by.startswith("complement("):
assert coded_by.endswith(")")
coded_by = coded_by[11:-1]
strand = -1
else:
strand = +1
hit_id, hit_from_to = coded_by.split(":")
hit_from, hit_to = hit_from_to.split("..")
hit_from = int(hit_from)
hit_to = int(hit_to)
hit_start = hit_from - 1
hit_end = hit_to
if strand == +1:
hit_frame = hit_start % 3 + 1
else:
hit_frame = (hit_end - target_length) % -3 - 1
if program in ("blastn", "megablast"):
if query_from <= query_to:
hit_frame = 0
elif program in ("tblastn", "tblastx"):
feature = target.features[0]
coded_by = feature.qualifiers["coded_by"]
if coded_by.startswith("complement("):
assert coded_by.endswith(")")
coded_by = coded_by[11:-1]
strand = -1
else:
strand = +1
hit_id, hit_from_to = coded_by.split(":")
hit_from, hit_to = hit_from_to.split("..")
hit_from = int(hit_from)
hit_to = int(hit_to)
hit_start = hit_from - 1
hit_end = hit_to
if strand == +1:
hit_frame = hit_start % 3 + 1
else:
hit_frame = (hit_end - target_length) % -3 - 1
if program in ("blastn", "megablast"):
if query_from <= query_to:
query_from += 1
query_frame = 1
else:
query_to += 1
query_frame = -1
elif program in ("blastp", "tblastn", "rpsblast", "psiblast"):
query_from += 1
query_frame = 1
else:
query_to += 1
query_frame = -1
elif program in ("blastp", "tblastn", "rpsblast"):
query_from += 1
query_frame = 0
elif program in ("blastx", "tblastx"):
feature = query.features[0]
coded_by = feature.qualifiers["coded_by"]
if coded_by.startswith("complement("):
assert coded_by.endswith(")")
coded_by = coded_by[11:-1]
strand = -1
else:
strand = +1
query_id, query_from_to = coded_by.split(":")
query_from, query_to = query_from_to.split("..")
query_from = int(query_from)
query_to = int(query_to)
query_start = query_from - 1
query_end = query_to
if strand == +1:
query_frame = query_start % 3 + 1
else:
query_frame = (query_end - query_length) % -3 - 1
hseq = hsp[0]
qseq = hsp[1]
align_len = len(hseq)
query_frame = 0
elif program in ("blastx", "tblastx"):
feature = query.features[0]
coded_by = feature.qualifiers["coded_by"]
if coded_by.startswith("complement("):
assert coded_by.endswith(")")
coded_by = coded_by[11:-1]
strand = -1
else:
strand = +1
query_id, query_from_to = coded_by.split(":")
query_from, query_to = query_from_to.split("..")
query_from = int(query_from)
query_to = int(query_to)
query_start = query_from - 1
query_end = query_to
if strand == +1:
query_frame = query_start % 3 + 1
else:
query_frame = (query_end - query_length) % -3 - 1
hseq = hsp[0]
qseq = hsp[1]
align_len = len(hseq)
else:
# PSIBLAST XML2
query_from = 0
query_to = 0
hit_from = 0
hit_to = 0
hseq = ""
qseq = ""
query_frame = None
hit_frame = None
align_len = None
annotations = hsp.annotations
bit_score = annotations["bit score"]
evalue = annotations["evalue"]
identity = annotations["identity"]
positive = annotations.get("positive")
gaps = annotations.get("gaps")
midline = annotations["midline"]
midline = annotations.get("midline")
self._start_hsp()
self._write_hsp_num(hsp.num)
self._write_hsp_bit_score(str(bit_score).encode())
@ -277,17 +283,21 @@ class BaseXMLWriter(ABC):
self._write_hsp_query_to(query_to)
self._write_hsp_hit_from(hit_from)
self._write_hsp_hit_to(hit_to)
self._write_hsp_query_frame(query_frame)
self._write_hsp_hit_frame(hit_frame)
if query_frame is not None:
self._write_hsp_query_frame(query_frame)
if hit_frame is not None:
self._write_hsp_hit_frame(hit_frame)
self._write_hsp_identity(identity)
if positive is not None:
self._write_hsp_positive(positive)
if gaps is not None:
self._write_hsp_gaps(gaps)
self._write_hsp_align_len(align_len)
if align_len is not None:
self._write_hsp_align_len(align_len)
self._write_hsp_qseq(qseq.encode())
self._write_hsp_hseq(hseq.encode())
self._write_hsp_midline(midline.encode())
if midline is not None:
self._write_hsp_midline(midline.encode())
self._end_hsp()
def _write_statistics(self, stat):
@ -645,10 +655,20 @@ class XMLWriter(BaseXMLWriter):
)
def _start_param(self):
self.stream.write(b" <BlastOutput_param>\n")
self.stream.write(
b"""\
<BlastOutput_param>
<Parameters>
"""
)
def _end_param(self):
self.stream.write(b" </BlastOutput_param>\n")
self.stream.write(
b"""\
</Parameters>
</BlastOutput_param>
"""
)
def _start_iterations(self):
self.stream.write(b"<BlastOutput_iterations>\n")
@ -662,8 +682,9 @@ class XMLWriter(BaseXMLWriter):
def _end_mbstat(self):
self.stream.write(b" </BlastOutput_mbstat>\n")
def _start_iteration(self):
def _start_iteration(self, record):
self.stream.write(b"<Iteration>\n")
self._write_iteration_num(record.num)
def _end_iteration(self):
self.stream.write(b"</Iteration>\n")
@ -709,35 +730,35 @@ class XMLWriter(BaseXMLWriter):
def _write_parameters_sc_match(self, value):
self.stream.write(
b" <Parameters_sc-match>%d</Parameters_sc-match>\n" % value
b" <Parameters_sc-match>%d</Parameters_sc-match>\n" % value
)
def _write_parameters_sc_mismatch(self, value):
self.stream.write(
b" <Parameters_sc-mismatch>%d</Parameters_sc-mismatch>\n" % value
b" <Parameters_sc-mismatch>%d</Parameters_sc-mismatch>\n" % value
)
def _write_parameters_gap_open(self, value):
self.stream.write(
b" <Parameters_gap-open>%d</Parameters_gap-open>\n" % value
b" <Parameters_gap-open>%d</Parameters_gap-open>\n" % value
)
def _write_parameters_gap_extend(self, value):
self.stream.write(
b" <Parameters_gap-extend>%d</Parameters_gap-extend>\n" % value
b" <Parameters_gap-extend>%d</Parameters_gap-extend>\n" % value
)
def _write_parameters_filter(self, value):
self.stream.write(b" <Parameters_filter>%b</Parameters_filter>\n" % value)
self.stream.write(b" <Parameters_filter>%b</Parameters_filter>\n" % value)
def _write_parameters_pattern(self, value):
self.stream.write(
b" <Parameters_pattern>%b</Parameters_pattern>\n" % value
b" <Parameters_pattern>%b</Parameters_pattern>\n" % value
)
def _write_parameters_entrez_query(self, value):
self.stream.write(
b" <Parameters_entrez-query>%b</Parameters_entrez-query>\n" % value
b" <Parameters_entrez-query>%b</Parameters_entrez-query>\n" % value
)
def _write_statistics_db_num(self, db_num):
@ -943,7 +964,18 @@ class XML2Writer(BaseXMLWriter):
"""
)
def _start_iteration(self):
def _start_iteration(self, record):
try:
num = record.num
except AttributeError:
pass
else: # PSIBLAST
self.stream.write(
b"""\
<Iteration>
"""
)
self._write_iteration_num(num)
self.stream.write(
b"""\
<search>
@ -958,6 +990,15 @@ class XML2Writer(BaseXMLWriter):
</search>
"""
)
if self._program == "psiblast":
self.stream.write(
b"""\
</Iteration>
"""
)
def _write_iteration_num(self, num):
self.stream.write(b" <iter-num>%d</iter-num>\n" % num)
def _write_iteration_query_id(self, query_id):
self.stream.write(

View File

@ -0,0 +1,137 @@
<?xml version="1.0"?>
<BlastXML2
xmlns="http://www.ncbi.nlm.nih.gov"
xmlns:xs="http://www.w3.org/2001/XMLSchema-instance"
xs:schemaLocation="http://www.ncbi.nlm.nih.gov http://www.ncbi.nlm.nih.gov/data_specs/schema_alt/NCBI_BlastOutput2.xsd"
>
<BlastOutput2>
<report>
<Report>
<program>psiblast</program>
<version>PSIBLAST 2.15.0+</version>
<reference>Alejandro A. Sch&amp;auml;ffer, L. Aravind, Thomas L. Madden, Sergei Shavirin, John L. Spouge, Yuri I. Wolf, Eugene V. Koonin, and Stephen F. Altschul (2001), &quot;Improving the accuracy of PSI-BLAST protein database searches with composition-based statistics and other refinements&quot;, Nucleic Acids Res. 29:2994-3005.</reference>
<search-target>
<Target>
<db>swissprot</db>
</Target>
</search-target>
<params>
<Parameters>
<matrix>BLOSUM62</matrix>
<expect>1e-30</expect>
<gap-open>11</gap-open>
<gap-extend>1</gap-extend>
<filter>F</filter>
<cbs>2</cbs>
</Parameters>
</params>
<results>
<Results>
<iterations>
<Iteration>
<iter-num>1</iter-num>
<search>
<Search>
<query-id>lcl|Query_1</query-id>
<query-len>103</query-len>
<hits>
<Hit>
<num>1</num>
<description>
<HitDescr>
<id>sp|P69428.1|</id>
<accession>P69428</accession>
<title>RecName: Full=Sec-independent protein translocase protein TatA [Escherichia coli K-12]</title>
<taxid>83333</taxid>
</HitDescr>
<HitDescr>
<id>sp|P69429.1|</id>
<accession>P69429</accession>
<title>RecName: Full=Sec-independent protein translocase protein TatA [Escherichia coli CFT073]</title>
<taxid>199310</taxid>
</HitDescr>
<HitDescr>
<id>sp|P69430.1|</id>
<accession>P69430</accession>
<title>RecName: Full=Sec-independent protein translocase protein TatA [Escherichia coli O157:H7]</title>
<taxid>83334</taxid>
</HitDescr>
<HitDescr>
<id>sp|P69431.1|</id>
<accession>P69431</accession>
<title>RecName: Full=Sec-independent protein translocase protein TatA [Shigella flexneri]</title>
<taxid>623</taxid>
</HitDescr>
</description>
<len>89</len>
<hsps>
<Hsp>
<num>1</num>
<bit-score>177.178</bit-score>
<score>448</score>
<evalue>2.3039e-58</evalue>
<identity>89</identity>
<query-from>0</query-from>
<query-to>0</query-to>
<hit-from>0</hit-from>
<hit-to>0</hit-to>
<qseq></qseq>
<hseq></hseq>
</Hsp>
</hsps>
</Hit>
<Hit>
<num>2</num>
<description>
<HitDescr>
<id>sp|P0A2H3.1|</id>
<accession>P0A2H3</accession>
<title>RecName: Full=Sec-independent protein translocase protein TatA [Salmonella enterica subsp. enterica serovar Typhimurium str. LT2]</title>
<taxid>99287</taxid>
</HitDescr>
<HitDescr>
<id>sp|P0A2H4.1|</id>
<accession>P0A2H4</accession>
<title>RecName: Full=Sec-independent protein translocase protein TatA [Salmonella enterica subsp. enterica serovar Typhi]</title>
<taxid>90370</taxid>
</HitDescr>
</description>
<len>84</len>
<hsps>
<Hsp>
<num>1</num>
<bit-score>142.51</bit-score>
<score>358</score>
<evalue>1.0691e-44</evalue>
<identity>75</identity>
<query-from>0</query-from>
<query-to>0</query-to>
<hit-from>0</hit-from>
<hit-to>0</hit-to>
<qseq></qseq>
<hseq></hseq>
</Hsp>
</hsps>
</Hit>
</hits>
<stat>
<Statistics>
<db-num>482816</db-num>
<db-len>183558113</db-len>
<hsp-len>72</hsp-len>
<eff-space>4627826878</eff-space>
<kappa>0.041</kappa>
<lambda>0.267</lambda>
<entropy>0.14</entropy>
</Statistics>
</stat>
</Search>
</search>
</Iteration>
</iterations>
</Results>
</results>
</Report>
</report>
</BlastOutput2>
</BlastXML2>

View File

@ -0,0 +1,98 @@
<?xml version="1.0"?>
<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">
<BlastOutput>
<BlastOutput_program>psiblast</BlastOutput_program>
<BlastOutput_version>PSIBLAST 2.15.0+</BlastOutput_version>
<BlastOutput_reference>Alejandro A. Sch&amp;auml;ffer, L. Aravind, Thomas L. Madden, Sergei Shavirin, John L. Spouge, Yuri I. Wolf, Eugene V. Koonin, and Stephen F. Altschul (2001), &quot;Improving the accuracy of PSI-BLAST protein database searches with composition-based statistics and other refinements&quot;, Nucleic Acids Res. 29:2994-3005.</BlastOutput_reference>
<BlastOutput_db>swissprot</BlastOutput_db>
<BlastOutput_query-ID>Query_1</BlastOutput_query-ID>
<BlastOutput_query-def>WP_001234791.1 Sec-independent protein translocase subunit TatA [Shigella flexneri]</BlastOutput_query-def>
<BlastOutput_query-len>103</BlastOutput_query-len>
<BlastOutput_param>
<Parameters>
<Parameters_matrix>BLOSUM62</Parameters_matrix>
<Parameters_expect>1e-30</Parameters_expect>
<Parameters_gap-open>11</Parameters_gap-open>
<Parameters_gap-extend>1</Parameters_gap-extend>
<Parameters_filter>F</Parameters_filter>
</Parameters>
</BlastOutput_param>
<BlastOutput_iterations>
<Iteration>
<Iteration_iter-num>1</Iteration_iter-num>
<Iteration_query-ID>Query_1</Iteration_query-ID>
<Iteration_query-def>WP_001234791.1 Sec-independent protein translocase subunit TatA [Shigella flexneri]</Iteration_query-def>
<Iteration_query-len>103</Iteration_query-len>
<Iteration_hits>
<Hit>
<Hit_num>1</Hit_num>
<Hit_id>sp|P69428.1|</Hit_id>
<Hit_def>RecName: Full=Sec-independent protein translocase protein TatA [Escherichia coli K-12] &gt;sp|P69429.1| RecName: Full=Sec-independent protein translocase protein TatA [Escherichia coli CFT073] &gt;sp|P69430.1| RecName: Full=Sec-independent protein translocase protein TatA [Escherichia coli O157:H7] &gt;sp|P69431.1| RecName: Full=Sec-independent protein translocase protein TatA [Shigella flexneri]</Hit_def>
<Hit_accession>P69428</Hit_accession>
<Hit_len>89</Hit_len>
<Hit_hsps>
<Hsp>
<Hsp_num>1</Hsp_num>
<Hsp_bit-score>177.178</Hsp_bit-score>
<Hsp_score>448</Hsp_score>
<Hsp_evalue>2.3039e-58</Hsp_evalue>
<Hsp_query-from>15</Hsp_query-from>
<Hsp_query-to>103</Hsp_query-to>
<Hsp_hit-from>1</Hsp_hit-from>
<Hsp_hit-to>89</Hsp_hit-to>
<Hsp_query-frame>0</Hsp_query-frame>
<Hsp_hit-frame>0</Hsp_hit-frame>
<Hsp_identity>89</Hsp_identity>
<Hsp_positive>89</Hsp_positive>
<Hsp_gaps>0</Hsp_gaps>
<Hsp_align-len>89</Hsp_align-len>
<Hsp_qseq>MGGISIWQLLIIAVIVVLLFGTKKLGSIGSDLGASIKGFKKAMSDDEPKQDKTSQDADFTAKTIADKQADTNQEQAKTEDAKRHDKEQV</Hsp_qseq>
<Hsp_hseq>MGGISIWQLLIIAVIVVLLFGTKKLGSIGSDLGASIKGFKKAMSDDEPKQDKTSQDADFTAKTIADKQADTNQEQAKTEDAKRHDKEQV</Hsp_hseq>
<Hsp_midline>MGGISIWQLLIIAVIVVLLFGTKKLGSIGSDLGASIKGFKKAMSDDEPKQDKTSQDADFTAKTIADKQADTNQEQAKTEDAKRHDKEQV</Hsp_midline>
</Hsp>
</Hit_hsps>
</Hit>
<Hit>
<Hit_num>2</Hit_num>
<Hit_id>sp|P0A2H3.1|</Hit_id>
<Hit_def>RecName: Full=Sec-independent protein translocase protein TatA [Salmonella enterica subsp. enterica serovar Typhimurium str. LT2] &gt;sp|P0A2H4.1| RecName: Full=Sec-independent protein translocase protein TatA [Salmonella enterica subsp. enterica serovar Typhi]</Hit_def>
<Hit_accession>P0A2H3</Hit_accession>
<Hit_len>84</Hit_len>
<Hit_hsps>
<Hsp>
<Hsp_num>1</Hsp_num>
<Hsp_bit-score>142.51</Hsp_bit-score>
<Hsp_score>358</Hsp_score>
<Hsp_evalue>1.0691e-44</Hsp_evalue>
<Hsp_query-from>15</Hsp_query-from>
<Hsp_query-to>103</Hsp_query-to>
<Hsp_hit-from>1</Hsp_hit-from>
<Hsp_hit-to>84</Hsp_hit-to>
<Hsp_query-frame>0</Hsp_query-frame>
<Hsp_hit-frame>0</Hsp_hit-frame>
<Hsp_identity>75</Hsp_identity>
<Hsp_positive>79</Hsp_positive>
<Hsp_gaps>5</Hsp_gaps>
<Hsp_align-len>89</Hsp_align-len>
<Hsp_qseq>MGGISIWQLLIIAVIVVLLFGTKKLGSIGSDLGASIKGFKKAMSDDEPKQDKTSQDADFTAKTIADKQADTNQEQAKTEDAKRHDKEQV</Hsp_qseq>
<Hsp_hseq>MGGISIWQLLIVAVIVVLLFGTKKLGSIGSDLGASIKGFKKAMSDDDAKQDKTSQDADFTAKSIADKQG-----EAKKEDAKSQDKEQV</Hsp_hseq>
<Hsp_midline>MGGISIWQLLI+AVIVVLLFGTKKLGSIGSDLGASIKGFKKAMSDD+ KQDKTSQDADFTAK+IADKQ      +AK EDAK  DKEQV</Hsp_midline>
</Hsp>
</Hit_hsps>
</Hit>
</Iteration_hits>
<Iteration_stat>
<Statistics>
<Statistics_db-num>482816</Statistics_db-num>
<Statistics_db-len>183558113</Statistics_db-len>
<Statistics_hsp-len>72</Statistics_hsp-len>
<Statistics_eff-space>4627826878</Statistics_eff-space>
<Statistics_kappa>0.041</Statistics_kappa>
<Statistics_lambda>0.267</Statistics_lambda>
<Statistics_entropy>0.14</Statistics_entropy>
</Statistics>
</Iteration_stat>
</Iteration>
</BlastOutput_iterations>
</BlastOutput>

File diff suppressed because it is too large Load Diff