Added Infernal tabular format parser and tests

This commit is contained in:
Samuel Prince
2024-09-12 08:23:27 -04:00
committed by Wibowo Arindrarto
parent f452e2e860
commit 3fa9d17dd4
10 changed files with 663 additions and 830 deletions

View File

@ -0,0 +1,16 @@
# Copyright 2024 by Samuel Prince. All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
from .infernal_tab import InfernalTabParser
# Run the doctests when this file is executed directly instead of imported.
if __name__ == "__main__":
    from Bio import _utils

    _utils.run_doctest()

View File

@ -0,0 +1,255 @@
# Copyright 2024 by Samuel Prince. All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Bio.SearchIO parser for Infernal tabular output format."""
from Bio.SearchIO._index import SearchIndexer
from Bio.SearchIO._model import Hit
from Bio.SearchIO._model import HSP
from Bio.SearchIO._model import HSPFragment
from Bio.SearchIO._model import QueryResult
from Bio.SearchIO.HmmerIO import Hmmer3TabParser
# Public names exported by ``from ... import *``. The trailing comma is
# required: without it the parentheses are just grouping and ``__all__`` would
# be a plain string, which a star-import iterates character by character.
__all__ = ("InfernalTabParser",)
# Column names of each supported tabular layout, keyed by format number.
# The position of a name in the tuple is the position of the column in a row.
_TAB_FORMAT = {
    # format 1: 18 columns
    1: (
        "target_name",
        "target_acc",
        "query_name",
        "query_acc",
        "mdl",
        "mdl_from",
        "mdl_to",
        "seq_from",
        "seq_to",
        "strand",
        "trunc",
        "pass",
        "gc",
        "bias",
        "score",
        "evalue",
        "inc",
        "description",
    ),
    # format 2: 29 columns
    2: (
        "idx",
        "target_name",
        "target_acc",
        "query_name",
        "query_acc",
        "clan",
        "mdl",
        "mdl_from",
        "mdl_to",
        "seq_from",
        "seq_to",
        "strand",
        "trunc",
        "pass",
        "gc",
        "bias",
        "score",
        "evalue",
        "inc",
        "olp",
        "anyidx",
        "afrct1",
        "afrct2",
        "winidx",
        "wfrct1",
        "wfrct2",
        "mdl_len",
        "seq_len",
        "description",
    ),
    # format 3: 20 columns
    3: (
        "target_name",
        "target_acc",
        "query_name",
        "query_acc",
        "mdl",
        "mdl_from",
        "mdl_to",
        "seq_from",
        "seq_to",
        "strand",
        "trunc",
        "pass",
        "gc",
        "bias",
        "score",
        "evalue",
        "inc",
        "mdl_len",
        "seq_len",
        "description",
    ),
}

# Each mapping below translates a column name into the pair
# (attribute name to set on the object, callable used to convert the text).

# columns stored on the query result
_COLUMN_QRESULT = {
    "query_name": ("id", str),
    "query_acc": ("accession", str),
    "seq_len": ("seq_len", int),
    "clan": ("clan", str),
    "mdl": ("model", str),
}

# columns stored on the hit
_COLUMN_HIT = {
    "target_name": ("id", str),
    "target_acc": ("accession", str),
    "description": ("description", str),
    "mdl_len": ("seq_len", int),
}

# columns stored on the HSP
_COLUMN_HSP = {
    "score": ("bitscore", float),
    "evalue": ("evalue", float),
    "bias": ("bias", float),
    "gc": ("gc", float),
    "trunc": ("truncated", str),
    "pass": ("pipeline_pass", int),
    "inc": ("is_included", str),
    "olp": ("olp", str),
    "anyidx": ("anyidx", str),
    "afrct1": ("afrct1", str),
    "afrct2": ("afrct2", str),
    "winidx": ("winidx", str),
    "wfrct1": ("wfrct1", str),
    "wfrct2": ("wfrct2", str),
}

# columns stored on the HSP fragment
_COLUMN_FRAG = {
    "mdl_from": ("query_start", int),
    "mdl_to": ("query_end", int),
    "seq_from": ("hit_start", int),
    "seq_to": ("hit_end", int),
    "strand": ("hit_strand", str),
}
class InfernalTabParser(Hmmer3TabParser):
    """Parser for the Infernal tabular format.

    The column layout (one of the keys of ``_TAB_FORMAT``) is detected from
    the two header lines when the parser is created. Iterating over the
    parser yields one QueryResult for every run of consecutive rows that
    share the same query name.
    """

    def __init__(self, handle):
        """Initialize the class.

        Reads the two header lines from ``handle`` and stores the detected
        format number in ``self.fmt``. Raises ValueError if the header is
        missing or its column count matches no supported format.
        """
        self.handle = handle
        self.line = self.handle.readline().strip()
        self.fmt = self._find_tabular_format()

    def _find_tabular_format(self):
        """Identify the tabular file format from the header (PRIVATE).

        Returns the format number (1, 2 or 3). Raises ValueError if the
        second line is not a comment line or if its number of fields does
        not match any layout in ``_TAB_FORMAT``.
        """
        # skip the first line as some column names contain spaces
        self.line = self.handle.readline()
        # the second line should always be a header
        if not self.line.startswith("#"):
            raise ValueError(
                "Expected the first two lines of an Infernal tabular file "
                "to be a the header."
            )
        # identify tabular format 1 (default; 18 columns), 2 (29 columns) or
        # 3 (20 columns) from the second header line, whose fields do not
        # contain spaces; the line is split only once
        n_fields = len(self.line.split(" "))
        for fmt in (1, 2, 3):
            if n_fields == len(_TAB_FORMAT[fmt]):
                return fmt
        raise ValueError(
            "Unknown Infernal tabular output format. "
            "Format 1 (default), 2 and 3 are supported."
        )

    def __iter__(self):
        """Iterate over InfernalTabParser, yields query results."""
        # skip the comment line(s) that precede the first result row
        while self.line.startswith("#"):
            self.line = self.handle.readline()
        # if we have result rows, parse them
        if self.line:
            yield from self._parse_qresult()

    def _parse_row(self):
        """Return a dictionary of parsed row values (PRIVATE).

        The returned dictionary has the keys "qresult", "hit", "hsp" and
        "frag"; each value maps attribute names to the converted column
        values of the current line. Raises ValueError if the line has fewer
        columns than the detected format requires.
        """
        names = _TAB_FORMAT[self.fmt]
        n_names = len(names)
        cols = [x for x in self.line.strip().split(" ") if x]
        if len(cols) < n_names:
            raise ValueError(
                "Less columns than expected for format {}, only {}".format(
                    self.fmt, len(cols)
                )
            )
        # the description is the last column and may itself contain spaces,
        # so combine the extra columns back into one string
        cols[n_names - 1] = " ".join(cols[n_names - 1 :])
        qresult, hit, hsp, frag = {}, {}, {}, {}
        # zip stops at the end of ``names``, ignoring the leftover pieces of
        # the description that were merged above
        for sname, value in zip(names, cols):
            # iterate over each dict, mapping pair to determine
            # attribute name and value of each column
            for parsed_dict, mapping in (
                (qresult, _COLUMN_QRESULT),
                (hit, _COLUMN_HIT),
                (hsp, _COLUMN_HSP),
                (frag, _COLUMN_FRAG),
            ):
                # process parsed value according to mapping
                if sname in mapping:
                    attr_name, caster = mapping[sname]
                    if caster is not str:
                        value = caster(value)
                    parsed_dict[attr_name] = value
        # adjust start and end coordinates according to strand
        self._adjust_coords(frag)
        # convert inclusion string to a bool
        self._convert_inclusion(hsp)
        return {"qresult": qresult, "hit": hit, "hsp": hsp, "frag": frag}

    def _adjust_coords(self, frag):
        """Adjust start and end coordinates according to strand (PRIVATE).

        Modifies ``frag`` in place: for the "-" strand the hit start and end
        are swapped and the strand becomes -1; any other strand becomes 0.
        """
        if frag["hit_strand"] == "-":
            frag["hit_start"], frag["hit_end"] = frag["hit_end"], frag["hit_start"]
            frag["hit_strand"] = -1
        else:
            frag["hit_strand"] = 0

    def _convert_inclusion(self, hsp):
        """Convert inclusion string to a bool (PRIVATE).

        Modifies ``hsp`` in place: "is_included" becomes True for "!" and
        False for any other value.
        """
        hsp["is_included"] = hsp["is_included"] == "!"

    def _parse_qresult(self):
        """Yield QueryResult objects (PRIVATE).

        Objects are always built for the previously parsed row, once the
        current row shows whether the hit and/or the query has changed.
        Consecutive rows with the same query and hit names become HSPs of one
        Hit; consecutive rows with the same query name become hits of one
        QueryResult. Parsing stops at the end of the file or at the first
        comment line after the result rows.
        """
        # parsed values and ids of the previous row
        prev = None
        prev_qid, prev_hid = None, None
        # containers for the objects of the hit / query being assembled
        hit_list, hsp_list = [], []

        while True:
            # only parse the result row if it's not EOF or a comment line
            at_eof = not self.line or self.line.startswith("#")
            if at_eof:
                # mock values since there is no row to parse
                cur, cur_qid, cur_hid = None, None, None
            else:
                cur = self._parse_row()
                cur_qid = cur["qresult"]["id"]
                cur_hid = cur["hit"]["id"]

            # new hits are hits with a different id or hits in a new qresult
            # NOTE(review): only adjacent rows are grouped, so a hit id that
            # reappears later within the same query yields a second Hit with
            # the same id -- confirm QueryResult accepts that.
            new_qresult = prev_qid != cur_qid
            new_hit = new_qresult or prev_hid != cur_hid

            # creating objects for the previously parsed line(s), so nothing
            # is done in the first parsed line (prev is None)
            if prev is not None:
                # create fragment and HSP and set their attributes
                frag = HSPFragment(prev_hid, prev_qid)
                for attr, value in prev["frag"].items():
                    setattr(frag, attr, value)
                hsp = HSP([frag])
                for attr, value in prev["hsp"].items():
                    setattr(hsp, attr, value)
                hsp_list.append(hsp)

                # the previous row was the last one of its hit: create the
                # hit from the collected HSPs
                if new_hit:
                    hit = Hit(hsp_list)
                    for attr, value in prev["hit"].items():
                        setattr(hit, attr, value)
                    hit_list.append(hit)
                    hsp_list = []

                # create qresult and yield if we're at a new qresult or at EOF
                if new_qresult or at_eof:
                    qresult = QueryResult(hit_list, prev_qid)
                    for attr, value in prev["qresult"].items():
                        setattr(qresult, attr, value)
                    yield qresult
                    hit_list = []

            # checked outside the block above so the loop also terminates
            # when there was no result row at all
            if at_eof:
                break

            prev, prev_qid, prev_hid = cur, cur_qid, cur_hid
            self.line = self.handle.readline()
# Run the doctests when this file is executed directly instead of imported.
if __name__ == "__main__":
    from Bio import _utils

    _utils.run_doctest()

View File

@ -182,6 +182,7 @@ Support for parsing and indexing:
subprograms are hmmscan, hmmsearch, and phmmer.
- hmmer2-text - HMMER2 regular text output format. Supported HMMER2
subprograms are hmmpfam, hmmsearch.
- infernal-tab - Infernal tabular output.
Support for parsing:
@ -221,6 +222,7 @@ _ITERATOR_MAP = {
# as we need it distinguish hit / target coordinates
"hmmscan3-domtab": ("HmmerIO", "Hmmer3DomtabHmmhitParser"),
"hmmsearch3-domtab": ("HmmerIO", "Hmmer3DomtabHmmqueryParser"),
"infernal-tab": ("InfernalIO", "InfernalTabParser"),
"interproscan-xml": ("InterproscanIO", "InterproscanXmlParser"),
"phmmer3-domtab": ("HmmerIO", "Hmmer3DomtabHmmqueryParser"),
}

View File

@ -0,0 +1,13 @@
#idx target name accession query name accession clan name mdl mdl from mdl to seq from seq to strand trunc pass gc bias score E-value inc olp anyidx afrct1 afrct2 winidx wfrct1 wfrct2 mdl len seq len description of target
#--- -------------------- --------- ----------------------- --------- --------- --- -------- -------- -------- -------- ------ ----- ---- ---- ----- ------ --------- --- --- ------ ------ ------ ------ ------ ------ ------- ------- ---------------------
1 U2 RF00004 ENA|BK006936|BK006936.2 - - cm 1 193 681858 681747 - no 1 0.33 0.1 98.7 1.2e-20 ! * - - - - - - 193 813184 U2 spliceosomal RNA
#
# Program: cmscan
# Version: 1.1.5 (Sep 2023)
# Pipeline mode: SCAN
# Query file: BK006936.fasta
# Target file: IRES_5S_U2.cm
# Option settings: cmscan --tblout IRES_5S_U2_Yeast-cmscan-fmt_2.tbl --fmt 2 IRES_5S_U2.cm BK006936.fasta
# Current dir: /analysis/BioPython/Testing
# Date: Thu Sep 12 05:17:28 2024
# [ok]

View File

@ -0,0 +1,13 @@
#target name accession query name accession mdl mdl from mdl to seq from seq to strand trunc pass gc bias score E-value inc mdl len seq len description of target
#------------------- --------- ----------------------- --------- --- -------- -------- -------- -------- ------ ----- ---- ---- ----- ------ --------- --- ------- ------- ---------------------
U2 RF00004 ENA|BK006936|BK006936.2 - cm 1 193 681858 681747 - no 1 0.33 0.1 98.7 1.2e-20 ! 193 813184 U2 spliceosomal RNA
#
# Program: cmscan
# Version: 1.1.5 (Sep 2023)
# Pipeline mode: SCAN
# Query file: BK006936.fasta
# Target file: IRES_5S_U2.cm
# Option settings: cmscan --tblout IRES_5S_U2_Yeast-cmscan-fmt_3.tbl --fmt 3 IRES_5S_U2.cm BK006936.fasta
# Current dir: /analysis/BioPython/Testing
# Date: Thu Sep 12 05:17:37 2024
# [ok]

View File

@ -5,31 +5,13 @@ U2 RF00004 ENA|BK006935|BK006935.2 - cm 1
U2 RF00004 ENA|BK006936|BK006936.2 - cm 1 193 681858 681747 - no 1 0.33 0.1 98.7 1.2e-20 ! U2 spliceosomal RNA
5S_rRNA RF00001 ENA|BK006937|BK006937.2 - cm 1 119 761 644 - no 1 0.41 0.3 14.1 2.4 ? 5S ribosomal RNA
U2 RF00004 ENA|BK006937|BK006937.2 - cm 1 193 229986 229885 - no 1 0.32 0.1 11.1 4.7 ? U2 spliceosomal RNA
U2 RF00004 ENA|BK006938|BK006938.2 - cm 1 193 1259500 1259396 - no 1 0.38 0.0 13.1 7.5 ? U2 spliceosomal RNA
U2 RF00004 ENA|BK006939|BK006939.2 - cm 1 193 190882 191043 + no 1 0.41 0.0 14.9 1 ? U2 spliceosomal RNA
5S_rRNA RF00001 ENA|BK006943|BK006943.2 - cm 1 119 357031 357144 + no 1 0.46 0.0 20.9 0.1 ? 5S ribosomal RNA
5S_rRNA RF00001 ENA|BK006943|BK006943.2 - cm 1 119 359490 359579 + no 1 0.36 0.0 14.6 4.3 ? 5S ribosomal RNA
U2 RF00004 ENA|BK006943|BK006943.2 - cm 45 84 448179 448142 - no 1 0.21 0.0 12.2 5.9 ? U2 spliceosomal RNA
5S_rRNA RF00001 ENA|BK006945|BK006945.2 - cm 1 119 459676 459796 + no 1 0.52 0.0 88.8 4.2e-19 ! 5S ribosomal RNA
5S_rRNA RF00001 ENA|BK006945|BK006945.2 - cm 1 119 489349 489469 + no 1 0.52 0.0 88.8 4.2e-19 ! 5S ribosomal RNA
5S_rRNA RF00001 ENA|BK006945|BK006945.2 - cm 1 119 468813 468933 + no 1 0.53 0.0 83.2 1.2e-17 ! 5S ribosomal RNA
5S_rRNA RF00001 ENA|BK006945|BK006945.2 - cm 1 119 472465 472585 + no 1 0.53 0.0 83.2 1.2e-17 ! 5S ribosomal RNA
5S_rRNA RF00001 ENA|BK006945|BK006945.2 - cm 1 119 482045 482165 + no 1 0.53 0.0 83.2 1.2e-17 ! 5S ribosomal RNA
5S_rRNA RF00001 ENA|BK006945|BK006945.2 - cm 1 119 485697 485817 + no 1 0.53 0.0 83.2 1.2e-17 ! 5S ribosomal RNA
U2 RF00004 ENA|BK006945|BK006945.2 - cm 1 193 940222 940147 - no 1 0.30 0.0 13.0 5.6 ? U2 spliceosomal RNA
U2 RF00004 ENA|BK006947|BK006947.3 - cm 1 193 557234 557379 + no 1 0.34 0.1 15.6 0.97 ? U2 spliceosomal RNA
U2 RF00004 ENA|BK006947|BK006947.3 - cm 1 193 266059 266208 + no 1 0.39 0.0 15.3 1.1 ? U2 spliceosomal RNA
5S_rRNA RF00001 ENA|BK006947|BK006947.3 - cm 1 119 7085 6968 - no 1 0.41 0.3 16.7 1.3 ? 5S ribosomal RNA
U2 RF00004 ENA|BK006948|BK006948.2 - cm 1 193 737498 737324 - no 1 0.39 0.0 19.8 0.13 ? U2 spliceosomal RNA
U2 RF00004 ENA|BK006948|BK006948.2 - cm 1 193 425490 425693 + no 1 0.34 0.9 13.7 3.7 ? U2 spliceosomal RNA
U2 RF00004 ENA|BK006949|BK006949.2 - cm 1 193 443393 443253 - no 1 0.32 0.4 15.1 1.6 ? U2 spliceosomal RNA
#
# Program: cmscan
# Version: 1.1.4 (Dec 2020)
# Version: 1.1.5 (Sep 2023)
# Pipeline mode: SCAN
# Query file: GCA_000146045.2.fasta
# Query file: BK006935_6_7.fasta
# Target file: IRES_5S_U2.cm
# Option settings: cmscan --tblout IRES_5S_U2_Yeast-cmscan_fmt_1.tbl IRES_5S_U2.cm GCA_000146045.2.fasta
# Option settings: cmscan --tblout IRES_5S_U2_Yeast-cmscan.tbl IRES_5S_U2.cm BK006935_6_7.fasta
# Current dir: /analysis/BioPython/Testing
# Date: Tue Sep 10 11:07:15 2024
# Date: Thu Sep 12 05:06:09 2024
# [ok]

View File

@ -1,16 +1,15 @@
# cmscan :: search sequence(s) against a CM database
# INFERNAL 1.1.4 (Dec 2020)
# Copyright (C) 2020 Howard Hughes Medical Institute.
# INFERNAL 1.1.5 (Sep 2023)
# Copyright (C) 2023 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query sequence file: GCA_000146045.2.fasta
# query sequence file: BK006935_6_7.fasta
# target CM database: IRES_5S_U2.cm
# tabular output of hits: IRES_5S_U2_Yeast-cmscan_fmt_1.tbl
# number of worker threads: 56
# tabular output of hits: IRES_5S_U2_Yeast-cmscan.tbl
# number of worker threads: 4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Query: ENA|BK006935|BK006935.2 [L=230218]
Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome I, complete sequence.
Hit scores:
rank E-value score bias modelname start end mdl trunc gc description
---- --------- ------ ----- --------- ------ ------ --- ----- ---- -----------
@ -90,10 +89,9 @@ Envelopes passing glocal HMM envelope defn filter: 84 (0.01354); e
Envelopes passing local CM CYK filter: 3 (0.0002665); expected (0.0001)
Total CM hits reported: 2 (0.0002441); includes 0 truncated hit(s)
# CPU time: 8.92u 0.32s 00:00:09.24 Elapsed: 00:00:08.35
# CPU time: 8.73u 0.20s 00:00:08.93 Elapsed: 00:00:08.28
//
Query: ENA|BK006936|BK006936.2 [L=813184]
Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome II, complete sequence.
Hit scores:
rank E-value score bias modelname start end mdl trunc gc description
---- --------- ------ ----- --------- ------ ------ --- ----- ---- -----------
@ -138,10 +136,9 @@ Envelopes passing glocal HMM envelope defn filter: 109 (0.004945);
Envelopes passing local CM CYK filter: 7 (0.0001391); expected (0.0001)
Total CM hits reported: 1 (2.294e-05); includes 0 truncated hit(s)
# CPU time: 10.08u 0.32s 00:00:10.40 Elapsed: 00:00:09.32
# CPU time: 10.01u 0.30s 00:00:10.31 Elapsed: 00:00:09.27
//
Query: ENA|BK006937|BK006937.2 [L=316620]
Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome III, complete sequence.
Hit scores:
rank E-value score bias modelname start end mdl trunc gc description
---- --------- ------ ----- --------- ------ ------ --- ----- ---- -----------
@ -207,768 +204,6 @@ Envelopes passing glocal HMM envelope defn filter: 111 (0.01272); e
Envelopes passing local CM CYK filter: 6 (0.0003184); expected (0.0001)
Total CM hits reported: 2 (0.0001156); includes 0 truncated hit(s)
# CPU time: 10.37u 0.41s 00:00:10.78 Elapsed: 00:00:09.30
//
Query: ENA|BK006938|BK006938.2 [L=1531933]
Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome IV, complete sequence.
Hit scores:
rank E-value score bias modelname start end mdl trunc gc description
---- --------- ------ ----- --------- ------- ------- --- ----- ---- -----------
------ inclusion threshold ------
(1) ? 7.5 13.1 0.0 U2 1259500 1259396 - cm no 0.38 U2 spliceosomal RNA
Hit alignments:
>> U2 U2 spliceosomal RNA
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
(1) ? 7.5 13.1 0.0 cm 1 193 [] 1259500 1259396 - .. 0.82 no 0.38
v v v NC
::::::<<<-<<<<____>>>>->>>,,,,,..,,,,,,,,,,,,,,,<<<<<<________>>>>>>,<<<<<< CS
U2 1 AUacCUUCucgGCcUUUUgGCuaaGAUCAAG..UGUAGUAUCUGUUCUUauCAGUuUAAuAuCUGauAuggcccc 73
AU +CU GG :UUU : CU+ U AG UGUA UA CUGUUCU::::: UUU AU U+:::::+:G:::
ENA|BK006938|BK006938.2 1259500 AUUGCU----GGGAUUUGUCCUU---UUGAGucUGUAAUACCUGUUCUCUUUG-UUUGAUUUUCAGAGCUGUUUC 1259434
******....************9...99996337******************8.89******9*****7777653 PP
v NC
~~~~~>>>>>>,,,,,,,,,,,,,,,,,,,,,,,,,,,,~~~~~~~~~~~~::::::: CS
U2 74 *[5]*ggggccaauuauaUUAaauuaAUUUUUggaacua*[34]**[40]*Acccuuu 193
:::C: A +AU U AAA A+CC++
ENA|BK006938|BK006938.2 1259433 *[6]*AGAACAGACAAUCUCAAAA---------------*[ 2]**[ 4]*AACCCCA 1259396
..4..3566666668888887765..................6.....9..******* PP
Internal CM pipeline statistics summary:
----------------------------------------
Query sequence(s): 1 (3063866 residues searched)
Query sequences re-searched for truncated hits: 1 (1166.7 residues re-searched, avg per model)
Target model(s): 3 (664 consensus positions)
Windows passing local HMM SSV filter: 13337 (0.3945); expected (0.35)
Windows passing local HMM Viterbi filter: (off)
Windows passing local HMM Viterbi bias filter: (off)
Windows passing local HMM Forward filter: 4099 (0.1385); expected (0.005)
Windows passing local HMM Forward bias filter: 671 (0.02637); expected (0.005)
Windows passing glocal HMM Forward filter: 325 (0.01925); expected (0.005)
Windows passing glocal HMM Forward bias filter: 193 (0.01067); expected (0.005)
Envelopes passing glocal HMM envelope defn filter: 209 (0.004993); expected (0.005)
Envelopes passing local CM CYK filter: 10 (0.0001517); expected (0.0001)
Total CM hits reported: 1 (1.142e-05); includes 0 truncated hit(s)
# CPU time: 19.38u 0.49s 00:00:19.86 Elapsed: 00:00:18.66
//
Query: ENA|BK006942|BK006942.2 [L=439888]
Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome IX, complete sequence.
Hit scores:
rank E-value score bias modelname start end mdl trunc gc description
---- --------- ------ ----- --------- ------ ------ --- ----- ---- -----------
[No hits detected that satisfy reporting thresholds]
Hit alignments:
[No hits detected that satisfy reporting thresholds]
Internal CM pipeline statistics summary:
----------------------------------------
Query sequence(s): 1 (879776 residues searched)
Query sequences re-searched for truncated hits: 1 (1166.7 residues re-searched, avg per model)
Target model(s): 3 (664 consensus positions)
Windows passing local HMM SSV filter: 3731 (0.3879); expected (0.35)
Windows passing local HMM Viterbi filter: (off)
Windows passing local HMM Viterbi bias filter: (off)
Windows passing local HMM Forward filter: 1020 (0.1234); expected (0.005)
Windows passing local HMM Forward bias filter: 156 (0.02172); expected (0.005)
Windows passing glocal HMM Forward filter: 69 (0.01439); expected (0.005)
Windows passing glocal HMM Forward bias filter: 32 (0.006662); expected (0.005)
Envelopes passing glocal HMM envelope defn filter: 35 (0.002761); expected (0.005)
Envelopes passing local CM CYK filter: 3 (0.0002074); expected (0.0001)
Total CM hits reported: 0 (0); includes 0 truncated hit(s)
# CPU time: 3.28u 0.10s 00:00:03.38 Elapsed: 00:00:02.92
//
Query: ENA|BK006939|BK006939.2 [L=576874]
Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome V, complete sequence.
Hit scores:
rank E-value score bias modelname start end mdl trunc gc description
---- --------- ------ ----- --------- ------ ------ --- ----- ---- -----------
------ inclusion threshold ------
(1) ? 1 14.9 0.0 U2 190882 191043 + cm no 0.41 U2 spliceosomal RNA
Hit alignments:
>> U2 U2 spliceosomal RNA
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
(1) ? 1 14.9 0.0 cm 1 193 [] 190882 191043 + .. 0.92 no 0.41
v v NC
::::::<<<.-<<<<____>>>>->>>,,,,,,,,,,,,,,,,,,,,~~~~~~~~~~~~,,,,,,,,,,,,,,,,,,,,, CS
U2 1 AUacCUUCu.cgGCcUUUUgGCuaaGAUCAAGUGUAGUAUCUGUUCU*[20]**[18]*aauuauaUUAaauuaAUUUUU 105
AU CC U : +: C: UUU:G : : AUCA AA ++U+UUAAAU AA UUUU
ENA|BK006939|BK006939.2 190882 AUUCCAUGAuUUCCUGUUUAGCU-UCAUCA-----------------*[ 4]**[ 4]*AACAUUUUUAAAUGAAAUUUU 190939
*******9955678899999976.799997....................9.....9..77699**************** PP
vv v v vv v NC
,,,,,,,<<<<<<<<----.<<<<<_._>>>>>-.........->>>>>>>>,,<<<<<<-<<<<<<__________... CS
U2 106 ggaacuaGugggggcauuu.uggGCU.UGCccau.........ugcccccaCacggguugaccuggcaUUGCAcUac... 171
+ + +GU:: : AUU :G:GCU UGC:C: + : ::ACA+ :: :: : :::: U CAC AC
ENA|BK006939|BK006939.2 190940 AAUGUCUGUUUCCUUAUUGaAGAGCUuUGCUCUGgauuuuccaACAUUAAACAUGCCGCCGAGGCCUCCUCCACCACcac 191019
***********9999998899999964799999999999999999999**************************999888 PP
v NC
.._>>>>>>-->>>>>>::::::: CS
U2 172 ..cgccagguucagcccAcccuuu 193
+:::: :UU:: :: + CU+U
ENA|BK006939|BK006939.2 191020 caUUGGCAUUUGGUGGUGAACUAU 191043
988********************* PP
Internal CM pipeline statistics summary:
----------------------------------------
Query sequence(s): 1 (1153748 residues searched)
Query sequences re-searched for truncated hits: 1 (1166.7 residues re-searched, avg per model)
Target model(s): 3 (664 consensus positions)
Windows passing local HMM SSV filter: 4912 (0.3874); expected (0.35)
Windows passing local HMM Viterbi filter: (off)
Windows passing local HMM Viterbi bias filter: (off)
Windows passing local HMM Forward filter: 1437 (0.1293); expected (0.005)
Windows passing local HMM Forward bias filter: 213 (0.02265); expected (0.005)
Windows passing glocal HMM Forward filter: 112 (0.01663); expected (0.005)
Windows passing glocal HMM Forward bias filter: 60 (0.008299); expected (0.005)
Envelopes passing glocal HMM envelope defn filter: 63 (0.004216); expected (0.005)
Envelopes passing local CM CYK filter: 5 (0.0001253); expected (0.0001)
Total CM hits reported: 1 (4.676e-05); includes 0 truncated hit(s)
# CPU time: 7.19u 0.30s 00:00:07.49 Elapsed: 00:00:06.96
//
Query: ENA|BK006940|BK006940.2 [L=270161]
Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome VI, complete sequence.
Hit scores:
rank E-value score bias modelname start end mdl trunc gc description
---- --------- ------ ----- --------- ------ ------ --- ----- ---- -----------
[No hits detected that satisfy reporting thresholds]
Hit alignments:
[No hits detected that satisfy reporting thresholds]
Internal CM pipeline statistics summary:
----------------------------------------
Query sequence(s): 1 (540322 residues searched)
Query sequences re-searched for truncated hits: 1 (1166.7 residues re-searched, avg per model)
Target model(s): 3 (664 consensus positions)
Windows passing local HMM SSV filter: 2362 (0.3936); expected (0.35)
Windows passing local HMM Viterbi filter: (off)
Windows passing local HMM Viterbi bias filter: (off)
Windows passing local HMM Forward filter: 1161 (0.2049); expected (0.02)
Windows passing local HMM Forward bias filter: 445 (0.09149); expected (0.02)
Windows passing glocal HMM Forward filter: 192 (0.06844); expected (0.02)
Windows passing glocal HMM Forward bias filter: 116 (0.03896); expected (0.02)
Envelopes passing glocal HMM envelope defn filter: 127 (0.01684); expected (0.02)
Envelopes passing local CM CYK filter: 3 (0.000165); expected (0.0001)
Total CM hits reported: 0 (0); includes 0 truncated hit(s)
# CPU time: 10.53u 0.34s 00:00:10.87 Elapsed: 00:00:10.41
//
Query: ENA|BK006941|BK006941.2 [L=1090940]
Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome VII, complete sequence.
Hit scores:
rank E-value score bias modelname start end mdl trunc gc description
---- --------- ------ ----- --------- ------ ------ --- ----- ---- -----------
[No hits detected that satisfy reporting thresholds]
Hit alignments:
[No hits detected that satisfy reporting thresholds]
Internal CM pipeline statistics summary:
----------------------------------------
Query sequence(s): 1 (2181880 residues searched)
Query sequences re-searched for truncated hits: 1 (1166.7 residues re-searched, avg per model)
Target model(s): 3 (664 consensus positions)
Windows passing local HMM SSV filter: 9492 (0.3967); expected (0.35)
Windows passing local HMM Viterbi filter: (off)
Windows passing local HMM Viterbi bias filter: (off)
Windows passing local HMM Forward filter: 2806 (0.1357); expected (0.005)
Windows passing local HMM Forward bias filter: 435 (0.02432); expected (0.005)
Windows passing glocal HMM Forward filter: 208 (0.01708); expected (0.005)
Windows passing glocal HMM Forward bias filter: 111 (0.008745); expected (0.005)
Envelopes passing glocal HMM envelope defn filter: 123 (0.004086); expected (0.005)
Envelopes passing local CM CYK filter: 4 (7.848e-05); expected (0.0001)
Total CM hits reported: 0 (0); includes 0 truncated hit(s)
# CPU time: 11.61u 0.39s 00:00:12.00 Elapsed: 00:00:10.64
//
Query: ENA|BK006934|BK006934.2 [L=562643]
Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome VIII, complete sequence.
Hit scores:
rank E-value score bias modelname start end mdl trunc gc description
---- --------- ------ ----- --------- ------ ------ --- ----- ---- -----------
[No hits detected that satisfy reporting thresholds]
Hit alignments:
[No hits detected that satisfy reporting thresholds]
Internal CM pipeline statistics summary:
----------------------------------------
Query sequence(s): 1 (1125286 residues searched)
Query sequences re-searched for truncated hits: 1 (1166.7 residues re-searched, avg per model)
Target model(s): 3 (664 consensus positions)
Windows passing local HMM SSV filter: 4854 (0.3931); expected (0.35)
Windows passing local HMM Viterbi filter: (off)
Windows passing local HMM Viterbi bias filter: (off)
Windows passing local HMM Forward filter: 1398 (0.1295); expected (0.005)
Windows passing local HMM Forward bias filter: 235 (0.02555); expected (0.005)
Windows passing glocal HMM Forward filter: 116 (0.01844); expected (0.005)
Windows passing glocal HMM Forward bias filter: 70 (0.01048); expected (0.005)
Envelopes passing glocal HMM envelope defn filter: 70 (0.004666); expected (0.005)
Envelopes passing local CM CYK filter: 3 (6.155e-05); expected (0.0001)
Total CM hits reported: 0 (0); includes 0 truncated hit(s)
# CPU time: 6.55u 0.24s 00:00:06.79 Elapsed: 00:00:06.06
//
Query: ENA|BK006943|BK006943.2 [L=745751]
Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome X, complete sequence.
Hit scores:
rank E-value score bias modelname start end mdl trunc gc description
---- --------- ------ ----- --------- ------ ------ --- ----- ---- -----------
------ inclusion threshold ------
(1) ? 0.1 20.9 0.0 5S_rRNA 357031 357144 + cm no 0.46 5S ribosomal RNA
(2) ? 4.3 14.6 0.0 5S_rRNA 359490 359579 + cm no 0.36 5S ribosomal RNA
(3) ? 5.9 12.2 0.0 U2 448179 448142 - cm no 0.21 U2 spliceosomal RNA
Hit alignments:
>> 5S_rRNA 5S ribosomal RNA
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
(1) ? 0.1 20.9 0.0 cm 1 119 [] 357031 357144 + .. 0.86 no 0.46
v vv v vv vv v vv vv NC
(((((((((,,,,<<-<<<<<---<<--<<<<<<______>>-->>>>-.->>---->>>>>-->><<<-<<----<-<< CS
5S_rRNA 1 gccuGcggcCAUAccagcgcgaAagcACcgGauCCCAUCcGaACuCcgA.AguUAAGcgcgcUugggCcagggUAGUAcu 79
::: GC:G CA A:: G :G+AA: AC:: ++ CAU C ::A A :UAAGC: CU+:: C G:G GUACU
ENA|BK006943|BK006943.2 357031 CAGGGCUGGCAGAGGUGUCGGGAAAAACAAGGAU-CAUAU--CCUUUUAcAAUUAAGCCAUCUACCACCUGAG--GUACU 357105
****************************888743.44433..555555579**********************..***** PP
v v vvv NC
-----<<____>>----->>->-->>->>>)))))).))): CS
5S_rRNA 80 agGaUGgGuGAcCuCcUGggAAgaccagGugccgCa.ggcc 119
A + G CU C GGGAA+A:C+G C:GC :::+
ENA|BK006943|BK006943.2 357106 AAAG-G-AAAGGCUACCGGGAAUAUCUGAAACAGCUgCUGU 357144
9994.3.33334778889****************9879*** PP
>> 5S_rRNA 5S ribosomal RNA
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
(2) ? 4.3 14.6 0.0 cm 1 119 [] 359490 359579 + .. 0.94 no 0.36
v vv v v v v vv v NC
(((((((((,,,,<<-<<<<<---<<--<<<<<<______>>-->>>>-->>---->>>>>-->><~~~~~~>.)))))) CS
5S_rRNA 1 gccuGcggcCAUAccagcgcgaAagcACcgGauCCCAUCcGaACuCcgAAguUAAGcgcgcUuggg*[42]*u.gccgCa 115
:::U:::G UA:CAG : AA:CA ::G: UC+ AC:C::AAG: AAG : C++G:G U C:::A
ENA|BK006943|BK006943.2 359490 CUUUAAAGU--UAUCAGUUAACAAGCAGUUGUUGUUUUCAUCACACAGAAGCAAAGCUUUCCAGAG*[14]*UuUCUUUA 359575
********9..66*******************99999999999**********************9...8..459***** PP
NC
))): CS
5S_rRNA 116 ggcc 119
:::
ENA|BK006943|BK006943.2 359576 AAGA 359579
**** PP
>> U2 U2 spliceosomal RNA
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
(3) ? 5.9 12.2 0.0 cm 45 84 .. 448179 448142 - .. 0.93 no 0.21
v v vv vv NC
,,<<<<<<________>>>>>>,<<<<<<<___>>>>>>> CS
U2 45 CUUauCAGUuUAAuAuCUGauAuggcccccAuugggggcc 84
CU :U AGUUUAA+AUCU A: UG: :::AU :::+ :C
ENA|BK006943|BK006943.2 448179 CUCUUUAGUUUAACAUCUGAAUUGU--UUUAUAAAAUAAC 448142
*************************..678887888999* PP
Internal CM pipeline statistics summary:
----------------------------------------
Query sequence(s): 1 (1491502 residues searched)
Query sequences re-searched for truncated hits: 1 (1166.7 residues re-searched, avg per model)
Target model(s): 3 (664 consensus positions)
Windows passing local HMM SSV filter: 6360 (0.3887); expected (0.35)
Windows passing local HMM Viterbi filter: (off)
Windows passing local HMM Viterbi bias filter: (off)
Windows passing local HMM Forward filter: 1764 (0.1256); expected (0.005)
Windows passing local HMM Forward bias filter: 304 (0.02482); expected (0.005)
Windows passing glocal HMM Forward filter: 144 (0.01708); expected (0.005)
Windows passing glocal HMM Forward bias filter: 89 (0.009796); expected (0.005)
Envelopes passing glocal HMM envelope defn filter: 93 (0.004667); expected (0.005)
Envelopes passing local CM CYK filter: 7 (0.0001436); expected (0.0001)
Total CM hits reported: 3 (5.404e-05); includes 0 truncated hit(s)
# CPU time: 9.66u 0.39s 00:00:10.05 Elapsed: 00:00:09.09
//
Query: ENA|BK006944|BK006944.2 [L=666816]
Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome XI, complete sequence.
Hit scores:
rank E-value score bias modelname start end mdl trunc gc description
---- --------- ------ ----- --------- ------ ------ --- ----- ---- -----------
[No hits detected that satisfy reporting thresholds]
Hit alignments:
[No hits detected that satisfy reporting thresholds]
Internal CM pipeline statistics summary:
----------------------------------------
Query sequence(s): 1 (1333632 residues searched)
Query sequences re-searched for truncated hits: 1 (1166.7 residues re-searched, avg per model)
Target model(s): 3 (664 consensus positions)
Windows passing local HMM SSV filter: 5848 (0.3971); expected (0.35)
Windows passing local HMM Viterbi filter: (off)
Windows passing local HMM Viterbi bias filter: (off)
Windows passing local HMM Forward filter: 1755 (0.1375); expected (0.005)
Windows passing local HMM Forward bias filter: 254 (0.02353); expected (0.005)
Windows passing glocal HMM Forward filter: 121 (0.01613); expected (0.005)
Windows passing glocal HMM Forward bias filter: 71 (0.008891); expected (0.005)
Envelopes passing glocal HMM envelope defn filter: 79 (0.004272); expected (0.005)
Envelopes passing local CM CYK filter: 6 (0.0001923); expected (0.0001)
Total CM hits reported: 0 (0); includes 0 truncated hit(s)
# CPU time: 7.18u 0.22s 00:00:07.40 Elapsed: 00:00:06.87
//
Query: ENA|BK006945|BK006945.2 [L=1078177]
Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome XII, complete sequence.
Hit scores:
rank E-value score bias modelname start end mdl trunc gc description
---- --------- ------ ----- --------- ------ ------ --- ----- ---- -----------
(1) ! 4.2e-19 88.8 0.0 5S_rRNA 459676 459796 + cm no 0.52 5S ribosomal RNA
(2) ! 4.2e-19 88.8 0.0 5S_rRNA 489349 489469 + cm no 0.52 5S ribosomal RNA
(3) ! 1.2e-17 83.2 0.0 5S_rRNA 468813 468933 + cm no 0.53 5S ribosomal RNA
(4) ! 1.2e-17 83.2 0.0 5S_rRNA 472465 472585 + cm no 0.53 5S ribosomal RNA
(5) ! 1.2e-17 83.2 0.0 5S_rRNA 482045 482165 + cm no 0.53 5S ribosomal RNA
(6) ! 1.2e-17 83.2 0.0 5S_rRNA 485697 485817 + cm no 0.53 5S ribosomal RNA
------ inclusion threshold ------
(7) ? 5.6 13.0 0.0 U2 940222 940147 - cm no 0.30 U2 spliceosomal RNA
Hit alignments:
>> 5S_rRNA 5S ribosomal RNA
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
(1) ! 4.2e-19 88.8 0.0 cm 1 119 [] 459676 459796 + .. 0.99 no 0.52
v NC
(((((((((,,,,<<-<<<<<---<<--<<<<<<______>>-->>>.>-->>---->>>>>-->><<<-<<---.-<-< CS
5S_rRNA 1 gccuGcggcCAUAccagcgcgaAagcACcgGauCCCAUCcGaACuCc.gAAguUAAGcgcgcUugggCcagggUA.GUAc 78
G::UGC:GCCAUA:C :C::GAAAGCACCG :UCCC+UCCGA C: C G AGUUAAGC::G: +G:GCC G: GUA
ENA|BK006945|BK006945.2 459676 GGUUGCGGCCAUAUCUACCAGAAAGCACCGUUUCCCGUCCGAUCAACuGUAGUUAAGCUGGUAAGAGCCUGACCGaGUAG 459755
***********************************************99***********************8756**** PP
v vv NC
<-----<<____>>----->>->-->>->>>))))))))): CS
5S_rRNA 79 uagGaUGgGuGAcCuCcUGggAAgaccagGugccgCaggcc 119
+ +UGGGUGACC+ G AA :CAGGUGC:GCA::C+
ENA|BK006945|BK006945.2 459756 UGUAGUGGGUGACCAUACGCGAAACUCAGGUGCUGCAAUCU 459796
***********************9***************** PP
>> 5S_rRNA 5S ribosomal RNA
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
(2) ! 4.2e-19 88.8 0.0 cm 1 119 [] 489349 489469 + .. 0.99 no 0.52
v NC
(((((((((,,,,<<-<<<<<---<<--<<<<<<______>>-->>>.>-->>---->>>>>-->><<<-<<---.-<-< CS
5S_rRNA 1 gccuGcggcCAUAccagcgcgaAagcACcgGauCCCAUCcGaACuCc.gAAguUAAGcgcgcUugggCcagggUA.GUAc 78
G::UGC:GCCAUA:C :C::GAAAGCACCG :UCCC+UCCGA C: C G AGUUAAGC::G: +G:GCC G: GUA
ENA|BK006945|BK006945.2 489349 GGUUGCGGCCAUAUCUACCAGAAAGCACCGUUUCCCGUCCGAUCAACuGUAGUUAAGCUGGUAAGAGCCUGACCGaGUAG 489428
***********************************************99***********************8756**** PP
v vv NC
<-----<<____>>----->>->-->>->>>))))))))): CS
5S_rRNA 79 uagGaUGgGuGAcCuCcUGggAAgaccagGugccgCaggcc 119
+ +UGGGUGACC+ G AA :CAGGUGC:GCA::C+
ENA|BK006945|BK006945.2 489429 UGUAGUGGGUGACCAUACGCGAAACUCAGGUGCUGCAAUCU 489469
***********************9***************** PP
>> 5S_rRNA 5S ribosomal RNA
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
(3) ! 1.2e-17 83.2 0.0 cm 1 119 [] 468813 468933 + .. 0.99 no 0.53
v NC
(((((((((,,,,<<-<<<<<---<<--<<<<<<______>>-->>>.>-->>---->>>>>-->><<<-<<---.-<-< CS
5S_rRNA 1 gccuGcggcCAUAccagcgcgaAagcACcgGauCCCAUCcGaACuCc.gAAguUAAGcgcgcUugggCcagggUA.GUAc 78
: UGC:GCCAUA:C :C::GAAAGCACCG :UCCC+UCCGA C: C G AGUUAAGC::G: +G:GCC G: GUA
ENA|BK006945|BK006945.2 468813 GGUUGCGGCCAUAUCUACCAGAAAGCACCGUUUCCCGUCCGAUCAACuGUAGUUAAGCUGGUAAGAGCCUGACCGaGUAG 468892
***********************************************99***********************8756**** PP
v vv NC
<-----<<____>>----->>->-->>->>>))))))))): CS
5S_rRNA 79 uagGaUGgGuGAcCuCcUGggAAgaccagGugccgCaggcc 119
+ +UGGGUGACC+ G AA :CAGGUGC:GCA :
ENA|BK006945|BK006945.2 468893 UGUAGUGGGUGACCAUACGCGAAACUCAGGUGCUGCAGUUG 468933
***********************9***************** PP
>> 5S_rRNA 5S ribosomal RNA
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
(4) ! 1.2e-17 83.2 0.0 cm 1 119 [] 472465 472585 + .. 0.99 no 0.53
v NC
(((((((((,,,,<<-<<<<<---<<--<<<<<<______>>-->>>.>-->>---->>>>>-->><<<-<<---.-<-< CS
5S_rRNA 1 gccuGcggcCAUAccagcgcgaAagcACcgGauCCCAUCcGaACuCc.gAAguUAAGcgcgcUugggCcagggUA.GUAc 78
: UGC:GCCAUA:C :C::GAAAGCACCG :UCCC+UCCGA C: C G AGUUAAGC::G: +G:GCC G: GUA
ENA|BK006945|BK006945.2 472465 GGUUGCGGCCAUAUCUACCAGAAAGCACCGUUUCCCGUCCGAUCAACuGUAGUUAAGCUGGUAAGAGCCUGACCGaGUAG 472544
***********************************************99***********************8756**** PP
v vv NC
<-----<<____>>----->>->-->>->>>))))))))): CS
5S_rRNA 79 uagGaUGgGuGAcCuCcUGggAAgaccagGugccgCaggcc 119
+ +UGGGUGACC+ G AA :CAGGUGC:GCA :
ENA|BK006945|BK006945.2 472545 UGUAGUGGGUGACCAUACGCGAAACUCAGGUGCUGCAGUUG 472585
***********************9***************** PP
>> 5S_rRNA 5S ribosomal RNA
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
(5) ! 1.2e-17 83.2 0.0 cm 1 119 [] 482045 482165 + .. 0.99 no 0.53
v NC
(((((((((,,,,<<-<<<<<---<<--<<<<<<______>>-->>>.>-->>---->>>>>-->><<<-<<---.-<-< CS
5S_rRNA 1 gccuGcggcCAUAccagcgcgaAagcACcgGauCCCAUCcGaACuCc.gAAguUAAGcgcgcUugggCcagggUA.GUAc 78
: UGC:GCCAUA:C :C::GAAAGCACCG :UCCC+UCCGA C: C G AGUUAAGC::G: +G:GCC G: GUA
ENA|BK006945|BK006945.2 482045 GGUUGCGGCCAUAUCUACCAGAAAGCACCGUUUCCCGUCCGAUCAACuGUAGUUAAGCUGGUAAGAGCCUGACCGaGUAG 482124
***********************************************99***********************8756**** PP
v vv NC
<-----<<____>>----->>->-->>->>>))))))))): CS
5S_rRNA 79 uagGaUGgGuGAcCuCcUGggAAgaccagGugccgCaggcc 119
+ +UGGGUGACC+ G AA :CAGGUGC:GCA :
ENA|BK006945|BK006945.2 482125 UGUAGUGGGUGACCAUACGCGAAACUCAGGUGCUGCAGUUG 482165
***********************9***************** PP
>> 5S_rRNA 5S ribosomal RNA
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
(6) ! 1.2e-17 83.2 0.0 cm 1 119 [] 485697 485817 + .. 0.99 no 0.53
v NC
(((((((((,,,,<<-<<<<<---<<--<<<<<<______>>-->>>.>-->>---->>>>>-->><<<-<<---.-<-< CS
5S_rRNA 1 gccuGcggcCAUAccagcgcgaAagcACcgGauCCCAUCcGaACuCc.gAAguUAAGcgcgcUugggCcagggUA.GUAc 78
: UGC:GCCAUA:C :C::GAAAGCACCG :UCCC+UCCGA C: C G AGUUAAGC::G: +G:GCC G: GUA
ENA|BK006945|BK006945.2 485697 GGUUGCGGCCAUAUCUACCAGAAAGCACCGUUUCCCGUCCGAUCAACuGUAGUUAAGCUGGUAAGAGCCUGACCGaGUAG 485776
***********************************************99***********************8756**** PP
v vv NC
<-----<<____>>----->>->-->>->>>))))))))): CS
5S_rRNA 79 uagGaUGgGuGAcCuCcUGggAAgaccagGugccgCaggcc 119
+ +UGGGUGACC+ G AA :CAGGUGC:GCA :
ENA|BK006945|BK006945.2 485777 UGUAGUGGGUGACCAUACGCGAAACUCAGGUGCUGCAGUUG 485817
***********************9***************** PP
>> U2 U2 spliceosomal RNA
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
(7) ? 5.6 13.0 0.0 cm 1 193 [] 940222 940147 - .. 0.93 no 0.30
v v NC
::::::<<<-<<<<____>>>>->>>,,,,,~~~~~~,,,,,,,,,,,,,,,,,,,,,,,,~~~~~~::::::: CS
U2 1 AUacCUUCucgGCcUUUUgGCuaaGAUCAAG*[53]*aauuauaUUAaauuaAUUUUUgga*[78]*Acccuuu 193
AU CU C C:UU :G + G UCA G +AUUAU UUAAA U+AUUUUUGG A+C UU
ENA|BK006945|BK006945.2 940222 AUUACUACUGUUCUUUCCAGAAUUGUUCAUG*[11]*UAUUAUCUUAAAAUUAUUUUUGGC*[ 3]*AACGUUA 940147
****************************965...9..666******************986...7..******* PP
Internal CM pipeline statistics summary:
----------------------------------------
Query sequence(s): 1 (2156354 residues searched)
Query sequences re-searched for truncated hits: 1 (1166.7 residues re-searched, avg per model)
Target model(s): 3 (664 consensus positions)
Windows passing local HMM SSV filter: 9345 (0.3951); expected (0.35)
Windows passing local HMM Viterbi filter: (off)
Windows passing local HMM Viterbi bias filter: (off)
Windows passing local HMM Forward filter: 2609 (0.1273); expected (0.005)
Windows passing local HMM Forward bias filter: 426 (0.02429); expected (0.005)
Windows passing glocal HMM Forward filter: 216 (0.01746); expected (0.005)
Windows passing glocal HMM Forward bias filter: 133 (0.01025); expected (0.005)
Envelopes passing glocal HMM envelope defn filter: 140 (0.004716); expected (0.005)
Envelopes passing local CM CYK filter: 12 (0.0002032); expected (0.0001)
Total CM hits reported: 7 (0.0001239); includes 0 truncated hit(s)
# CPU time: 12.39u 0.35s 00:00:12.74 Elapsed: 00:00:11.30
//
Query: ENA|BK006946|BK006946.2 [L=924431]
Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome XIII, complete sequence.
Hit scores:
rank E-value score bias modelname start end mdl trunc gc description
---- --------- ------ ----- --------- ------ ------ --- ----- ---- -----------
[No hits detected that satisfy reporting thresholds]
Hit alignments:
[No hits detected that satisfy reporting thresholds]
Internal CM pipeline statistics summary:
----------------------------------------
Query sequence(s): 1 (1848862 residues searched)
Query sequences re-searched for truncated hits: 1 (1166.7 residues re-searched, avg per model)
Target model(s): 3 (664 consensus positions)
Windows passing local HMM SSV filter: 7943 (0.3913); expected (0.35)
Windows passing local HMM Viterbi filter: (off)
Windows passing local HMM Viterbi bias filter: (off)
Windows passing local HMM Forward filter: 2289 (0.1302); expected (0.005)
Windows passing local HMM Forward bias filter: 347 (0.02363); expected (0.005)
Windows passing glocal HMM Forward filter: 173 (0.01619); expected (0.005)
Windows passing glocal HMM Forward bias filter: 98 (0.008606); expected (0.005)
Envelopes passing glocal HMM envelope defn filter: 107 (0.004312); expected (0.005)
Envelopes passing local CM CYK filter: 10 (0.0002205); expected (0.0001)
Total CM hits reported: 0 (0); includes 0 truncated hit(s)
# CPU time: 10.99u 0.27s 00:00:11.26 Elapsed: 00:00:10.70
//
Query: ENA|BK006947|BK006947.3 [L=784333]
Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome XIV, complete sequence.
Hit scores:
rank E-value score bias modelname start end mdl trunc gc description
---- --------- ------ ----- --------- ------ ------ --- ----- ---- -----------
------ inclusion threshold ------
(1) ? 0.97 15.6 0.1 U2 557234 557379 + cm no 0.34 U2 spliceosomal RNA
(2) ? 1.1 15.3 0.0 U2 266059 266208 + cm no 0.39 U2 spliceosomal RNA
(3) ? 1.3 16.7 0.3 5S_rRNA 7085 6968 - cm no 0.41 5S ribosomal RNA
Hit alignments:
>> U2 U2 spliceosomal RNA
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
(1) ? 0.97 15.6 0.1 cm 1 193 [] 557234 557379 + .. 0.77 no 0.34
vv vv v NC
::::::<<<-<<<<____>>>>->>>,,,,,~~~~~~,,,,,,,,,,,,,,,,,,,,,,,,,,,,<<<<<<<<----... CS
U2 1 AUacCUUCucgGCcUUUUgGCuaaGAUCAAG*[53]*aauuauaUUAaauuaAUUUUUggaacuaGugggggcauuu... 124
U UUCU :G UUU C:AAGAUCAAG AA AUAUUAA+ AA UUUUG+A ++A::::: :: +
ENA|BK006947|BK006947.3 557234 UUUUGUUCUAUGUAAUUUGCCUAAGAUCAAG*[ 8]*AA-CAUAUUAAUAGAACUUUUGAAGUGACAAUCGCGCGAAguu 557314
******************99*********98...7..44.99**********************9999998887766666 PP
v v v v vvvvv vvvvv v NC
...<<~~~~~>>-.->>>>>>>>,,<<<<<<-<<<<<<___________>>>>>>-->>>>>>::::::: CS
U2 125 ...ug*[8]*cau.ugcccccaCacggguugaccuggcaUUGCAcUaccgccagguucagcccAcccuuu 193
U +:: ::::: +: ::U A U+ :AUU UAC+: U A:: :ACC+ U
ENA|BK006947|BK006947.3 557315 uccAG*[8]*UAUaAUGGGAUUGUUUGCCUUAGGUACAAUUA---UACUU-----GUGAGGGGACCUAGU 557379
44433..4..44467899999999988888754444444433...22222.....46888899******* PP
>> U2 U2 spliceosomal RNA
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
(2) ? 1.1 15.3 0.0 cm 1 193 [] 266059 266208 + .. 0.91 no 0.39
v v v NC
::::::.<<<.-<<<<____>>>>->>>,,,,.,,,,,,~~~~~~,,,,,,,,,,,,,,,,,,,,,,,,,,,,<<<<<<< CS
U2 1 AUacCU.UCu.cgGCcUUUUgGCuaaGAUCAA.GUGUAG*[48]*aauuauaUUAaauuaAUUUUUggaacuaGuggggg 119
AU UCU + G C UUG C AGAU A GUGUAG UUAUAU +UU AU UUU G +A:: : G:
ENA|BK006947|BK006947.3 266059 AUGUUGaUCUaUCGUCAAUUGACCCAGAUGAUaGUGUAG*[ 1]*-GUUAUAUAGUUUUGAUAUUUUGGCGAAAAGUUGA 266132
*****9****999****************9988999987...5...3377778888888888888888888888888888 PP
v v v v v v v v v NC
<----.<<<<<__>>>>>-..->>>>>>>>,,<<<<<<-<<<<<<___________>>>>>>-->>>>>>::::::: CS
U2 120 cauuu.uggGCUUGCccau..ugcccccaCacggguugaccuggcaUUGCAcUaccgccagguucagcccAcccuuu 193
:A+U+ U :GCUUGC: AU +::C : :: G: :AC: G U GCA UA+ C :GU+: :C +U +
ENA|BK006947|BK006947.3 266133 GAAUAuUGCGCUUGCGUAUauAUUCCAUUUGAGGUGGCACUAGAGCUCGCAUUAU-UACCAGUAGUGGCAGGAUUGC 266208
888888**************99999******************************.99******************* PP
>> 5S_rRNA 5S ribosomal RNA
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
(3) ? 1.3 16.7 0.3 cm 1 119 [] 7085 6968 - .. 0.91 no 0.41
v v v v v v v NC
(((((((((,,,,<<-<<.<<<.---<<--<<<<.<<______>>-->>>>-->>---->>>>>-->><<<-<<----<-<<-- CS
5S_rRNA 1 gccuGcggcCAUAccagc.gcg.aAagcACcgGa.uCCCAUCcGaACuCcgAAguUAAGcgcgcUugggCcagggUAGUAcuag 81
: :: ::: AUAC + :: G:AC:::: CC AUC+G ::::AA:U AAG :: U+ GGC: :G GUA U+G
ENA|BK006947|BK006947.3 7085 GAGAUGGUAUAUACUGUAgCAUcCGUGUACGUAUgACCGAUCAGA--AUACAAGUGAAGGUGAGUAUGGCAUGUG--GUAGUGG 7006
**************976325541459999****989999999999..89**********9999999*********..******* PP
v NC
---<<____>>----->>->-->>->>>.))))))))): CS
5S_rRNA 82 GaUGgGuGAcCuCcUGggAAgaccagGu.gccgCaggcc 119
GAU :G G : GG AAG+: A:GU ::: :: : C
ENA|BK006947|BK006947.3 7005 GAUUAGAG-UGGUAGGGUAAGUAUAUGUgUAUUAUUUAC 6968
***99988.689999************************ PP
Internal CM pipeline statistics summary:
----------------------------------------
Query sequence(s): 1 (1568666 residues searched)
Query sequences re-searched for truncated hits: 1 (1166.7 residues re-searched, avg per model)
Target model(s): 3 (664 consensus positions)
Windows passing local HMM SSV filter: 6669 (0.3884); expected (0.35)
Windows passing local HMM Viterbi filter: (off)
Windows passing local HMM Viterbi bias filter: (off)
Windows passing local HMM Forward filter: 1739 (0.1193); expected (0.005)
Windows passing local HMM Forward bias filter: 256 (0.02055); expected (0.005)
Windows passing glocal HMM Forward filter: 135 (0.01459); expected (0.005)
Windows passing glocal HMM Forward bias filter: 80 (0.008433); expected (0.005)
Envelopes passing glocal HMM envelope defn filter: 90 (0.004393); expected (0.005)
Envelopes passing local CM CYK filter: 6 (0.0001915); expected (0.0001)
Total CM hits reported: 3 (8.791e-05); includes 0 truncated hit(s)
# CPU time: 9.87u 0.47s 00:00:10.34 Elapsed: 00:00:09.67
//
Query: ENA|BK006948|BK006948.2 [L=1091291]
Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome XV, complete sequence.
Hit scores:
rank E-value score bias modelname start end mdl trunc gc description
---- --------- ------ ----- --------- ------ ------ --- ----- ---- -----------
------ inclusion threshold ------
(1) ? 0.13 19.8 0.0 U2 737498 737324 - cm no 0.39 U2 spliceosomal RNA
(2) ? 3.7 13.7 0.9 U2 425490 425693 + cm no 0.34 U2 spliceosomal RNA
Hit alignments:
>> U2 U2 spliceosomal RNA
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
(1) ? 0.13 19.8 0.0 cm 1 193 [] 737498 737324 - .. 0.96 no 0.39
NC
::::::<<<-<<<<____>>>>->>>,,,,,,,,,,,,,,,,,,,,<<<<<<~~~~~~>>>>>>,<<<<<<<___>>>>> CS
U2 1 AUacCUUCucgGCcUUUUgGCuaaGAUCAAGUGUAGUAUCUGUUCUUauCAG*[ 8]*CUGauAuggcccccAuuggggg 82
AU+CC U U+ GCC U GGC +A AU AAGU UA UA C GUUCU:A::A U::U: ::::::A U:::::
ENA|BK006948|BK006948.2 737498 AUCCCAUAUUUGCCAUC-GGCAUAUAUUAAGUAUAUUAGCAGUUCUAAUUAC*[88]*GUAGUUGGAAGGAUACUAUCCU 737338
**************999.*******************************996...*..6999999999999999999999 PP
NC
>>,,,,,,,,,,,,,,,,,,,,,,,,,,,,~~~~~~~~~~~~::::::: CS
U2 83 ccaauuauaUUAaauuaAUUUUUggaacua*[34]**[40]*Acccuuu 193
: A+ A CC++U
ENA|BK006948|BK006948.2 737337 UUAU--------------------------*[ 2]**[ 1]*AUCCCCU 737324
9987.............................6.....9..******* PP
>> U2 U2 spliceosomal RNA
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
(2) ? 3.7 13.7 0.9 cm 1 193 [] 425490 425693 + .. 0.72 no 0.34
v v v NC
::::::<<<-~~~~~~->>>,,,,,,,,,,,,,,,,,,,,<<<<<<________>>>>>>,<<<<<<<__~~~~~~>>>> CS
U2 1 AUacCUUCuc*[12]*aaGAUCAAGUGUAGUAUCUGUUCUUauCAGUuUAAuAuCUGauAuggcccccAu*[ 1]*gggg 81
A +CUUCU+ AAGAUCAAGU U UAUC U U:UC : U A AU: GA:AU:: ::CCA+ GG::
ENA|BK006948|BK006948.2 425490 AAUGCUUCUU*[21]*AAGAUCAAGUUUUUUAUCCUUCGAUUUCAAAUGGAGAUUGGAAAUAUAUUCCAA*[11]*GGAA 425589
*********7...*..5*****************877778888888888899989999999999999986...5..4444 PP
v vvvvvvv vvvvvvv v vvvvv NC
>>>,,,.,,,,,,,,,,,,,,,,,,,,,,,,,..<<<<<<<<----~~~~~~->>>>>>>>,,<<<<<<-<<<<<<____ CS
U2 82 gccaau.uauaUUAaauuaAUUUUUggaacua..Gugggggcauuu*[13]*ugcccccaCacggguugaccuggcaUUG 165
::+AU U+U UAAA +A UUUU GAA+U+ G + C+++++C ::UU :AU G
ENA|BK006948|BK006948.2 425590 AAUUAUcUUUGCUAAAACUAGUUUUAGAAAUUggG-----------*[19]*ACCUAAUUCGACUCUUUC-----GAUAG 425666
44444474444444444444444444444444233..............5..344555555555555555.....89*** PP
vvvvv v NC
_______>>>>>>-->>>>>>::::::: CS
U2 166 CAcUaccgccagguucagcccAcccuuu 193
CACU++ :++A+++U AG:: AC ++UU
ENA|BK006948|BK006948.2 425667 CACUUU-CAAAAAAUGAGGAUACAUCUU 425693
******.56666679************* PP
Internal CM pipeline statistics summary:
----------------------------------------
Query sequence(s): 1 (2182582 residues searched)
Query sequences re-searched for truncated hits: 1 (1166.7 residues re-searched, avg per model)
Target model(s): 3 (664 consensus positions)
Windows passing local HMM SSV filter: 9405 (0.3919); expected (0.35)
Windows passing local HMM Viterbi filter: (off)
Windows passing local HMM Viterbi bias filter: (off)
Windows passing local HMM Forward filter: 2786 (0.1338); expected (0.005)
Windows passing local HMM Forward bias filter: 428 (0.02405); expected (0.005)
Windows passing glocal HMM Forward filter: 217 (0.01719); expected (0.005)
Windows passing glocal HMM Forward bias filter: 117 (0.008698); expected (0.005)
Envelopes passing glocal HMM envelope defn filter: 122 (0.004139); expected (0.005)
Envelopes passing local CM CYK filter: 7 (0.0001313); expected (0.0001)
Total CM hits reported: 2 (5.785e-05); includes 0 truncated hit(s)
# CPU time: 12.80u 0.35s 00:00:13.15 Elapsed: 00:00:12.47
//
Query: ENA|BK006949|BK006949.2 [L=948066]
Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome XVI, complete sequence.
Hit scores:
rank E-value score bias modelname start end mdl trunc gc description
---- --------- ------ ----- --------- ------ ------ --- ----- ---- -----------
------ inclusion threshold ------
(1) ? 1.6 15.1 0.4 U2 443393 443253 - cm no 0.32 U2 spliceosomal RNA
Hit alignments:
>> U2 U2 spliceosomal RNA
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
(1) ? 1.6 15.1 0.4 cm 1 193 [] 443393 443253 - .. 0.71 no 0.32
v v v v NC
::::::<<<-<<<<____>>>>->>>,,,,,,,,,,,,,,,,,,,,~~~~~~,<<<<<<<___>>>>>>>,,,..,,,,, CS
U2 1 AUacCUUCucgGCcUUUUgGCuaaGAUCAAGUGUAGUAUCUGUUCU*[20]*uggcccccAuugggggccaau..uauaU 92
A U C:+G C G UA:G UCAAG UAGUAU UGUUCU ::: C A+ G :::++U
ENA|BK006949|BK006949.2 443393 CCAUUUACAUGAACCCCAGUUUAUGUUCAAG--UAGUAUAUGUUCU*[ 4]*-UCAACGCAAACUGCUGAUUUcaCC--- 443322
***************999************6..7**********98...4...222222222222222222222222... PP
v v v v v v v NC
,,,,,,,,,,,,,,,,,,,,<<<<<<<<----<<<<<__>>>>>-->>>>>>>>,,<<<<<<-<<<<<<___________ CS
U2 93 UAaauuaAUUUUUggaacuaGugggggcauuuuggGCUUGCccauugcccccaCacggguugaccuggcaUUGCAcUacc 172
:U : :::+U U UU+ ::: : A:A+ ::: G+C: : :A U CAC AC
ENA|BK006949|BK006949.2 443321 --------------------AUUGAAUAUUGU-----UUA------UAUAUGAUAUAUACCGUCAAAUUACUUCACGAC- 443274
....................222222222222.....221......3333345599***********************. PP
v v v NC
>>>>>>-->>>>>>::::::: CS
U2 173 gccagguucagcccAcccuuu 193
: : :G++C ::: + U+
ENA|BK006949|BK006949.2 443273 AGUGUGAACUGUGAUAAAUCA 443253
********************* PP
Internal CM pipeline statistics summary:
----------------------------------------
Query sequence(s): 1 (1896132 residues searched)
Query sequences re-searched for truncated hits: 1 (1166.7 residues re-searched, avg per model)
Target model(s): 3 (664 consensus positions)
Windows passing local HMM SSV filter: 8247 (0.3949); expected (0.35)
Windows passing local HMM Viterbi filter: (off)
Windows passing local HMM Viterbi bias filter: (off)
Windows passing local HMM Forward filter: 2447 (0.135); expected (0.005)
Windows passing local HMM Forward bias filter: 399 (0.02634); expected (0.005)
Windows passing glocal HMM Forward filter: 203 (0.01877); expected (0.005)
Windows passing glocal HMM Forward bias filter: 120 (0.01118); expected (0.005)
Envelopes passing glocal HMM envelope defn filter: 129 (0.005262); expected (0.005)
Envelopes passing local CM CYK filter: 7 (0.0001671); expected (0.0001)
Total CM hits reported: 1 (2.477e-05); includes 0 truncated hit(s)
# CPU time: 13.81u 0.55s 00:00:14.36 Elapsed: 00:00:13.09
# CPU time: 10.51u 0.39s 00:00:10.90 Elapsed: 00:00:09.15
//
[ok]

View File

@ -1,35 +0,0 @@
#idx target name accession query name accession clan name mdl mdl from mdl to seq from seq to strand trunc pass gc bias score E-value inc olp anyidx afrct1 afrct2 winidx wfrct1 wfrct2 description of target
#--- -------------------- --------- ----------------------- --------- --------- --- -------- -------- -------- -------- ------ ----- ---- ---- ----- ------ --------- --- --- ------ ------ ------ ------ ------ ------ ---------------------
1 U2 RF00004 ENA|BK006935|BK006935.2 - - cm 1 193 52929 53083 + no 1 0.44 0.0 13.5 0.91 ? * - - - - - - U2 spliceosomal RNA
2 U2 RF00004 ENA|BK006935|BK006935.2 - - cm 1 193 196571 196389 - no 1 0.33 5.3 12.8 1.3 ? * - - - - - - U2 spliceosomal RNA
1 U2 RF00004 ENA|BK006936|BK006936.2 - - cm 1 193 681858 681747 - no 1 0.33 0.1 98.7 1.2e-20 ! * - - - - - - U2 spliceosomal RNA
1 5S_rRNA RF00001 ENA|BK006937|BK006937.2 - - cm 1 119 761 644 - no 1 0.41 0.3 14.1 2.4 ? * - - - - - - 5S ribosomal RNA
2 U2 RF00004 ENA|BK006937|BK006937.2 - - cm 1 193 229986 229885 - no 1 0.32 0.1 11.1 4.7 ? * - - - - - - U2 spliceosomal RNA
1 U2 RF00004 ENA|BK006938|BK006938.2 - - cm 1 193 1259500 1259396 - no 1 0.38 0.0 13.1 7.5 ? * - - - - - - U2 spliceosomal RNA
1 U2 RF00004 ENA|BK006939|BK006939.2 - - cm 1 193 190882 191043 + no 1 0.41 0.0 14.9 1 ? * - - - - - - U2 spliceosomal RNA
1 5S_rRNA RF00001 ENA|BK006943|BK006943.2 - - cm 1 119 357031 357144 + no 1 0.46 0.0 20.9 0.1 ? * - - - - - - 5S ribosomal RNA
2 5S_rRNA RF00001 ENA|BK006943|BK006943.2 - - cm 1 119 359490 359579 + no 1 0.36 0.0 14.6 4.3 ? * - - - - - - 5S ribosomal RNA
3 U2 RF00004 ENA|BK006943|BK006943.2 - - cm 45 84 448179 448142 - no 1 0.21 0.0 12.2 5.9 ? * - - - - - - U2 spliceosomal RNA
1 5S_rRNA RF00001 ENA|BK006945|BK006945.2 - - cm 1 119 459676 459796 + no 1 0.52 0.0 88.8 4.2e-19 ! * - - - - - - 5S ribosomal RNA
2 5S_rRNA RF00001 ENA|BK006945|BK006945.2 - - cm 1 119 489349 489469 + no 1 0.52 0.0 88.8 4.2e-19 ! * - - - - - - 5S ribosomal RNA
3 5S_rRNA RF00001 ENA|BK006945|BK006945.2 - - cm 1 119 468813 468933 + no 1 0.53 0.0 83.2 1.2e-17 ! * - - - - - - 5S ribosomal RNA
4 5S_rRNA RF00001 ENA|BK006945|BK006945.2 - - cm 1 119 472465 472585 + no 1 0.53 0.0 83.2 1.2e-17 ! * - - - - - - 5S ribosomal RNA
5 5S_rRNA RF00001 ENA|BK006945|BK006945.2 - - cm 1 119 482045 482165 + no 1 0.53 0.0 83.2 1.2e-17 ! * - - - - - - 5S ribosomal RNA
6 5S_rRNA RF00001 ENA|BK006945|BK006945.2 - - cm 1 119 485697 485817 + no 1 0.53 0.0 83.2 1.2e-17 ! * - - - - - - 5S ribosomal RNA
7 U2 RF00004 ENA|BK006945|BK006945.2 - - cm 1 193 940222 940147 - no 1 0.30 0.0 13.0 5.6 ? * - - - - - - U2 spliceosomal RNA
1 U2 RF00004 ENA|BK006947|BK006947.3 - - cm 1 193 557234 557379 + no 1 0.34 0.1 15.6 0.97 ? * - - - - - - U2 spliceosomal RNA
2 U2 RF00004 ENA|BK006947|BK006947.3 - - cm 1 193 266059 266208 + no 1 0.39 0.0 15.3 1.1 ? * - - - - - - U2 spliceosomal RNA
3 5S_rRNA RF00001 ENA|BK006947|BK006947.3 - - cm 1 119 7085 6968 - no 1 0.41 0.3 16.7 1.3 ? * - - - - - - 5S ribosomal RNA
1 U2 RF00004 ENA|BK006948|BK006948.2 - - cm 1 193 737498 737324 - no 1 0.39 0.0 19.8 0.13 ? * - - - - - - U2 spliceosomal RNA
2 U2 RF00004 ENA|BK006948|BK006948.2 - - cm 1 193 425490 425693 + no 1 0.34 0.9 13.7 3.7 ? * - - - - - - U2 spliceosomal RNA
1 U2 RF00004 ENA|BK006949|BK006949.2 - - cm 1 193 443393 443253 - no 1 0.32 0.4 15.1 1.6 ? * - - - - - - U2 spliceosomal RNA
#
# Program: cmscan
# Version: 1.1.4 (Dec 2020)
# Pipeline mode: SCAN
# Query file: GCA_000146045.2.fasta
# Target file: IRES_5S_U2.cm
# Option settings: cmscan --tblout IRES_5S_U2_Yeast-cmscan_fmt_2.tbl --fmt 2 IRES_5S_U2.cm GCA_000146045.2.fasta
# Current dir: /analysis/BioPython/Testing
# Date: Tue Sep 10 11:19:57 2024
# [ok]

View File

@ -136,6 +136,7 @@ class HmmscanCases(unittest.TestCase):
self.assertEqual(1, hit.domain_reported_num)
self.assertEqual(1, hit.domain_included_num)
self.assertEqual("Immunoglobulin domain", hit.description)
# first hsp
hsp = hit.hsps[0]
self.assertEqual(2.1e-09, hsp.evalue)
self.assertEqual(37.6, hsp.bitscore)

View File

@ -0,0 +1,351 @@
# Copyright 2024 by Samuel Prince. All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Tests for SearchIO InfernalIO infernal-tab parser."""
import os
import unittest
from Bio.SearchIO import parse
# test case files are in the Infernal directory
TEST_DIR = "Infernal"
# format name handed to Bio.SearchIO.parse by the tests below
FMT = "infernal-tab"
def get_file(filename):
    """Build the path to the named file inside the test data directory."""
    # TEST_DIR is joined as-is, so the result is relative whenever TEST_DIR is
    test_path = os.path.join(TEST_DIR, filename)
    return test_path
class CmscanCases(unittest.TestCase):
"""Test parsing cmscan output."""
def test_cmscan_mq_mm(self):
"""Test parsing infernal-tab, cmscan, multiple queries, multiple match, one hsp, default format"""
tab_file = get_file("IRES_5S_U2_Yeast-cmscan.tbl")
qresults = parse(tab_file, FMT)
counter = 0
# first qresult
qresult = next(qresults)
counter += 1
self.assertEqual(1, len(qresult))
self.assertEqual("ENA|BK006935|BK006935.2", qresult.id)
self.assertEqual("-", qresult.accession)
self.assertEqual("cm", qresult.model)
hit = qresult[0]
self.assertEqual(2, len(hit))
self.assertEqual("U2", hit.id)
self.assertEqual("RF00004", hit.accession)
self.assertEqual("U2 spliceosomal RNA", hit.description)
# first hsp
hsp = hit[0]
self.assertEqual(1, len(hsp))
self.assertEqual(0.91, hsp.evalue)
self.assertEqual(13.5, hsp.bitscore)
self.assertEqual(0.0, hsp.bias)
self.assertEqual(0.44, hsp.gc)
self.assertEqual("no", hsp.truncated)
self.assertEqual(1, hsp.pipeline_pass)
self.assertEqual(False, hsp.is_included)
frag = hsp[0]
self.assertEqual(1, frag.query_start)
self.assertEqual(193, frag.query_end)
self.assertEqual(52929, frag.hit_start)
self.assertEqual(53083, frag.hit_end)
self.assertEqual(0, frag.hit_strand)
# second hsp
hsp = hit[1]
self.assertEqual(1, len(hsp))
self.assertEqual(1.3, hsp.evalue)
self.assertEqual(12.8, hsp.bitscore)
self.assertEqual(5.3, hsp.bias)
self.assertEqual(0.33, hsp.gc)
self.assertEqual("no", hsp.truncated)
self.assertEqual(1, hsp.pipeline_pass)
self.assertEqual(False, hsp.is_included)
frag = hsp[0]
self.assertEqual(1, frag.query_start)
self.assertEqual(193, frag.query_end)
self.assertEqual(196389, frag.hit_start)
self.assertEqual(196571, frag.hit_end)
self.assertEqual(-1, frag.hit_strand)
# second qresult
qresult = next(qresults)
counter += 1
self.assertEqual(1, len(qresult))
self.assertEqual("ENA|BK006936|BK006936.2", qresult.id)
self.assertEqual("-", qresult.accession)
self.assertEqual("cm", qresult.model)
hit = qresult[0]
self.assertEqual(1, len(hit))
self.assertEqual("U2", hit.id)
self.assertEqual("RF00004", hit.accession)
self.assertEqual("U2 spliceosomal RNA", hit.description)
hsp = hit[0]
self.assertEqual(1, len(hsp))
self.assertEqual(1.2e-20, hsp.evalue)
self.assertEqual(98.7, hsp.bitscore)
self.assertEqual(0.1, hsp.bias)
self.assertEqual(0.33, hsp.gc)
self.assertEqual("no", hsp.truncated)
self.assertEqual(1, hsp.pipeline_pass)
self.assertEqual(True, hsp.is_included)
frag = hsp[0]
self.assertEqual(1, frag.query_start)
self.assertEqual(193, frag.query_end)
self.assertEqual(681747, frag.hit_start)
self.assertEqual(681858, frag.hit_end)
self.assertEqual(-1, frag.hit_strand)
# third qresult
qresult = next(qresults)
counter += 1
self.assertEqual(2, len(qresult))
self.assertEqual("ENA|BK006937|BK006937.2", qresult.id)
self.assertEqual("-", qresult.accession)
self.assertEqual("cm", qresult.model)
# first hit
hit = qresult[0]
self.assertEqual(1, len(hit))
self.assertEqual("5S_rRNA", hit.id)
self.assertEqual("RF00001", hit.accession)
self.assertEqual("5S ribosomal RNA", hit.description)
hsp = hit[0]
self.assertEqual(1, len(hsp))
self.assertEqual(2.4, hsp.evalue)
self.assertEqual(14.1, hsp.bitscore)
self.assertEqual(0.3, hsp.bias)
self.assertEqual(0.41, hsp.gc)
self.assertEqual("no", hsp.truncated)
self.assertEqual(1, hsp.pipeline_pass)
self.assertEqual(False, hsp.is_included)
frag = hsp[0]
self.assertEqual(1, frag.query_start)
self.assertEqual(119, frag.query_end)
self.assertEqual(644, frag.hit_start)
self.assertEqual(761, frag.hit_end)
self.assertEqual(-1, frag.hit_strand)
# second hit
hit = qresult[1]
self.assertEqual(1, len(hit))
self.assertEqual("U2", hit.id)
self.assertEqual("RF00004", hit.accession)
self.assertEqual("U2 spliceosomal RNA", hit.description)
hsp = hit[0]
self.assertEqual(1, len(hsp))
self.assertEqual(4.7, hsp.evalue)
self.assertEqual(11.1, hsp.bitscore)
self.assertEqual(0.1, hsp.bias)
self.assertEqual(0.32, hsp.gc)
self.assertEqual("no", hsp.truncated)
self.assertEqual(1, hsp.pipeline_pass)
self.assertEqual(False, hsp.is_included)
frag = hsp[0]
self.assertEqual(1, frag.query_start)
self.assertEqual(193, frag.query_end)
self.assertEqual(229885, frag.hit_start)
self.assertEqual(229986, frag.hit_end)
self.assertEqual(-1, frag.hit_strand)
def test_cmscan_mq_mm_fmt2(self):
"""Test parsing infernal-tab, cmscan, multiple queries, multiple match, one hsp, fmt 2"""
tab_file = get_file("IRES_5S_U2_Yeast-cmscan-fmt_2.tbl")
qresults = parse(tab_file, FMT)
counter = 0
# first qresult
qresult = next(qresults)
counter += 1
self.assertEqual(1, len(qresult))
self.assertEqual("ENA|BK006936|BK006936.2", qresult.id)
self.assertEqual("-", qresult.accession)
self.assertEqual("cm", qresult.model)
self.assertEqual("-", qresult.clan)
self.assertEqual(813184, qresult.seq_len)
hit = qresult[0]
self.assertEqual(1, len(hit))
self.assertEqual("U2", hit.id)
self.assertEqual("RF00004", hit.accession)
self.assertEqual("U2 spliceosomal RNA", hit.description)
self.assertEqual(193, hit.seq_len)
# first hsp
hsp = hit[0]
self.assertEqual(1, len(hsp))
self.assertEqual(1.2e-20, hsp.evalue)
self.assertEqual(98.7, hsp.bitscore)
self.assertEqual(0.1, hsp.bias)
self.assertEqual(0.33, hsp.gc)
self.assertEqual("no", hsp.truncated)
self.assertEqual(1, hsp.pipeline_pass)
self.assertEqual(True, hsp.is_included)
self.assertEqual("*", hsp.olp)
self.assertEqual("-", hsp.anyidx)
self.assertEqual("-", hsp.afrct1)
self.assertEqual("-", hsp.afrct2)
self.assertEqual("-", hsp.winidx)
self.assertEqual("-", hsp.wfrct1)
self.assertEqual("-", hsp.wfrct2)
frag = hsp[0]
self.assertEqual(1, frag.query_start)
self.assertEqual(193, frag.query_end)
self.assertEqual(681747, frag.hit_start)
self.assertEqual(681858, frag.hit_end)
self.assertEqual(-1, frag.hit_strand)
def test_cmscan_mq_mm_fmt3(self):
    """Test parsing infernal-tab, cmscan, multiple queries, multiple match, one hsp, fmt 3

    Only the first query result of the file is inspected. Besides the
    common columns, this checks the sequence lengths exposed on the
    query result and on the hit.
    """
    tab_file = get_file("IRES_5S_U2_Yeast-cmscan-fmt_3.tbl")
    qresults = parse(tab_file, FMT)

    # first qresult
    qresult = next(qresults)
    self.assertEqual(1, len(qresult))
    self.assertEqual("ENA|BK006936|BK006936.2", qresult.id)
    self.assertEqual("-", qresult.accession)
    self.assertEqual("cm", qresult.model)
    self.assertEqual(813184, qresult.seq_len)

    # first (and only) hit of that qresult
    hit = qresult[0]
    self.assertEqual(1, len(hit))
    self.assertEqual("U2", hit.id)
    self.assertEqual("RF00004", hit.accession)
    self.assertEqual("U2 spliceosomal RNA", hit.description)
    self.assertEqual(193, hit.seq_len)

    # first hsp
    hsp = hit[0]
    self.assertEqual(1, len(hsp))
    self.assertEqual(1.2e-20, hsp.evalue)
    self.assertEqual(98.7, hsp.bitscore)
    self.assertEqual(0.1, hsp.bias)
    self.assertEqual(0.33, hsp.gc)
    self.assertEqual("no", hsp.truncated)
    self.assertEqual(1, hsp.pipeline_pass)
    self.assertEqual(True, hsp.is_included)

    # coordinates of the single fragment
    frag = hsp[0]
    self.assertEqual(1, frag.query_start)
    self.assertEqual(193, frag.query_end)
    self.assertEqual(681747, frag.hit_start)
    self.assertEqual(681858, frag.hit_end)
    self.assertEqual(-1, frag.hit_strand)
class CmsearchCases(unittest.TestCase):
    """Test parsing cmsearch output."""

    def test_1q_0m(self):
        """Test parsing infernal-tab, cmsearch, single query, no hits"""
        tab_file = get_file("IRES_Yeast.tbl")
        qresults = parse(tab_file, FMT)
        # a table without hits must not yield any query result
        self.assertRaises(StopIteration, next, qresults)

    def test_cmsearch_1q_1m(self):
        """Test parsing infernal-tab, cmsearch, one query, one match, one hsp"""
        tab_file = get_file("U2_Yeast-threshold.tbl")
        qresults = parse(tab_file, FMT)
        counter = 0

        qresult = next(qresults)
        counter += 1
        self.assertEqual(1, len(qresult))
        self.assertEqual("U2", qresult.id)
        self.assertEqual("RF00004", qresult.accession)
        self.assertEqual("cm", qresult.model)

        hit = qresult[0]
        self.assertEqual(1, len(hit))
        self.assertEqual("ENA|BK006936|BK006936.2", hit.id)
        self.assertEqual("-", hit.accession)
        self.assertEqual(
            "TPA_inf: Saccharomyces cerevisiae S288C chromosome II, complete sequence.",
            hit.description,
        )

        hsp = hit[0]
        self.assertEqual(1, len(hsp))
        self.assertEqual(5.9e-20, hsp.evalue)
        self.assertEqual(98.7, hsp.bitscore)
        self.assertEqual(0.1, hsp.bias)
        self.assertEqual(0.33, hsp.gc)
        self.assertEqual("no", hsp.truncated)
        self.assertEqual(1, hsp.pipeline_pass)
        self.assertEqual(True, hsp.is_included)

        frag = hsp[0]
        self.assertEqual(1, frag.query_start)
        self.assertEqual(193, frag.query_end)
        self.assertEqual(681747, frag.hit_start)
        self.assertEqual(681858, frag.hit_end)
        self.assertEqual(-1, frag.hit_strand)

        # test if we've properly finished iteration
        self.assertRaises(StopIteration, next, qresults)
        self.assertEqual(1, counter)

    def test_cmsearch_1q_mm(self):
        """Test parsing infernal-tab, cmsearch, one query, multiple match, one hsp"""
        tab_file = get_file("5S_Yeast.tbl")
        qresults = parse(tab_file, FMT)
        counter = 0

        qresult = next(qresults)
        counter += 1
        self.assertEqual(1, len(qresult))
        self.assertEqual("5S_rRNA", qresult.id)
        self.assertEqual("RF00001", qresult.accession)
        self.assertEqual("cm", qresult.model)

        # the single hit groups all six matches as hsps
        hit = qresult[0]
        self.assertEqual(6, len(hit))
        self.assertEqual("ENA|BK006945|BK006945.2", hit.id)
        self.assertEqual("-", hit.accession)
        self.assertEqual(
            "TPA_inf: Saccharomyces cerevisiae S288C chromosome XII, complete sequence.",
            hit.description,
        )

        # first hsp
        hsp = hit[0]
        self.assertEqual(1, len(hsp))
        self.assertEqual(1.6e-18, hsp.evalue)
        self.assertEqual(88.8, hsp.bitscore)
        self.assertEqual(0.0, hsp.bias)
        self.assertEqual(0.52, hsp.gc)
        self.assertEqual("no", hsp.truncated)
        self.assertEqual(1, hsp.pipeline_pass)
        self.assertEqual(True, hsp.is_included)
        frag = hsp[0]
        self.assertEqual(1, frag.query_start)
        self.assertEqual(119, frag.query_end)
        self.assertEqual(459676, frag.hit_start)
        self.assertEqual(459796, frag.hit_end)
        self.assertEqual(0, frag.hit_strand)

        # last hsp
        hsp = hit[-1]
        self.assertEqual(1, len(hsp))
        self.assertEqual(4.4e-17, hsp.evalue)
        self.assertEqual(83.2, hsp.bitscore)
        self.assertEqual(0.0, hsp.bias)
        self.assertEqual(0.53, hsp.gc)
        self.assertEqual("no", hsp.truncated)
        self.assertEqual(1, hsp.pipeline_pass)
        self.assertEqual(True, hsp.is_included)
        frag = hsp[0]
        self.assertEqual(1, frag.query_start)
        self.assertEqual(119, frag.query_end)
        self.assertEqual(485697, frag.hit_start)
        self.assertEqual(485817, frag.hit_end)
        self.assertEqual(0, frag.hit_strand)

        # test if we've properly finished iteration
        self.assertRaises(StopIteration, next, qresults)
        self.assertEqual(1, counter)
if __name__ == "__main__":
    # Run every test case of this module with verbose (per-test) reporting
    # when the file is executed directly as a script.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))