diff --git a/Bio/SearchIO/InfernalIO/__init__.py b/Bio/SearchIO/InfernalIO/__init__.py new file mode 100644 index 000000000..7ab20f142 --- /dev/null +++ b/Bio/SearchIO/InfernalIO/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2024 by Samuel Prince. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. + + +from .infernal_tab import InfernalTabParser + + +# if not used as a module, run the doctest +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/Bio/SearchIO/InfernalIO/infernal_tab.py b/Bio/SearchIO/InfernalIO/infernal_tab.py new file mode 100644 index 000000000..19445d796 --- /dev/null +++ b/Bio/SearchIO/InfernalIO/infernal_tab.py @@ -0,0 +1,255 @@ +# Copyright 2024 by Samuel Prince. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. 
+ +"""Bio.SearchIO parser for Infernal tabular output format.""" + +from Bio.SearchIO._index import SearchIndexer +from Bio.SearchIO._model import Hit +from Bio.SearchIO._model import HSP +from Bio.SearchIO._model import HSPFragment +from Bio.SearchIO._model import QueryResult +from Bio.SearchIO.HmmerIO import Hmmer3TabParser + +__all__ = ("InfernalTabParser") + +# tabular format column names +_TAB_FORMAT = { + 1: ("target_name", "target_acc", "query_name", "query_acc", "mdl", "mdl_from", "mdl_to", "seq_from", "seq_to", "strand", "trunc", "pass", "gc", "bias", "score", "evalue", "inc", "description"), + 2: ("idx", "target_name", "target_acc", "query_name", "query_acc", "clan", "mdl", "mdl_from", "mdl_to", "seq_from", "seq_to", "strand", "trunc", "pass", "gc", "bias", "score", "evalue", "inc", "olp", "anyidx", "afrct1", "afrct2", "winidx", "wfrct1", "wfrct2", "mdl_len", "seq_len", "description"), + 3: ("target_name", "target_acc", "query_name", "query_acc", "mdl", "mdl_from", "mdl_to", "seq_from", "seq_to", "strand", "trunc", "pass", "gc", "bias", "score", "evalue", "inc", "mdl_len", "seq_len", "description") +} + +# column to class attribute map +_COLUMN_QRESULT = { + "query_name": ("id", str), + "query_acc": ("accession", str), + "seq_len": ("seq_len", int), + "clan": ("clan", str), + "mdl": ("model", str) +} +_COLUMN_HIT = { + "target_name": ("id", str), + "target_acc": ("accession", str), + "description": ("description", str), + "mdl_len": ("seq_len", int), +} +_COLUMN_HSP = { + "score": ("bitscore", float), + "evalue": ("evalue", float), + "bias": ("bias", float), + "gc": ("gc", float), + "trunc": ("truncated", str), + "pass": ("pipeline_pass", int), + "inc": ("is_included", str), + "olp": ("olp", str), + "anyidx": ("anyidx", str), + "afrct1": ("afrct1", str), + "afrct2": ("afrct2", str), + "winidx": ("winidx", str), + "wfrct1": ("wfrct1", str), + "wfrct2": ("wfrct2", str), +} +_COLUMN_FRAG = { + "mdl_from": ("query_start", int), + "mdl_to": ("query_end", int), 
+ "seq_from": ("hit_start", int), + "seq_to": ("hit_end", int), + "strand": ("hit_strand", str), +} + + +class InfernalTabParser(Hmmer3TabParser): + """Parser for the Infernal tabular format.""" + + + def __init__(self, handle): + """Initialize the class.""" + self.handle = handle + self.line = self.handle.readline().strip() + self.fmt = self._find_tabular_format() + + + def _find_tabular_format(self): + """Identify the tabular file format from the header (PRIVATE).""" + # skip the first line as some column names contain spaces + self.line = self.handle.readline() + + # the second line should always be a header + if not self.line.startswith("#"): + raise ValueError("Expected the first two lines of an Infernal tabular file to be a the header.") + + # identify tabular format 1 (default; 18 columns), 2 (29 columns) or 3 (19 columns) + # from the the second header line which does not contain spaces + if len(self.line.split(' ')) == len(_TAB_FORMAT[1]): + fmt = 1 + elif len(self.line.split(' ')) == len(_TAB_FORMAT[2]): + fmt = 2 + elif len(self.line.split(' ')) == len(_TAB_FORMAT[3]): + fmt = 3 + else: + raise ValueError("Unknown Infernal tabular output format. 
Format 1 (default), 2 and 3 are supported.") + + return fmt + + + def __iter__(self): + """Iterate over InfernalTabParser, yields query results.""" + # read through the footer + while self.line.startswith("#"): + self.line = self.handle.readline() + # if we have result rows, parse it + if self.line: + yield from self._parse_qresult() + + + def _parse_row(self): + """Return a dictionary of parsed row values (PRIVATE).""" + cols = [x for x in self.line.strip().split(" ") if x] + if len(cols) < len(_TAB_FORMAT[self.fmt]): + raise ValueError("Less columns than expected for format {}, only {}".format(self.fmt, len(cols))) + # combine extra description columns into one string + cols[len(_TAB_FORMAT[self.fmt])-1] = " ".join(cols[len(_TAB_FORMAT[self.fmt])-1:]) + + qresult, hit, hsp, frag = {}, {}, {}, {} + for sname, value in zip(_TAB_FORMAT[self.fmt],cols[:len(_TAB_FORMAT[self.fmt])]): + # iterate over each dict, mapping pair to determine + # attribute name and value of each column + for parsed_dict, mapping in ( + (qresult, _COLUMN_QRESULT), + (hit, _COLUMN_HIT), + (hsp, _COLUMN_HSP), + (frag, _COLUMN_FRAG), + ): + # process parsed value according to mapping + if sname in mapping: + attr_name, caster = mapping[sname] + if caster is not str: + value = caster(value) + parsed_dict[attr_name] = value + + # adjust start and end coordinates according to strand + self._adjust_coords(frag) + # convert inclusion string to a bool + self._convert_inclusion(hsp) + + return {"qresult": qresult, "hit": hit, "hsp": hsp, "frag": frag} + + + def _adjust_coords(self, frag): + """Adjust start and end coordinates according to strand (PRIVATE).""" + strand = frag["hit_strand"] + assert strand is not None + # switch start <--> end coordinates if strand is -1 and the strand to an integer (0 or -1) + if strand == '-': + hit_start = frag["hit_start"] + hit_end = frag["hit_end"] + frag["hit_start"] = hit_end + frag["hit_end"] = hit_start + frag["hit_strand"] = -1 + else: + frag["hit_strand"] = 0 
+ + + def _convert_inclusion(self, hsp): + """Convert inclusion string to a bool (PRIVATE).""" + is_included = hsp["is_included"] + hsp["is_included"] = True if is_included == '!' else False + + + def _parse_qresult(self): + """Yield QueryResult objects (PRIVATE).""" + # state values, determines what to do for each line + state_EOF = 0 + state_QRES_NEW = 1 + state_QRES_SAME = 3 + state_HIT_NEW = 2 + state_HIT_SAME = 4 + # dummies for initial states + qres_state = None + hit_state = None + file_state = None + cur_qid = None + cur_hid = None + # dummies for initial id caches + prev_qid = None + prev_hid = None + # dummies for initial parsed value containers + cur, prev = None, None + hit_list, hsp_list = [], [] + + while True: + # store previous line's parsed values for all lines after the first + if cur is not None: + prev = cur + prev_qid = cur_qid + prev_hid = cur_hid + # only parse the result row if it's not EOF or a comment line + if self.line and not self.line.startswith("#"): + cur = self._parse_row() + cur_qid = cur["qresult"]["id"] + cur_hid = cur["hit"]["id"] + else: + file_state = state_EOF + # mock values for cur_qid and cur_hid since the line is empty + cur_qid, cur_hid = None, None + + # get the state of hit and qresult + if prev_qid != cur_qid: + qres_state = state_QRES_NEW + else: + qres_state = state_QRES_SAME + # new hits are hits with different id or hits in a new qresult + if prev_hid != cur_hid or qres_state == state_QRES_NEW: + hit_state = state_HIT_NEW + else: + hit_state = state_HIT_SAME + + # creating objects for the previously parsed line(s), so nothing is done + # in the first parsed line (prev == None) + if prev is not None: + # create fragment and HSP and set their attributes + frag = HSPFragment(prev_hid, prev_qid) + for attr, value in prev["frag"].items(): + setattr(frag, attr, value) + hsp = HSP([frag]) + for attr, value in prev["hsp"].items(): + setattr(hsp, attr, value) + hsp_list.append(hsp) + + # create hit and append to temp hit 
container if hit_state + # says we're not at the same hit or at a new query + if hit_state == state_HIT_NEW: + hit = Hit(hsp_list) + for attr, value in prev["hit"].items(): + setattr(hit, attr, value) + hit_list.append(hit) + hsp_list = [] + # create Hit and set its attributes + #hit = Hit([hsp]) + #for attr, value in prev["hit"].items(): + # setattr(hit, attr, value) + #hit_list.append(hit) + + # create qresult and yield if we're at a new qresult or at EOF + if qres_state == state_QRES_NEW or file_state == state_EOF: + qresult = QueryResult(hit_list, prev_qid) + for attr, value in prev["qresult"].items(): + setattr(qresult, attr, value) + yield qresult + # if we're at EOF, break + if file_state == state_EOF: + break + hit_list = [] + + self.line = self.handle.readline() + + +# if not used as a module, run the doctest +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/Bio/SearchIO/__init__.py b/Bio/SearchIO/__init__.py index fa117098a..7bd6ad5b1 100644 --- a/Bio/SearchIO/__init__.py +++ b/Bio/SearchIO/__init__.py @@ -182,6 +182,7 @@ Support for parsing and indexing: subprograms are hmmscan, hmmsearch, and phmmer. - hmmer2-text - HMMER2 regular text output format. Supported HMMER2 subprograms are hmmpfam, hmmsearch. + - infernal-tab - Infernal tabular output. 
Support for parsing: @@ -221,6 +222,7 @@ _ITERATOR_MAP = { # as we need it distinguish hit / target coordinates "hmmscan3-domtab": ("HmmerIO", "Hmmer3DomtabHmmhitParser"), "hmmsearch3-domtab": ("HmmerIO", "Hmmer3DomtabHmmqueryParser"), + "infernal-tab": ("InfernalIO", "InfernalTabParser"), "interproscan-xml": ("InterproscanIO", "InterproscanXmlParser"), "phmmer3-domtab": ("HmmerIO", "Hmmer3DomtabHmmqueryParser"), } diff --git a/Tests/Infernal/IRES_5S_U2_Yeast-cmscan-fmt_2.tbl b/Tests/Infernal/IRES_5S_U2_Yeast-cmscan-fmt_2.tbl new file mode 100644 index 000000000..b7751a268 --- /dev/null +++ b/Tests/Infernal/IRES_5S_U2_Yeast-cmscan-fmt_2.tbl @@ -0,0 +1,13 @@ +#idx target name accession query name accession clan name mdl mdl from mdl to seq from seq to strand trunc pass gc bias score E-value inc olp anyidx afrct1 afrct2 winidx wfrct1 wfrct2 mdl len seq len description of target +#--- -------------------- --------- ----------------------- --------- --------- --- -------- -------- -------- -------- ------ ----- ---- ---- ----- ------ --------- --- --- ------ ------ ------ ------ ------ ------ ------- ------- --------------------- +1 U2 RF00004 ENA|BK006936|BK006936.2 - - cm 1 193 681858 681747 - no 1 0.33 0.1 98.7 1.2e-20 ! 
* - - - - - - 193 813184 U2 spliceosomal RNA +# +# Program: cmscan +# Version: 1.1.5 (Sep 2023) +# Pipeline mode: SCAN +# Query file: BK006936.fasta +# Target file: IRES_5S_U2.cm +# Option settings: cmscan --tblout IRES_5S_U2_Yeast-cmscan-fmt_2.tbl --fmt 2 IRES_5S_U2.cm BK006936.fasta +# Current dir: /analysis/BioPython/Testing +# Date: Thu Sep 12 05:17:28 2024 +# [ok] diff --git a/Tests/Infernal/IRES_5S_U2_Yeast-cmscan-fmt_3.tbl b/Tests/Infernal/IRES_5S_U2_Yeast-cmscan-fmt_3.tbl new file mode 100644 index 000000000..0851fa0ba --- /dev/null +++ b/Tests/Infernal/IRES_5S_U2_Yeast-cmscan-fmt_3.tbl @@ -0,0 +1,13 @@ +#target name accession query name accession mdl mdl from mdl to seq from seq to strand trunc pass gc bias score E-value inc mdl len seq len description of target +#------------------- --------- ----------------------- --------- --- -------- -------- -------- -------- ------ ----- ---- ---- ----- ------ --------- --- ------- ------- --------------------- +U2 RF00004 ENA|BK006936|BK006936.2 - cm 1 193 681858 681747 - no 1 0.33 0.1 98.7 1.2e-20 ! 193 813184 U2 spliceosomal RNA +# +# Program: cmscan +# Version: 1.1.5 (Sep 2023) +# Pipeline mode: SCAN +# Query file: BK006936.fasta +# Target file: IRES_5S_U2.cm +# Option settings: cmscan --tblout IRES_5S_U2_Yeast-cmscan-fmt_3.tbl --fmt 3 IRES_5S_U2.cm BK006936.fasta +# Current dir: /analysis/BioPython/Testing +# Date: Thu Sep 12 05:17:37 2024 +# [ok] diff --git a/Tests/Infernal/IRES_5S_U2_Yeast-cmscan.tbl b/Tests/Infernal/IRES_5S_U2_Yeast-cmscan.tbl index f9986c096..e83f0d82c 100644 --- a/Tests/Infernal/IRES_5S_U2_Yeast-cmscan.tbl +++ b/Tests/Infernal/IRES_5S_U2_Yeast-cmscan.tbl @@ -5,31 +5,13 @@ U2 RF00004 ENA|BK006935|BK006935.2 - cm 1 U2 RF00004 ENA|BK006936|BK006936.2 - cm 1 193 681858 681747 - no 1 0.33 0.1 98.7 1.2e-20 ! U2 spliceosomal RNA 5S_rRNA RF00001 ENA|BK006937|BK006937.2 - cm 1 119 761 644 - no 1 0.41 0.3 14.1 2.4 ? 
5S ribosomal RNA U2 RF00004 ENA|BK006937|BK006937.2 - cm 1 193 229986 229885 - no 1 0.32 0.1 11.1 4.7 ? U2 spliceosomal RNA -U2 RF00004 ENA|BK006938|BK006938.2 - cm 1 193 1259500 1259396 - no 1 0.38 0.0 13.1 7.5 ? U2 spliceosomal RNA -U2 RF00004 ENA|BK006939|BK006939.2 - cm 1 193 190882 191043 + no 1 0.41 0.0 14.9 1 ? U2 spliceosomal RNA -5S_rRNA RF00001 ENA|BK006943|BK006943.2 - cm 1 119 357031 357144 + no 1 0.46 0.0 20.9 0.1 ? 5S ribosomal RNA -5S_rRNA RF00001 ENA|BK006943|BK006943.2 - cm 1 119 359490 359579 + no 1 0.36 0.0 14.6 4.3 ? 5S ribosomal RNA -U2 RF00004 ENA|BK006943|BK006943.2 - cm 45 84 448179 448142 - no 1 0.21 0.0 12.2 5.9 ? U2 spliceosomal RNA -5S_rRNA RF00001 ENA|BK006945|BK006945.2 - cm 1 119 459676 459796 + no 1 0.52 0.0 88.8 4.2e-19 ! 5S ribosomal RNA -5S_rRNA RF00001 ENA|BK006945|BK006945.2 - cm 1 119 489349 489469 + no 1 0.52 0.0 88.8 4.2e-19 ! 5S ribosomal RNA -5S_rRNA RF00001 ENA|BK006945|BK006945.2 - cm 1 119 468813 468933 + no 1 0.53 0.0 83.2 1.2e-17 ! 5S ribosomal RNA -5S_rRNA RF00001 ENA|BK006945|BK006945.2 - cm 1 119 472465 472585 + no 1 0.53 0.0 83.2 1.2e-17 ! 5S ribosomal RNA -5S_rRNA RF00001 ENA|BK006945|BK006945.2 - cm 1 119 482045 482165 + no 1 0.53 0.0 83.2 1.2e-17 ! 5S ribosomal RNA -5S_rRNA RF00001 ENA|BK006945|BK006945.2 - cm 1 119 485697 485817 + no 1 0.53 0.0 83.2 1.2e-17 ! 5S ribosomal RNA -U2 RF00004 ENA|BK006945|BK006945.2 - cm 1 193 940222 940147 - no 1 0.30 0.0 13.0 5.6 ? U2 spliceosomal RNA -U2 RF00004 ENA|BK006947|BK006947.3 - cm 1 193 557234 557379 + no 1 0.34 0.1 15.6 0.97 ? U2 spliceosomal RNA -U2 RF00004 ENA|BK006947|BK006947.3 - cm 1 193 266059 266208 + no 1 0.39 0.0 15.3 1.1 ? U2 spliceosomal RNA -5S_rRNA RF00001 ENA|BK006947|BK006947.3 - cm 1 119 7085 6968 - no 1 0.41 0.3 16.7 1.3 ? 5S ribosomal RNA -U2 RF00004 ENA|BK006948|BK006948.2 - cm 1 193 737498 737324 - no 1 0.39 0.0 19.8 0.13 ? U2 spliceosomal RNA -U2 RF00004 ENA|BK006948|BK006948.2 - cm 1 193 425490 425693 + no 1 0.34 0.9 13.7 3.7 ? 
U2 spliceosomal RNA -U2 RF00004 ENA|BK006949|BK006949.2 - cm 1 193 443393 443253 - no 1 0.32 0.4 15.1 1.6 ? U2 spliceosomal RNA # # Program: cmscan -# Version: 1.1.4 (Dec 2020) +# Version: 1.1.5 (Sep 2023) # Pipeline mode: SCAN -# Query file: GCA_000146045.2.fasta +# Query file: BK006935_6_7.fasta # Target file: IRES_5S_U2.cm -# Option settings: cmscan --tblout IRES_5S_U2_Yeast-cmscan_fmt_1.tbl IRES_5S_U2.cm GCA_000146045.2.fasta +# Option settings: cmscan --tblout IRES_5S_U2_Yeast-cmscan.tbl IRES_5S_U2.cm BK006935_6_7.fasta # Current dir: /analysis/BioPython/Testing -# Date: Tue Sep 10 11:07:15 2024 +# Date: Thu Sep 12 05:06:09 2024 # [ok] diff --git a/Tests/Infernal/IRES_5S_U2_Yeast-cmscan.txt b/Tests/Infernal/IRES_5S_U2_Yeast-cmscan.txt index 05f7b0514..c8a554ed5 100644 --- a/Tests/Infernal/IRES_5S_U2_Yeast-cmscan.txt +++ b/Tests/Infernal/IRES_5S_U2_Yeast-cmscan.txt @@ -1,16 +1,15 @@ # cmscan :: search sequence(s) against a CM database -# INFERNAL 1.1.4 (Dec 2020) -# Copyright (C) 2020 Howard Hughes Medical Institute. +# INFERNAL 1.1.5 (Sep 2023) +# Copyright (C) 2023 Howard Hughes Medical Institute. # Freely distributed under the BSD open source license. # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -# query sequence file: GCA_000146045.2.fasta +# query sequence file: BK006935_6_7.fasta # target CM database: IRES_5S_U2.cm -# tabular output of hits: IRES_5S_U2_Yeast-cmscan_fmt_1.tbl -# number of worker threads: 56 +# tabular output of hits: IRES_5S_U2_Yeast-cmscan.tbl +# number of worker threads: 4 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Query: ENA|BK006935|BK006935.2 [L=230218] -Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome I, complete sequence. 
Hit scores: rank E-value score bias modelname start end mdl trunc gc description ---- --------- ------ ----- --------- ------ ------ --- ----- ---- ----------- @@ -90,10 +89,9 @@ Envelopes passing glocal HMM envelope defn filter: 84 (0.01354); e Envelopes passing local CM CYK filter: 3 (0.0002665); expected (0.0001) Total CM hits reported: 2 (0.0002441); includes 0 truncated hit(s) -# CPU time: 8.92u 0.32s 00:00:09.24 Elapsed: 00:00:08.35 +# CPU time: 8.73u 0.20s 00:00:08.93 Elapsed: 00:00:08.28 // Query: ENA|BK006936|BK006936.2 [L=813184] -Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome II, complete sequence. Hit scores: rank E-value score bias modelname start end mdl trunc gc description ---- --------- ------ ----- --------- ------ ------ --- ----- ---- ----------- @@ -138,10 +136,9 @@ Envelopes passing glocal HMM envelope defn filter: 109 (0.004945); Envelopes passing local CM CYK filter: 7 (0.0001391); expected (0.0001) Total CM hits reported: 1 (2.294e-05); includes 0 truncated hit(s) -# CPU time: 10.08u 0.32s 00:00:10.40 Elapsed: 00:00:09.32 +# CPU time: 10.01u 0.30s 00:00:10.31 Elapsed: 00:00:09.27 // Query: ENA|BK006937|BK006937.2 [L=316620] -Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome III, complete sequence. Hit scores: rank E-value score bias modelname start end mdl trunc gc description ---- --------- ------ ----- --------- ------ ------ --- ----- ---- ----------- @@ -207,768 +204,6 @@ Envelopes passing glocal HMM envelope defn filter: 111 (0.01272); e Envelopes passing local CM CYK filter: 6 (0.0003184); expected (0.0001) Total CM hits reported: 2 (0.0001156); includes 0 truncated hit(s) -# CPU time: 10.37u 0.41s 00:00:10.78 Elapsed: 00:00:09.30 -// -Query: ENA|BK006938|BK006938.2 [L=1531933] -Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome IV, complete sequence. 
-Hit scores: - rank E-value score bias modelname start end mdl trunc gc description - ---- --------- ------ ----- --------- ------- ------- --- ----- ---- ----------- - ------ inclusion threshold ------ - (1) ? 7.5 13.1 0.0 U2 1259500 1259396 - cm no 0.38 U2 spliceosomal RNA - - -Hit alignments: ->> U2 U2 spliceosomal RNA - rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc - ---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ---- - (1) ? 7.5 13.1 0.0 cm 1 193 [] 1259500 1259396 - .. 0.82 no 0.38 - - v v v NC - ::::::<<<-<<<<____>>>>->>>,,,,,..,,,,,,,,,,,,,,,<<<<<<________>>>>>>,<<<<<< CS - U2 1 AUacCUUCucgGCcUUUUgGCuaaGAUCAAG..UGUAGUAUCUGUUCUUauCAGUuUAAuAuCUGauAuggcccc 73 - AU +CU GG :UUU : CU+ U AG UGUA UA CUGUUCU::::: UUU AU U+:::::+:G::: - ENA|BK006938|BK006938.2 1259500 AUUGCU----GGGAUUUGUCCUU---UUGAGucUGUAAUACCUGUUCUCUUUG-UUUGAUUUUCAGAGCUGUUUC 1259434 - ******....************9...99996337******************8.89******9*****7777653 PP - - v NC - ~~~~~>>>>>>,,,,,,,,,,,,,,,,,,,,,,,,,,,,~~~~~~~~~~~~::::::: CS - U2 74 *[5]*ggggccaauuauaUUAaauuaAUUUUUggaacua*[34]**[40]*Acccuuu 193 - :::C: A +AU U AAA A+CC++ - ENA|BK006938|BK006938.2 1259433 *[6]*AGAACAGACAAUCUCAAAA---------------*[ 2]**[ 4]*AACCCCA 1259396 - ..4..3566666668888887765..................6.....9..******* PP - - - -Internal CM pipeline statistics summary: ----------------------------------------- -Query sequence(s): 1 (3063866 residues searched) -Query sequences re-searched for truncated hits: 1 (1166.7 residues re-searched, avg per model) -Target model(s): 3 (664 consensus positions) -Windows passing local HMM SSV filter: 13337 (0.3945); expected (0.35) -Windows passing local HMM Viterbi filter: (off) -Windows passing local HMM Viterbi bias filter: (off) -Windows passing local HMM Forward filter: 4099 (0.1385); expected (0.005) -Windows passing local HMM Forward bias filter: 671 (0.02637); expected (0.005) -Windows passing glocal HMM Forward filter: 
325 (0.01925); expected (0.005) -Windows passing glocal HMM Forward bias filter: 193 (0.01067); expected (0.005) -Envelopes passing glocal HMM envelope defn filter: 209 (0.004993); expected (0.005) -Envelopes passing local CM CYK filter: 10 (0.0001517); expected (0.0001) -Total CM hits reported: 1 (1.142e-05); includes 0 truncated hit(s) - -# CPU time: 19.38u 0.49s 00:00:19.86 Elapsed: 00:00:18.66 -// -Query: ENA|BK006942|BK006942.2 [L=439888] -Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome IX, complete sequence. -Hit scores: - rank E-value score bias modelname start end mdl trunc gc description - ---- --------- ------ ----- --------- ------ ------ --- ----- ---- ----------- - - [No hits detected that satisfy reporting thresholds] - - -Hit alignments: - - [No hits detected that satisfy reporting thresholds] - - -Internal CM pipeline statistics summary: ----------------------------------------- -Query sequence(s): 1 (879776 residues searched) -Query sequences re-searched for truncated hits: 1 (1166.7 residues re-searched, avg per model) -Target model(s): 3 (664 consensus positions) -Windows passing local HMM SSV filter: 3731 (0.3879); expected (0.35) -Windows passing local HMM Viterbi filter: (off) -Windows passing local HMM Viterbi bias filter: (off) -Windows passing local HMM Forward filter: 1020 (0.1234); expected (0.005) -Windows passing local HMM Forward bias filter: 156 (0.02172); expected (0.005) -Windows passing glocal HMM Forward filter: 69 (0.01439); expected (0.005) -Windows passing glocal HMM Forward bias filter: 32 (0.006662); expected (0.005) -Envelopes passing glocal HMM envelope defn filter: 35 (0.002761); expected (0.005) -Envelopes passing local CM CYK filter: 3 (0.0002074); expected (0.0001) -Total CM hits reported: 0 (0); includes 0 truncated hit(s) - -# CPU time: 3.28u 0.10s 00:00:03.38 Elapsed: 00:00:02.92 -// -Query: ENA|BK006939|BK006939.2 [L=576874] -Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome V, complete 
sequence. -Hit scores: - rank E-value score bias modelname start end mdl trunc gc description - ---- --------- ------ ----- --------- ------ ------ --- ----- ---- ----------- - ------ inclusion threshold ------ - (1) ? 1 14.9 0.0 U2 190882 191043 + cm no 0.41 U2 spliceosomal RNA - - -Hit alignments: ->> U2 U2 spliceosomal RNA - rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc - ---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ---- - (1) ? 1 14.9 0.0 cm 1 193 [] 190882 191043 + .. 0.92 no 0.41 - - v v NC - ::::::<<<.-<<<<____>>>>->>>,,,,,,,,,,,,,,,,,,,,~~~~~~~~~~~~,,,,,,,,,,,,,,,,,,,,, CS - U2 1 AUacCUUCu.cgGCcUUUUgGCuaaGAUCAAGUGUAGUAUCUGUUCU*[20]**[18]*aauuauaUUAaauuaAUUUUU 105 - AU CC U : +: C: UUU:G : : AUCA AA ++U+UUAAAU AA UUUU - ENA|BK006939|BK006939.2 190882 AUUCCAUGAuUUCCUGUUUAGCU-UCAUCA-----------------*[ 4]**[ 4]*AACAUUUUUAAAUGAAAUUUU 190939 - *******9955678899999976.799997....................9.....9..77699**************** PP - - vv v v vv v NC - ,,,,,,,<<<<<<<<----.<<<<<_._>>>>>-.........->>>>>>>>,,<<<<<<-<<<<<<__________... CS - U2 106 ggaacuaGugggggcauuu.uggGCU.UGCccau.........ugcccccaCacggguugaccuggcaUUGCAcUac... 
171 - + + +GU:: : AUU :G:GCU UGC:C: + : ::ACA+ :: :: : :::: U CAC AC - ENA|BK006939|BK006939.2 190940 AAUGUCUGUUUCCUUAUUGaAGAGCUuUGCUCUGgauuuuccaACAUUAAACAUGCCGCCGAGGCCUCCUCCACCACcac 191019 - ***********9999998899999964799999999999999999999**************************999888 PP - - v NC - .._>>>>>>-->>>>>>::::::: CS - U2 172 ..cgccagguucagcccAcccuuu 193 - +:::: :UU:: :: + CU+U - ENA|BK006939|BK006939.2 191020 caUUGGCAUUUGGUGGUGAACUAU 191043 - 988********************* PP - - - -Internal CM pipeline statistics summary: ----------------------------------------- -Query sequence(s): 1 (1153748 residues searched) -Query sequences re-searched for truncated hits: 1 (1166.7 residues re-searched, avg per model) -Target model(s): 3 (664 consensus positions) -Windows passing local HMM SSV filter: 4912 (0.3874); expected (0.35) -Windows passing local HMM Viterbi filter: (off) -Windows passing local HMM Viterbi bias filter: (off) -Windows passing local HMM Forward filter: 1437 (0.1293); expected (0.005) -Windows passing local HMM Forward bias filter: 213 (0.02265); expected (0.005) -Windows passing glocal HMM Forward filter: 112 (0.01663); expected (0.005) -Windows passing glocal HMM Forward bias filter: 60 (0.008299); expected (0.005) -Envelopes passing glocal HMM envelope defn filter: 63 (0.004216); expected (0.005) -Envelopes passing local CM CYK filter: 5 (0.0001253); expected (0.0001) -Total CM hits reported: 1 (4.676e-05); includes 0 truncated hit(s) - -# CPU time: 7.19u 0.30s 00:00:07.49 Elapsed: 00:00:06.96 -// -Query: ENA|BK006940|BK006940.2 [L=270161] -Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome VI, complete sequence. 
-Hit scores: - rank E-value score bias modelname start end mdl trunc gc description - ---- --------- ------ ----- --------- ------ ------ --- ----- ---- ----------- - - [No hits detected that satisfy reporting thresholds] - - -Hit alignments: - - [No hits detected that satisfy reporting thresholds] - - -Internal CM pipeline statistics summary: ----------------------------------------- -Query sequence(s): 1 (540322 residues searched) -Query sequences re-searched for truncated hits: 1 (1166.7 residues re-searched, avg per model) -Target model(s): 3 (664 consensus positions) -Windows passing local HMM SSV filter: 2362 (0.3936); expected (0.35) -Windows passing local HMM Viterbi filter: (off) -Windows passing local HMM Viterbi bias filter: (off) -Windows passing local HMM Forward filter: 1161 (0.2049); expected (0.02) -Windows passing local HMM Forward bias filter: 445 (0.09149); expected (0.02) -Windows passing glocal HMM Forward filter: 192 (0.06844); expected (0.02) -Windows passing glocal HMM Forward bias filter: 116 (0.03896); expected (0.02) -Envelopes passing glocal HMM envelope defn filter: 127 (0.01684); expected (0.02) -Envelopes passing local CM CYK filter: 3 (0.000165); expected (0.0001) -Total CM hits reported: 0 (0); includes 0 truncated hit(s) - -# CPU time: 10.53u 0.34s 00:00:10.87 Elapsed: 00:00:10.41 -// -Query: ENA|BK006941|BK006941.2 [L=1090940] -Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome VII, complete sequence. 
-Hit scores: - rank E-value score bias modelname start end mdl trunc gc description - ---- --------- ------ ----- --------- ------ ------ --- ----- ---- ----------- - - [No hits detected that satisfy reporting thresholds] - - -Hit alignments: - - [No hits detected that satisfy reporting thresholds] - - -Internal CM pipeline statistics summary: ----------------------------------------- -Query sequence(s): 1 (2181880 residues searched) -Query sequences re-searched for truncated hits: 1 (1166.7 residues re-searched, avg per model) -Target model(s): 3 (664 consensus positions) -Windows passing local HMM SSV filter: 9492 (0.3967); expected (0.35) -Windows passing local HMM Viterbi filter: (off) -Windows passing local HMM Viterbi bias filter: (off) -Windows passing local HMM Forward filter: 2806 (0.1357); expected (0.005) -Windows passing local HMM Forward bias filter: 435 (0.02432); expected (0.005) -Windows passing glocal HMM Forward filter: 208 (0.01708); expected (0.005) -Windows passing glocal HMM Forward bias filter: 111 (0.008745); expected (0.005) -Envelopes passing glocal HMM envelope defn filter: 123 (0.004086); expected (0.005) -Envelopes passing local CM CYK filter: 4 (7.848e-05); expected (0.0001) -Total CM hits reported: 0 (0); includes 0 truncated hit(s) - -# CPU time: 11.61u 0.39s 00:00:12.00 Elapsed: 00:00:10.64 -// -Query: ENA|BK006934|BK006934.2 [L=562643] -Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome VIII, complete sequence. 
-Hit scores: - rank E-value score bias modelname start end mdl trunc gc description - ---- --------- ------ ----- --------- ------ ------ --- ----- ---- ----------- - - [No hits detected that satisfy reporting thresholds] - - -Hit alignments: - - [No hits detected that satisfy reporting thresholds] - - -Internal CM pipeline statistics summary: ----------------------------------------- -Query sequence(s): 1 (1125286 residues searched) -Query sequences re-searched for truncated hits: 1 (1166.7 residues re-searched, avg per model) -Target model(s): 3 (664 consensus positions) -Windows passing local HMM SSV filter: 4854 (0.3931); expected (0.35) -Windows passing local HMM Viterbi filter: (off) -Windows passing local HMM Viterbi bias filter: (off) -Windows passing local HMM Forward filter: 1398 (0.1295); expected (0.005) -Windows passing local HMM Forward bias filter: 235 (0.02555); expected (0.005) -Windows passing glocal HMM Forward filter: 116 (0.01844); expected (0.005) -Windows passing glocal HMM Forward bias filter: 70 (0.01048); expected (0.005) -Envelopes passing glocal HMM envelope defn filter: 70 (0.004666); expected (0.005) -Envelopes passing local CM CYK filter: 3 (6.155e-05); expected (0.0001) -Total CM hits reported: 0 (0); includes 0 truncated hit(s) - -# CPU time: 6.55u 0.24s 00:00:06.79 Elapsed: 00:00:06.06 -// -Query: ENA|BK006943|BK006943.2 [L=745751] -Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome X, complete sequence. -Hit scores: - rank E-value score bias modelname start end mdl trunc gc description - ---- --------- ------ ----- --------- ------ ------ --- ----- ---- ----------- - ------ inclusion threshold ------ - (1) ? 0.1 20.9 0.0 5S_rRNA 357031 357144 + cm no 0.46 5S ribosomal RNA - (2) ? 4.3 14.6 0.0 5S_rRNA 359490 359579 + cm no 0.36 5S ribosomal RNA - (3) ? 
5.9 12.2 0.0 U2 448179 448142 - cm no 0.21 U2 spliceosomal RNA - - -Hit alignments: ->> 5S_rRNA 5S ribosomal RNA - rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc - ---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ---- - (1) ? 0.1 20.9 0.0 cm 1 119 [] 357031 357144 + .. 0.86 no 0.46 - - v vv v vv vv v vv vv NC - (((((((((,,,,<<-<<<<<---<<--<<<<<<______>>-->>>>-.->>---->>>>>-->><<<-<<----<-<< CS - 5S_rRNA 1 gccuGcggcCAUAccagcgcgaAagcACcgGauCCCAUCcGaACuCcgA.AguUAAGcgcgcUugggCcagggUAGUAcu 79 - ::: GC:G CA A:: G :G+AA: AC:: ++ CAU C ::A A :UAAGC: CU+:: C G:G GUACU - ENA|BK006943|BK006943.2 357031 CAGGGCUGGCAGAGGUGUCGGGAAAAACAAGGAU-CAUAU--CCUUUUAcAAUUAAGCCAUCUACCACCUGAG--GUACU 357105 - ****************************888743.44433..555555579**********************..***** PP - - v v vvv NC - -----<<____>>----->>->-->>->>>)))))).))): CS - 5S_rRNA 80 agGaUGgGuGAcCuCcUGggAAgaccagGugccgCa.ggcc 119 - A + G CU C GGGAA+A:C+G C:GC :::+ - ENA|BK006943|BK006943.2 357106 AAAG-G-AAAGGCUACCGGGAAUAUCUGAAACAGCUgCUGU 357144 - 9994.3.33334778889****************9879*** PP - ->> 5S_rRNA 5S ribosomal RNA - rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc - ---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ---- - (2) ? 4.3 14.6 0.0 cm 1 119 [] 359490 359579 + .. 
0.94 no 0.36 - - v vv v v v v vv v NC - (((((((((,,,,<<-<<<<<---<<--<<<<<<______>>-->>>>-->>---->>>>>-->><~~~~~~>.)))))) CS - 5S_rRNA 1 gccuGcggcCAUAccagcgcgaAagcACcgGauCCCAUCcGaACuCcgAAguUAAGcgcgcUuggg*[42]*u.gccgCa 115 - :::U:::G UA:CAG : AA:CA ::G: UC+ AC:C::AAG: AAG : C++G:G U C:::A - ENA|BK006943|BK006943.2 359490 CUUUAAAGU--UAUCAGUUAACAAGCAGUUGUUGUUUUCAUCACACAGAAGCAAAGCUUUCCAGAG*[14]*UuUCUUUA 359575 - ********9..66*******************99999999999**********************9...8..459***** PP - - NC - ))): CS - 5S_rRNA 116 ggcc 119 - ::: - ENA|BK006943|BK006943.2 359576 AAGA 359579 - **** PP - ->> U2 U2 spliceosomal RNA - rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc - ---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ---- - (3) ? 5.9 12.2 0.0 cm 45 84 .. 448179 448142 - .. 0.93 no 0.21 - - v v vv vv NC - ,,<<<<<<________>>>>>>,<<<<<<<___>>>>>>> CS - U2 45 CUUauCAGUuUAAuAuCUGauAuggcccccAuugggggcc 84 - CU :U AGUUUAA+AUCU A: UG: :::AU :::+ :C - ENA|BK006943|BK006943.2 448179 CUCUUUAGUUUAACAUCUGAAUUGU--UUUAUAAAAUAAC 448142 - *************************..678887888999* PP - - - -Internal CM pipeline statistics summary: ----------------------------------------- -Query sequence(s): 1 (1491502 residues searched) -Query sequences re-searched for truncated hits: 1 (1166.7 residues re-searched, avg per model) -Target model(s): 3 (664 consensus positions) -Windows passing local HMM SSV filter: 6360 (0.3887); expected (0.35) -Windows passing local HMM Viterbi filter: (off) -Windows passing local HMM Viterbi bias filter: (off) -Windows passing local HMM Forward filter: 1764 (0.1256); expected (0.005) -Windows passing local HMM Forward bias filter: 304 (0.02482); expected (0.005) -Windows passing glocal HMM Forward filter: 144 (0.01708); expected (0.005) -Windows passing glocal HMM Forward bias filter: 89 (0.009796); expected (0.005) -Envelopes passing glocal HMM envelope defn filter: 93 (0.004667); expected (0.005) 
-Envelopes passing local CM CYK filter: 7 (0.0001436); expected (0.0001) -Total CM hits reported: 3 (5.404e-05); includes 0 truncated hit(s) - -# CPU time: 9.66u 0.39s 00:00:10.05 Elapsed: 00:00:09.09 -// -Query: ENA|BK006944|BK006944.2 [L=666816] -Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome XI, complete sequence. -Hit scores: - rank E-value score bias modelname start end mdl trunc gc description - ---- --------- ------ ----- --------- ------ ------ --- ----- ---- ----------- - - [No hits detected that satisfy reporting thresholds] - - -Hit alignments: - - [No hits detected that satisfy reporting thresholds] - - -Internal CM pipeline statistics summary: ----------------------------------------- -Query sequence(s): 1 (1333632 residues searched) -Query sequences re-searched for truncated hits: 1 (1166.7 residues re-searched, avg per model) -Target model(s): 3 (664 consensus positions) -Windows passing local HMM SSV filter: 5848 (0.3971); expected (0.35) -Windows passing local HMM Viterbi filter: (off) -Windows passing local HMM Viterbi bias filter: (off) -Windows passing local HMM Forward filter: 1755 (0.1375); expected (0.005) -Windows passing local HMM Forward bias filter: 254 (0.02353); expected (0.005) -Windows passing glocal HMM Forward filter: 121 (0.01613); expected (0.005) -Windows passing glocal HMM Forward bias filter: 71 (0.008891); expected (0.005) -Envelopes passing glocal HMM envelope defn filter: 79 (0.004272); expected (0.005) -Envelopes passing local CM CYK filter: 6 (0.0001923); expected (0.0001) -Total CM hits reported: 0 (0); includes 0 truncated hit(s) - -# CPU time: 7.18u 0.22s 00:00:07.40 Elapsed: 00:00:06.87 -// -Query: ENA|BK006945|BK006945.2 [L=1078177] -Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome XII, complete sequence. -Hit scores: - rank E-value score bias modelname start end mdl trunc gc description - ---- --------- ------ ----- --------- ------ ------ --- ----- ---- ----------- - (1) ! 
4.2e-19 88.8 0.0 5S_rRNA 459676 459796 + cm no 0.52 5S ribosomal RNA - (2) ! 4.2e-19 88.8 0.0 5S_rRNA 489349 489469 + cm no 0.52 5S ribosomal RNA - (3) ! 1.2e-17 83.2 0.0 5S_rRNA 468813 468933 + cm no 0.53 5S ribosomal RNA - (4) ! 1.2e-17 83.2 0.0 5S_rRNA 472465 472585 + cm no 0.53 5S ribosomal RNA - (5) ! 1.2e-17 83.2 0.0 5S_rRNA 482045 482165 + cm no 0.53 5S ribosomal RNA - (6) ! 1.2e-17 83.2 0.0 5S_rRNA 485697 485817 + cm no 0.53 5S ribosomal RNA - ------ inclusion threshold ------ - (7) ? 5.6 13.0 0.0 U2 940222 940147 - cm no 0.30 U2 spliceosomal RNA - - -Hit alignments: ->> 5S_rRNA 5S ribosomal RNA - rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc - ---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ---- - (1) ! 4.2e-19 88.8 0.0 cm 1 119 [] 459676 459796 + .. 0.99 no 0.52 - - v NC - (((((((((,,,,<<-<<<<<---<<--<<<<<<______>>-->>>.>-->>---->>>>>-->><<<-<<---.-<-< CS - 5S_rRNA 1 gccuGcggcCAUAccagcgcgaAagcACcgGauCCCAUCcGaACuCc.gAAguUAAGcgcgcUugggCcagggUA.GUAc 78 - G::UGC:GCCAUA:C :C::GAAAGCACCG :UCCC+UCCGA C: C G AGUUAAGC::G: +G:GCC G: GUA - ENA|BK006945|BK006945.2 459676 GGUUGCGGCCAUAUCUACCAGAAAGCACCGUUUCCCGUCCGAUCAACuGUAGUUAAGCUGGUAAGAGCCUGACCGaGUAG 459755 - ***********************************************99***********************8756**** PP - - v vv NC - <-----<<____>>----->>->-->>->>>))))))))): CS - 5S_rRNA 79 uagGaUGgGuGAcCuCcUGggAAgaccagGugccgCaggcc 119 - + +UGGGUGACC+ G AA :CAGGUGC:GCA::C+ - ENA|BK006945|BK006945.2 459756 UGUAGUGGGUGACCAUACGCGAAACUCAGGUGCUGCAAUCU 459796 - ***********************9***************** PP - ->> 5S_rRNA 5S ribosomal RNA - rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc - ---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ---- - (2) ! 4.2e-19 88.8 0.0 cm 1 119 [] 489349 489469 + .. 
0.99 no 0.52 - - v NC - (((((((((,,,,<<-<<<<<---<<--<<<<<<______>>-->>>.>-->>---->>>>>-->><<<-<<---.-<-< CS - 5S_rRNA 1 gccuGcggcCAUAccagcgcgaAagcACcgGauCCCAUCcGaACuCc.gAAguUAAGcgcgcUugggCcagggUA.GUAc 78 - G::UGC:GCCAUA:C :C::GAAAGCACCG :UCCC+UCCGA C: C G AGUUAAGC::G: +G:GCC G: GUA - ENA|BK006945|BK006945.2 489349 GGUUGCGGCCAUAUCUACCAGAAAGCACCGUUUCCCGUCCGAUCAACuGUAGUUAAGCUGGUAAGAGCCUGACCGaGUAG 489428 - ***********************************************99***********************8756**** PP - - v vv NC - <-----<<____>>----->>->-->>->>>))))))))): CS - 5S_rRNA 79 uagGaUGgGuGAcCuCcUGggAAgaccagGugccgCaggcc 119 - + +UGGGUGACC+ G AA :CAGGUGC:GCA::C+ - ENA|BK006945|BK006945.2 489429 UGUAGUGGGUGACCAUACGCGAAACUCAGGUGCUGCAAUCU 489469 - ***********************9***************** PP - ->> 5S_rRNA 5S ribosomal RNA - rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc - ---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ---- - (3) ! 1.2e-17 83.2 0.0 cm 1 119 [] 468813 468933 + .. 
0.99 no 0.53 - - v NC - (((((((((,,,,<<-<<<<<---<<--<<<<<<______>>-->>>.>-->>---->>>>>-->><<<-<<---.-<-< CS - 5S_rRNA 1 gccuGcggcCAUAccagcgcgaAagcACcgGauCCCAUCcGaACuCc.gAAguUAAGcgcgcUugggCcagggUA.GUAc 78 - : UGC:GCCAUA:C :C::GAAAGCACCG :UCCC+UCCGA C: C G AGUUAAGC::G: +G:GCC G: GUA - ENA|BK006945|BK006945.2 468813 GGUUGCGGCCAUAUCUACCAGAAAGCACCGUUUCCCGUCCGAUCAACuGUAGUUAAGCUGGUAAGAGCCUGACCGaGUAG 468892 - ***********************************************99***********************8756**** PP - - v vv NC - <-----<<____>>----->>->-->>->>>))))))))): CS - 5S_rRNA 79 uagGaUGgGuGAcCuCcUGggAAgaccagGugccgCaggcc 119 - + +UGGGUGACC+ G AA :CAGGUGC:GCA : - ENA|BK006945|BK006945.2 468893 UGUAGUGGGUGACCAUACGCGAAACUCAGGUGCUGCAGUUG 468933 - ***********************9***************** PP - ->> 5S_rRNA 5S ribosomal RNA - rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc - ---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ---- - (4) ! 1.2e-17 83.2 0.0 cm 1 119 [] 472465 472585 + .. 
0.99 no 0.53 - - v NC - (((((((((,,,,<<-<<<<<---<<--<<<<<<______>>-->>>.>-->>---->>>>>-->><<<-<<---.-<-< CS - 5S_rRNA 1 gccuGcggcCAUAccagcgcgaAagcACcgGauCCCAUCcGaACuCc.gAAguUAAGcgcgcUugggCcagggUA.GUAc 78 - : UGC:GCCAUA:C :C::GAAAGCACCG :UCCC+UCCGA C: C G AGUUAAGC::G: +G:GCC G: GUA - ENA|BK006945|BK006945.2 472465 GGUUGCGGCCAUAUCUACCAGAAAGCACCGUUUCCCGUCCGAUCAACuGUAGUUAAGCUGGUAAGAGCCUGACCGaGUAG 472544 - ***********************************************99***********************8756**** PP - - v vv NC - <-----<<____>>----->>->-->>->>>))))))))): CS - 5S_rRNA 79 uagGaUGgGuGAcCuCcUGggAAgaccagGugccgCaggcc 119 - + +UGGGUGACC+ G AA :CAGGUGC:GCA : - ENA|BK006945|BK006945.2 472545 UGUAGUGGGUGACCAUACGCGAAACUCAGGUGCUGCAGUUG 472585 - ***********************9***************** PP - ->> 5S_rRNA 5S ribosomal RNA - rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc - ---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ---- - (5) ! 1.2e-17 83.2 0.0 cm 1 119 [] 482045 482165 + .. 
0.99 no 0.53 - - v NC - (((((((((,,,,<<-<<<<<---<<--<<<<<<______>>-->>>.>-->>---->>>>>-->><<<-<<---.-<-< CS - 5S_rRNA 1 gccuGcggcCAUAccagcgcgaAagcACcgGauCCCAUCcGaACuCc.gAAguUAAGcgcgcUugggCcagggUA.GUAc 78 - : UGC:GCCAUA:C :C::GAAAGCACCG :UCCC+UCCGA C: C G AGUUAAGC::G: +G:GCC G: GUA - ENA|BK006945|BK006945.2 482045 GGUUGCGGCCAUAUCUACCAGAAAGCACCGUUUCCCGUCCGAUCAACuGUAGUUAAGCUGGUAAGAGCCUGACCGaGUAG 482124 - ***********************************************99***********************8756**** PP - - v vv NC - <-----<<____>>----->>->-->>->>>))))))))): CS - 5S_rRNA 79 uagGaUGgGuGAcCuCcUGggAAgaccagGugccgCaggcc 119 - + +UGGGUGACC+ G AA :CAGGUGC:GCA : - ENA|BK006945|BK006945.2 482125 UGUAGUGGGUGACCAUACGCGAAACUCAGGUGCUGCAGUUG 482165 - ***********************9***************** PP - ->> 5S_rRNA 5S ribosomal RNA - rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc - ---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ---- - (6) ! 1.2e-17 83.2 0.0 cm 1 119 [] 485697 485817 + .. 
0.99 no 0.53 - - v NC - (((((((((,,,,<<-<<<<<---<<--<<<<<<______>>-->>>.>-->>---->>>>>-->><<<-<<---.-<-< CS - 5S_rRNA 1 gccuGcggcCAUAccagcgcgaAagcACcgGauCCCAUCcGaACuCc.gAAguUAAGcgcgcUugggCcagggUA.GUAc 78 - : UGC:GCCAUA:C :C::GAAAGCACCG :UCCC+UCCGA C: C G AGUUAAGC::G: +G:GCC G: GUA - ENA|BK006945|BK006945.2 485697 GGUUGCGGCCAUAUCUACCAGAAAGCACCGUUUCCCGUCCGAUCAACuGUAGUUAAGCUGGUAAGAGCCUGACCGaGUAG 485776 - ***********************************************99***********************8756**** PP - - v vv NC - <-----<<____>>----->>->-->>->>>))))))))): CS - 5S_rRNA 79 uagGaUGgGuGAcCuCcUGggAAgaccagGugccgCaggcc 119 - + +UGGGUGACC+ G AA :CAGGUGC:GCA : - ENA|BK006945|BK006945.2 485777 UGUAGUGGGUGACCAUACGCGAAACUCAGGUGCUGCAGUUG 485817 - ***********************9***************** PP - ->> U2 U2 spliceosomal RNA - rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc - ---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ---- - (7) ? 5.6 13.0 0.0 cm 1 193 [] 940222 940147 - .. 
0.93 no 0.30 - - v v NC - ::::::<<<-<<<<____>>>>->>>,,,,,~~~~~~,,,,,,,,,,,,,,,,,,,,,,,,~~~~~~::::::: CS - U2 1 AUacCUUCucgGCcUUUUgGCuaaGAUCAAG*[53]*aauuauaUUAaauuaAUUUUUgga*[78]*Acccuuu 193 - AU CU C C:UU :G + G UCA G +AUUAU UUAAA U+AUUUUUGG A+C UU - ENA|BK006945|BK006945.2 940222 AUUACUACUGUUCUUUCCAGAAUUGUUCAUG*[11]*UAUUAUCUUAAAAUUAUUUUUGGC*[ 3]*AACGUUA 940147 - ****************************965...9..666******************986...7..******* PP - - - -Internal CM pipeline statistics summary: ----------------------------------------- -Query sequence(s): 1 (2156354 residues searched) -Query sequences re-searched for truncated hits: 1 (1166.7 residues re-searched, avg per model) -Target model(s): 3 (664 consensus positions) -Windows passing local HMM SSV filter: 9345 (0.3951); expected (0.35) -Windows passing local HMM Viterbi filter: (off) -Windows passing local HMM Viterbi bias filter: (off) -Windows passing local HMM Forward filter: 2609 (0.1273); expected (0.005) -Windows passing local HMM Forward bias filter: 426 (0.02429); expected (0.005) -Windows passing glocal HMM Forward filter: 216 (0.01746); expected (0.005) -Windows passing glocal HMM Forward bias filter: 133 (0.01025); expected (0.005) -Envelopes passing glocal HMM envelope defn filter: 140 (0.004716); expected (0.005) -Envelopes passing local CM CYK filter: 12 (0.0002032); expected (0.0001) -Total CM hits reported: 7 (0.0001239); includes 0 truncated hit(s) - -# CPU time: 12.39u 0.35s 00:00:12.74 Elapsed: 00:00:11.30 -// -Query: ENA|BK006946|BK006946.2 [L=924431] -Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome XIII, complete sequence. 
-Hit scores: - rank E-value score bias modelname start end mdl trunc gc description - ---- --------- ------ ----- --------- ------ ------ --- ----- ---- ----------- - - [No hits detected that satisfy reporting thresholds] - - -Hit alignments: - - [No hits detected that satisfy reporting thresholds] - - -Internal CM pipeline statistics summary: ----------------------------------------- -Query sequence(s): 1 (1848862 residues searched) -Query sequences re-searched for truncated hits: 1 (1166.7 residues re-searched, avg per model) -Target model(s): 3 (664 consensus positions) -Windows passing local HMM SSV filter: 7943 (0.3913); expected (0.35) -Windows passing local HMM Viterbi filter: (off) -Windows passing local HMM Viterbi bias filter: (off) -Windows passing local HMM Forward filter: 2289 (0.1302); expected (0.005) -Windows passing local HMM Forward bias filter: 347 (0.02363); expected (0.005) -Windows passing glocal HMM Forward filter: 173 (0.01619); expected (0.005) -Windows passing glocal HMM Forward bias filter: 98 (0.008606); expected (0.005) -Envelopes passing glocal HMM envelope defn filter: 107 (0.004312); expected (0.005) -Envelopes passing local CM CYK filter: 10 (0.0002205); expected (0.0001) -Total CM hits reported: 0 (0); includes 0 truncated hit(s) - -# CPU time: 10.99u 0.27s 00:00:11.26 Elapsed: 00:00:10.70 -// -Query: ENA|BK006947|BK006947.3 [L=784333] -Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome XIV, complete sequence. -Hit scores: - rank E-value score bias modelname start end mdl trunc gc description - ---- --------- ------ ----- --------- ------ ------ --- ----- ---- ----------- - ------ inclusion threshold ------ - (1) ? 0.97 15.6 0.1 U2 557234 557379 + cm no 0.34 U2 spliceosomal RNA - (2) ? 1.1 15.3 0.0 U2 266059 266208 + cm no 0.39 U2 spliceosomal RNA - (3) ? 
1.3 16.7 0.3 5S_rRNA 7085 6968 - cm no 0.41 5S ribosomal RNA - - -Hit alignments: ->> U2 U2 spliceosomal RNA - rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc - ---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ---- - (1) ? 0.97 15.6 0.1 cm 1 193 [] 557234 557379 + .. 0.77 no 0.34 - - vv vv v NC - ::::::<<<-<<<<____>>>>->>>,,,,,~~~~~~,,,,,,,,,,,,,,,,,,,,,,,,,,,,<<<<<<<<----... CS - U2 1 AUacCUUCucgGCcUUUUgGCuaaGAUCAAG*[53]*aauuauaUUAaauuaAUUUUUggaacuaGugggggcauuu... 124 - U UUCU :G UUU C:AAGAUCAAG AA AUAUUAA+ AA UUUUG+A ++A::::: :: + - ENA|BK006947|BK006947.3 557234 UUUUGUUCUAUGUAAUUUGCCUAAGAUCAAG*[ 8]*AA-CAUAUUAAUAGAACUUUUGAAGUGACAAUCGCGCGAAguu 557314 - ******************99*********98...7..44.99**********************9999998887766666 PP - - v v v v vvvvv vvvvv v NC - ...<<~~~~~>>-.->>>>>>>>,,<<<<<<-<<<<<<___________>>>>>>-->>>>>>::::::: CS - U2 125 ...ug*[8]*cau.ugcccccaCacggguugaccuggcaUUGCAcUaccgccagguucagcccAcccuuu 193 - U +:: ::::: +: ::U A U+ :AUU UAC+: U A:: :ACC+ U - ENA|BK006947|BK006947.3 557315 uccAG*[8]*UAUaAUGGGAUUGUUUGCCUUAGGUACAAUUA---UACUU-----GUGAGGGGACCUAGU 557379 - 44433..4..44467899999999988888754444444433...22222.....46888899******* PP - ->> U2 U2 spliceosomal RNA - rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc - ---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ---- - (2) ? 1.1 15.3 0.0 cm 1 193 [] 266059 266208 + .. 
0.91 no 0.39 - - v v v NC - ::::::.<<<.-<<<<____>>>>->>>,,,,.,,,,,,~~~~~~,,,,,,,,,,,,,,,,,,,,,,,,,,,,<<<<<<< CS - U2 1 AUacCU.UCu.cgGCcUUUUgGCuaaGAUCAA.GUGUAG*[48]*aauuauaUUAaauuaAUUUUUggaacuaGuggggg 119 - AU UCU + G C UUG C AGAU A GUGUAG UUAUAU +UU AU UUU G +A:: : G: - ENA|BK006947|BK006947.3 266059 AUGUUGaUCUaUCGUCAAUUGACCCAGAUGAUaGUGUAG*[ 1]*-GUUAUAUAGUUUUGAUAUUUUGGCGAAAAGUUGA 266132 - *****9****999****************9988999987...5...3377778888888888888888888888888888 PP - - v v v v v v v v v NC - <----.<<<<<__>>>>>-..->>>>>>>>,,<<<<<<-<<<<<<___________>>>>>>-->>>>>>::::::: CS - U2 120 cauuu.uggGCUUGCccau..ugcccccaCacggguugaccuggcaUUGCAcUaccgccagguucagcccAcccuuu 193 - :A+U+ U :GCUUGC: AU +::C : :: G: :AC: G U GCA UA+ C :GU+: :C +U + - ENA|BK006947|BK006947.3 266133 GAAUAuUGCGCUUGCGUAUauAUUCCAUUUGAGGUGGCACUAGAGCUCGCAUUAU-UACCAGUAGUGGCAGGAUUGC 266208 - 888888**************99999******************************.99******************* PP - ->> 5S_rRNA 5S ribosomal RNA - rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc - ---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ---- - (3) ? 1.3 16.7 0.3 cm 1 119 [] 7085 6968 - .. 
0.91 no 0.41 - - v v v v v v v NC - (((((((((,,,,<<-<<.<<<.---<<--<<<<.<<______>>-->>>>-->>---->>>>>-->><<<-<<----<-<<-- CS - 5S_rRNA 1 gccuGcggcCAUAccagc.gcg.aAagcACcgGa.uCCCAUCcGaACuCcgAAguUAAGcgcgcUugggCcagggUAGUAcuag 81 - : :: ::: AUAC + :: G:AC:::: CC AUC+G ::::AA:U AAG :: U+ GGC: :G GUA U+G - ENA|BK006947|BK006947.3 7085 GAGAUGGUAUAUACUGUAgCAUcCGUGUACGUAUgACCGAUCAGA--AUACAAGUGAAGGUGAGUAUGGCAUGUG--GUAGUGG 7006 - **************976325541459999****989999999999..89**********9999999*********..******* PP - - v NC - ---<<____>>----->>->-->>->>>.))))))))): CS - 5S_rRNA 82 GaUGgGuGAcCuCcUGggAAgaccagGu.gccgCaggcc 119 - GAU :G G : GG AAG+: A:GU ::: :: : C - ENA|BK006947|BK006947.3 7005 GAUUAGAG-UGGUAGGGUAAGUAUAUGUgUAUUAUUUAC 6968 - ***99988.689999************************ PP - - - -Internal CM pipeline statistics summary: ----------------------------------------- -Query sequence(s): 1 (1568666 residues searched) -Query sequences re-searched for truncated hits: 1 (1166.7 residues re-searched, avg per model) -Target model(s): 3 (664 consensus positions) -Windows passing local HMM SSV filter: 6669 (0.3884); expected (0.35) -Windows passing local HMM Viterbi filter: (off) -Windows passing local HMM Viterbi bias filter: (off) -Windows passing local HMM Forward filter: 1739 (0.1193); expected (0.005) -Windows passing local HMM Forward bias filter: 256 (0.02055); expected (0.005) -Windows passing glocal HMM Forward filter: 135 (0.01459); expected (0.005) -Windows passing glocal HMM Forward bias filter: 80 (0.008433); expected (0.005) -Envelopes passing glocal HMM envelope defn filter: 90 (0.004393); expected (0.005) -Envelopes passing local CM CYK filter: 6 (0.0001915); expected (0.0001) -Total CM hits reported: 3 (8.791e-05); includes 0 truncated hit(s) - -# CPU time: 9.87u 0.47s 00:00:10.34 Elapsed: 00:00:09.67 -// -Query: ENA|BK006948|BK006948.2 [L=1091291] -Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome XV, complete sequence. 
-Hit scores: - rank E-value score bias modelname start end mdl trunc gc description - ---- --------- ------ ----- --------- ------ ------ --- ----- ---- ----------- - ------ inclusion threshold ------ - (1) ? 0.13 19.8 0.0 U2 737498 737324 - cm no 0.39 U2 spliceosomal RNA - (2) ? 3.7 13.7 0.9 U2 425490 425693 + cm no 0.34 U2 spliceosomal RNA - - -Hit alignments: ->> U2 U2 spliceosomal RNA - rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc - ---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ---- - (1) ? 0.13 19.8 0.0 cm 1 193 [] 737498 737324 - .. 0.96 no 0.39 - - NC - ::::::<<<-<<<<____>>>>->>>,,,,,,,,,,,,,,,,,,,,<<<<<<~~~~~~>>>>>>,<<<<<<<___>>>>> CS - U2 1 AUacCUUCucgGCcUUUUgGCuaaGAUCAAGUGUAGUAUCUGUUCUUauCAG*[ 8]*CUGauAuggcccccAuuggggg 82 - AU+CC U U+ GCC U GGC +A AU AAGU UA UA C GUUCU:A::A U::U: ::::::A U::::: - ENA|BK006948|BK006948.2 737498 AUCCCAUAUUUGCCAUC-GGCAUAUAUUAAGUAUAUUAGCAGUUCUAAUUAC*[88]*GUAGUUGGAAGGAUACUAUCCU 737338 - **************999.*******************************996...*..6999999999999999999999 PP - - NC - >>,,,,,,,,,,,,,,,,,,,,,,,,,,,,~~~~~~~~~~~~::::::: CS - U2 83 ccaauuauaUUAaauuaAUUUUUggaacua*[34]**[40]*Acccuuu 193 - : A+ A CC++U - ENA|BK006948|BK006948.2 737337 UUAU--------------------------*[ 2]**[ 1]*AUCCCCU 737324 - 9987.............................6.....9..******* PP - ->> U2 U2 spliceosomal RNA - rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc - ---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ---- - (2) ? 3.7 13.7 0.9 cm 1 193 [] 425490 425693 + .. 
0.72 no 0.34 - - v v v NC - ::::::<<<-~~~~~~->>>,,,,,,,,,,,,,,,,,,,,<<<<<<________>>>>>>,<<<<<<<__~~~~~~>>>> CS - U2 1 AUacCUUCuc*[12]*aaGAUCAAGUGUAGUAUCUGUUCUUauCAGUuUAAuAuCUGauAuggcccccAu*[ 1]*gggg 81 - A +CUUCU+ AAGAUCAAGU U UAUC U U:UC : U A AU: GA:AU:: ::CCA+ GG:: - ENA|BK006948|BK006948.2 425490 AAUGCUUCUU*[21]*AAGAUCAAGUUUUUUAUCCUUCGAUUUCAAAUGGAGAUUGGAAAUAUAUUCCAA*[11]*GGAA 425589 - *********7...*..5*****************877778888888888899989999999999999986...5..4444 PP - - v vvvvvvv vvvvvvv v vvvvv NC - >>>,,,.,,,,,,,,,,,,,,,,,,,,,,,,,..<<<<<<<<----~~~~~~->>>>>>>>,,<<<<<<-<<<<<<____ CS - U2 82 gccaau.uauaUUAaauuaAUUUUUggaacua..Gugggggcauuu*[13]*ugcccccaCacggguugaccuggcaUUG 165 - ::+AU U+U UAAA +A UUUU GAA+U+ G + C+++++C ::UU :AU G - ENA|BK006948|BK006948.2 425590 AAUUAUcUUUGCUAAAACUAGUUUUAGAAAUUggG-----------*[19]*ACCUAAUUCGACUCUUUC-----GAUAG 425666 - 44444474444444444444444444444444233..............5..344555555555555555.....89*** PP - - vvvvv v NC - _______>>>>>>-->>>>>>::::::: CS - U2 166 CAcUaccgccagguucagcccAcccuuu 193 - CACU++ :++A+++U AG:: AC ++UU - ENA|BK006948|BK006948.2 425667 CACUUU-CAAAAAAUGAGGAUACAUCUU 425693 - ******.56666679************* PP - - - -Internal CM pipeline statistics summary: ----------------------------------------- -Query sequence(s): 1 (2182582 residues searched) -Query sequences re-searched for truncated hits: 1 (1166.7 residues re-searched, avg per model) -Target model(s): 3 (664 consensus positions) -Windows passing local HMM SSV filter: 9405 (0.3919); expected (0.35) -Windows passing local HMM Viterbi filter: (off) -Windows passing local HMM Viterbi bias filter: (off) -Windows passing local HMM Forward filter: 2786 (0.1338); expected (0.005) -Windows passing local HMM Forward bias filter: 428 (0.02405); expected (0.005) -Windows passing glocal HMM Forward filter: 217 (0.01719); expected (0.005) -Windows passing glocal HMM Forward bias filter: 117 (0.008698); expected (0.005) -Envelopes passing glocal HMM envelope defn filter: 122 
(0.004139); expected (0.005) -Envelopes passing local CM CYK filter: 7 (0.0001313); expected (0.0001) -Total CM hits reported: 2 (5.785e-05); includes 0 truncated hit(s) - -# CPU time: 12.80u 0.35s 00:00:13.15 Elapsed: 00:00:12.47 -// -Query: ENA|BK006949|BK006949.2 [L=948066] -Description: TPA_inf: Saccharomyces cerevisiae S288C chromosome XVI, complete sequence. -Hit scores: - rank E-value score bias modelname start end mdl trunc gc description - ---- --------- ------ ----- --------- ------ ------ --- ----- ---- ----------- - ------ inclusion threshold ------ - (1) ? 1.6 15.1 0.4 U2 443393 443253 - cm no 0.32 U2 spliceosomal RNA - - -Hit alignments: ->> U2 U2 spliceosomal RNA - rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc - ---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ---- - (1) ? 1.6 15.1 0.4 cm 1 193 [] 443393 443253 - .. 0.71 no 0.32 - - v v v v NC - ::::::<<<-<<<<____>>>>->>>,,,,,,,,,,,,,,,,,,,,~~~~~~,<<<<<<<___>>>>>>>,,,..,,,,, CS - U2 1 AUacCUUCucgGCcUUUUgGCuaaGAUCAAGUGUAGUAUCUGUUCU*[20]*uggcccccAuugggggccaau..uauaU 92 - A U C:+G C G UA:G UCAAG UAGUAU UGUUCU ::: C A+ G :::++U - ENA|BK006949|BK006949.2 443393 CCAUUUACAUGAACCCCAGUUUAUGUUCAAG--UAGUAUAUGUUCU*[ 4]*-UCAACGCAAACUGCUGAUUUcaCC--- 443322 - ***************999************6..7**********98...4...222222222222222222222222... PP - - v v v v v v v NC - ,,,,,,,,,,,,,,,,,,,,<<<<<<<<----<<<<<__>>>>>-->>>>>>>>,,<<<<<<-<<<<<<___________ CS - U2 93 UAaauuaAUUUUUggaacuaGugggggcauuuuggGCUUGCccauugcccccaCacggguugaccuggcaUUGCAcUacc 172 - :U : :::+U U UU+ ::: : A:A+ ::: G+C: : :A U CAC AC - ENA|BK006949|BK006949.2 443321 --------------------AUUGAAUAUUGU-----UUA------UAUAUGAUAUAUACCGUCAAAUUACUUCACGAC- 443274 - ....................222222222222.....221......3333345599***********************. 
PP - - v v v NC - >>>>>>-->>>>>>::::::: CS - U2 173 gccagguucagcccAcccuuu 193 - : : :G++C ::: + U+ - ENA|BK006949|BK006949.2 443273 AGUGUGAACUGUGAUAAAUCA 443253 - ********************* PP - - - -Internal CM pipeline statistics summary: ----------------------------------------- -Query sequence(s): 1 (1896132 residues searched) -Query sequences re-searched for truncated hits: 1 (1166.7 residues re-searched, avg per model) -Target model(s): 3 (664 consensus positions) -Windows passing local HMM SSV filter: 8247 (0.3949); expected (0.35) -Windows passing local HMM Viterbi filter: (off) -Windows passing local HMM Viterbi bias filter: (off) -Windows passing local HMM Forward filter: 2447 (0.135); expected (0.005) -Windows passing local HMM Forward bias filter: 399 (0.02634); expected (0.005) -Windows passing glocal HMM Forward filter: 203 (0.01877); expected (0.005) -Windows passing glocal HMM Forward bias filter: 120 (0.01118); expected (0.005) -Envelopes passing glocal HMM envelope defn filter: 129 (0.005262); expected (0.005) -Envelopes passing local CM CYK filter: 7 (0.0001671); expected (0.0001) -Total CM hits reported: 1 (2.477e-05); includes 0 truncated hit(s) - -# CPU time: 13.81u 0.55s 00:00:14.36 Elapsed: 00:00:13.09 +# CPU time: 10.51u 0.39s 00:00:10.90 Elapsed: 00:00:09.15 // [ok] diff --git a/Tests/Infernal/IRES_5S_U2_Yeast-cmscan_fmt_2.tbl b/Tests/Infernal/IRES_5S_U2_Yeast-cmscan_fmt_2.tbl deleted file mode 100644 index fabd133e6..000000000 --- a/Tests/Infernal/IRES_5S_U2_Yeast-cmscan_fmt_2.tbl +++ /dev/null @@ -1,35 +0,0 @@ -#idx target name accession query name accession clan name mdl mdl from mdl to seq from seq to strand trunc pass gc bias score E-value inc olp anyidx afrct1 afrct2 winidx wfrct1 wfrct2 description of target -#--- -------------------- --------- ----------------------- --------- --------- --- -------- -------- -------- -------- ------ ----- ---- ---- ----- ------ --------- --- --- ------ ------ ------ ------ ------ ------ 
--------------------- -1 U2 RF00004 ENA|BK006935|BK006935.2 - - cm 1 193 52929 53083 + no 1 0.44 0.0 13.5 0.91 ? * - - - - - - U2 spliceosomal RNA -2 U2 RF00004 ENA|BK006935|BK006935.2 - - cm 1 193 196571 196389 - no 1 0.33 5.3 12.8 1.3 ? * - - - - - - U2 spliceosomal RNA -1 U2 RF00004 ENA|BK006936|BK006936.2 - - cm 1 193 681858 681747 - no 1 0.33 0.1 98.7 1.2e-20 ! * - - - - - - U2 spliceosomal RNA -1 5S_rRNA RF00001 ENA|BK006937|BK006937.2 - - cm 1 119 761 644 - no 1 0.41 0.3 14.1 2.4 ? * - - - - - - 5S ribosomal RNA -2 U2 RF00004 ENA|BK006937|BK006937.2 - - cm 1 193 229986 229885 - no 1 0.32 0.1 11.1 4.7 ? * - - - - - - U2 spliceosomal RNA -1 U2 RF00004 ENA|BK006938|BK006938.2 - - cm 1 193 1259500 1259396 - no 1 0.38 0.0 13.1 7.5 ? * - - - - - - U2 spliceosomal RNA -1 U2 RF00004 ENA|BK006939|BK006939.2 - - cm 1 193 190882 191043 + no 1 0.41 0.0 14.9 1 ? * - - - - - - U2 spliceosomal RNA -1 5S_rRNA RF00001 ENA|BK006943|BK006943.2 - - cm 1 119 357031 357144 + no 1 0.46 0.0 20.9 0.1 ? * - - - - - - 5S ribosomal RNA -2 5S_rRNA RF00001 ENA|BK006943|BK006943.2 - - cm 1 119 359490 359579 + no 1 0.36 0.0 14.6 4.3 ? * - - - - - - 5S ribosomal RNA -3 U2 RF00004 ENA|BK006943|BK006943.2 - - cm 45 84 448179 448142 - no 1 0.21 0.0 12.2 5.9 ? * - - - - - - U2 spliceosomal RNA -1 5S_rRNA RF00001 ENA|BK006945|BK006945.2 - - cm 1 119 459676 459796 + no 1 0.52 0.0 88.8 4.2e-19 ! * - - - - - - 5S ribosomal RNA -2 5S_rRNA RF00001 ENA|BK006945|BK006945.2 - - cm 1 119 489349 489469 + no 1 0.52 0.0 88.8 4.2e-19 ! * - - - - - - 5S ribosomal RNA -3 5S_rRNA RF00001 ENA|BK006945|BK006945.2 - - cm 1 119 468813 468933 + no 1 0.53 0.0 83.2 1.2e-17 ! * - - - - - - 5S ribosomal RNA -4 5S_rRNA RF00001 ENA|BK006945|BK006945.2 - - cm 1 119 472465 472585 + no 1 0.53 0.0 83.2 1.2e-17 ! * - - - - - - 5S ribosomal RNA -5 5S_rRNA RF00001 ENA|BK006945|BK006945.2 - - cm 1 119 482045 482165 + no 1 0.53 0.0 83.2 1.2e-17 ! 
* - - - - - - 5S ribosomal RNA -6 5S_rRNA RF00001 ENA|BK006945|BK006945.2 - - cm 1 119 485697 485817 + no 1 0.53 0.0 83.2 1.2e-17 ! * - - - - - - 5S ribosomal RNA -7 U2 RF00004 ENA|BK006945|BK006945.2 - - cm 1 193 940222 940147 - no 1 0.30 0.0 13.0 5.6 ? * - - - - - - U2 spliceosomal RNA -1 U2 RF00004 ENA|BK006947|BK006947.3 - - cm 1 193 557234 557379 + no 1 0.34 0.1 15.6 0.97 ? * - - - - - - U2 spliceosomal RNA -2 U2 RF00004 ENA|BK006947|BK006947.3 - - cm 1 193 266059 266208 + no 1 0.39 0.0 15.3 1.1 ? * - - - - - - U2 spliceosomal RNA -3 5S_rRNA RF00001 ENA|BK006947|BK006947.3 - - cm 1 119 7085 6968 - no 1 0.41 0.3 16.7 1.3 ? * - - - - - - 5S ribosomal RNA -1 U2 RF00004 ENA|BK006948|BK006948.2 - - cm 1 193 737498 737324 - no 1 0.39 0.0 19.8 0.13 ? * - - - - - - U2 spliceosomal RNA -2 U2 RF00004 ENA|BK006948|BK006948.2 - - cm 1 193 425490 425693 + no 1 0.34 0.9 13.7 3.7 ? * - - - - - - U2 spliceosomal RNA -1 U2 RF00004 ENA|BK006949|BK006949.2 - - cm 1 193 443393 443253 - no 1 0.32 0.4 15.1 1.6 ? 
* - - - - - - U2 spliceosomal RNA -# -# Program: cmscan -# Version: 1.1.4 (Dec 2020) -# Pipeline mode: SCAN -# Query file: GCA_000146045.2.fasta -# Target file: IRES_5S_U2.cm -# Option settings: cmscan --tblout IRES_5S_U2_Yeast-cmscan_fmt_2.tbl --fmt 2 IRES_5S_U2.cm GCA_000146045.2.fasta -# Current dir: /analysis/BioPython/Testing -# Date: Tue Sep 10 11:19:57 2024 -# [ok] diff --git a/Tests/test_SearchIO_hmmer3_tab.py b/Tests/test_SearchIO_hmmer3_tab.py index 89dc40ae9..840f8f6e7 100644 --- a/Tests/test_SearchIO_hmmer3_tab.py +++ b/Tests/test_SearchIO_hmmer3_tab.py @@ -136,6 +136,7 @@ class HmmscanCases(unittest.TestCase): self.assertEqual(1, hit.domain_reported_num) self.assertEqual(1, hit.domain_included_num) self.assertEqual("Immunoglobulin domain", hit.description) + # first hsp hsp = hit.hsps[0] self.assertEqual(2.1e-09, hsp.evalue) self.assertEqual(37.6, hsp.bitscore) diff --git a/Tests/test_SearchIO_infernal_tab.py b/Tests/test_SearchIO_infernal_tab.py new file mode 100644 index 000000000..88edcc86c --- /dev/null +++ b/Tests/test_SearchIO_infernal_tab.py @@ -0,0 +1,351 @@ +# Copyright 2024 by Samuel Prince. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. 
"""Tests for SearchIO InfernalIO infernal-tab parser."""

import os
import unittest

from Bio.SearchIO import parse

# test case files are in the Infernal directory
TEST_DIR = "Infernal"
FMT = "infernal-tab"


def get_file(filename):
    """Return the path of a test file."""
    return os.path.join(TEST_DIR, filename)


class CmscanCases(unittest.TestCase):
    """Test parsing cmscan output."""

    def test_cmscan_mq_mm(self):
        """Test parsing infernal-tab, cmscan, multiple queries, multiple hits and hsps, default format."""
        tab_file = get_file("IRES_5S_U2_Yeast-cmscan.tbl")
        qresults = parse(tab_file, FMT)
        # NOTE(review): only the first three queries are checked below and
        # iterator exhaustion is not asserted -- confirm whether the fixture
        # holds further queries before adding a StopIteration check.

        # first qresult
        qresult = next(qresults)
        self.assertEqual(1, len(qresult))
        self.assertEqual("ENA|BK006935|BK006935.2", qresult.id)
        self.assertEqual("-", qresult.accession)
        self.assertEqual("cm", qresult.model)
        hit = qresult[0]
        self.assertEqual(2, len(hit))
        self.assertEqual("U2", hit.id)
        self.assertEqual("RF00004", hit.accession)
        self.assertEqual("U2 spliceosomal RNA", hit.description)
        # first hsp
        hsp = hit[0]
        self.assertEqual(1, len(hsp))
        self.assertEqual(0.91, hsp.evalue)
        self.assertEqual(13.5, hsp.bitscore)
        self.assertEqual(0.0, hsp.bias)
        self.assertEqual(0.44, hsp.gc)
        self.assertEqual("no", hsp.truncated)
        self.assertEqual(1, hsp.pipeline_pass)
        self.assertEqual(False, hsp.is_included)
        frag = hsp[0]
        self.assertEqual(1, frag.query_start)
        self.assertEqual(193, frag.query_end)
        self.assertEqual(52929, frag.hit_start)
        self.assertEqual(53083, frag.hit_end)
        self.assertEqual(0, frag.hit_strand)
        # second hsp
        hsp = hit[1]
        self.assertEqual(1, len(hsp))
        self.assertEqual(1.3, hsp.evalue)
        self.assertEqual(12.8, hsp.bitscore)
        self.assertEqual(5.3, hsp.bias)
        self.assertEqual(0.33, hsp.gc)
        self.assertEqual("no", hsp.truncated)
        self.assertEqual(1, hsp.pipeline_pass)
        self.assertEqual(False, hsp.is_included)
        frag = hsp[0]
        self.assertEqual(1, frag.query_start)
        self.assertEqual(193, frag.query_end)
        self.assertEqual(196389, frag.hit_start)
        self.assertEqual(196571, frag.hit_end)
        self.assertEqual(-1, frag.hit_strand)

        # second qresult
        qresult = next(qresults)
        self.assertEqual(1, len(qresult))
        self.assertEqual("ENA|BK006936|BK006936.2", qresult.id)
        self.assertEqual("-", qresult.accession)
        self.assertEqual("cm", qresult.model)
        hit = qresult[0]
        self.assertEqual(1, len(hit))
        self.assertEqual("U2", hit.id)
        self.assertEqual("RF00004", hit.accession)
        self.assertEqual("U2 spliceosomal RNA", hit.description)
        hsp = hit[0]
        self.assertEqual(1, len(hsp))
        self.assertEqual(1.2e-20, hsp.evalue)
        self.assertEqual(98.7, hsp.bitscore)
        self.assertEqual(0.1, hsp.bias)
        self.assertEqual(0.33, hsp.gc)
        self.assertEqual("no", hsp.truncated)
        self.assertEqual(1, hsp.pipeline_pass)
        self.assertEqual(True, hsp.is_included)
        frag = hsp[0]
        self.assertEqual(1, frag.query_start)
        self.assertEqual(193, frag.query_end)
        self.assertEqual(681747, frag.hit_start)
        self.assertEqual(681858, frag.hit_end)
        self.assertEqual(-1, frag.hit_strand)

        # third qresult
        qresult = next(qresults)
        self.assertEqual(2, len(qresult))
        self.assertEqual("ENA|BK006937|BK006937.2", qresult.id)
        self.assertEqual("-", qresult.accession)
        self.assertEqual("cm", qresult.model)
        # first hit
        hit = qresult[0]
        self.assertEqual(1, len(hit))
        self.assertEqual("5S_rRNA", hit.id)
        self.assertEqual("RF00001", hit.accession)
        self.assertEqual("5S ribosomal RNA", hit.description)
        hsp = hit[0]
        self.assertEqual(1, len(hsp))
        self.assertEqual(2.4, hsp.evalue)
        self.assertEqual(14.1, hsp.bitscore)
        self.assertEqual(0.3, hsp.bias)
        self.assertEqual(0.41, hsp.gc)
        self.assertEqual("no", hsp.truncated)
        self.assertEqual(1, hsp.pipeline_pass)
        self.assertEqual(False, hsp.is_included)
        frag = hsp[0]
        self.assertEqual(1, frag.query_start)
        self.assertEqual(119, frag.query_end)
        self.assertEqual(644, frag.hit_start)
        self.assertEqual(761, frag.hit_end)
        self.assertEqual(-1, frag.hit_strand)
        # second hit
        hit = qresult[1]
        self.assertEqual(1, len(hit))
        self.assertEqual("U2", hit.id)
        self.assertEqual("RF00004", hit.accession)
        self.assertEqual("U2 spliceosomal RNA", hit.description)
        hsp = hit[0]
        self.assertEqual(1, len(hsp))
        self.assertEqual(4.7, hsp.evalue)
        self.assertEqual(11.1, hsp.bitscore)
        self.assertEqual(0.1, hsp.bias)
        self.assertEqual(0.32, hsp.gc)
        self.assertEqual("no", hsp.truncated)
        self.assertEqual(1, hsp.pipeline_pass)
        self.assertEqual(False, hsp.is_included)
        frag = hsp[0]
        self.assertEqual(1, frag.query_start)
        self.assertEqual(193, frag.query_end)
        self.assertEqual(229885, frag.hit_start)
        self.assertEqual(229986, frag.hit_end)
        self.assertEqual(-1, frag.hit_strand)

    def test_cmscan_mq_mm_fmt2(self):
        """Test parsing infernal-tab, cmscan, tabular format 2 (first query only)."""
        tab_file = get_file("IRES_5S_U2_Yeast-cmscan-fmt_2.tbl")
        qresults = parse(tab_file, FMT)

        # first qresult; format 2 additionally exposes the clan, the sequence
        # lengths and the overlap-related hsp columns asserted below
        qresult = next(qresults)
        self.assertEqual(1, len(qresult))
        self.assertEqual("ENA|BK006936|BK006936.2", qresult.id)
        self.assertEqual("-", qresult.accession)
        self.assertEqual("cm", qresult.model)
        self.assertEqual("-", qresult.clan)
        self.assertEqual(813184, qresult.seq_len)
        hit = qresult[0]
        self.assertEqual(1, len(hit))
        self.assertEqual("U2", hit.id)
        self.assertEqual("RF00004", hit.accession)
        self.assertEqual("U2 spliceosomal RNA", hit.description)
        self.assertEqual(193, hit.seq_len)
        # first hsp
        hsp = hit[0]
        self.assertEqual(1, len(hsp))
        self.assertEqual(1.2e-20, hsp.evalue)
        self.assertEqual(98.7, hsp.bitscore)
        self.assertEqual(0.1, hsp.bias)
        self.assertEqual(0.33, hsp.gc)
        self.assertEqual("no", hsp.truncated)
        self.assertEqual(1, hsp.pipeline_pass)
        self.assertEqual(True, hsp.is_included)
        self.assertEqual("*", hsp.olp)
        self.assertEqual("-", hsp.anyidx)
        self.assertEqual("-", hsp.afrct1)
        self.assertEqual("-", hsp.afrct2)
        self.assertEqual("-", hsp.winidx)
        self.assertEqual("-", hsp.wfrct1)
        self.assertEqual("-", hsp.wfrct2)
        frag = hsp[0]
        self.assertEqual(1, frag.query_start)
        self.assertEqual(193, frag.query_end)
        self.assertEqual(681747, frag.hit_start)
        self.assertEqual(681858, frag.hit_end)
        self.assertEqual(-1, frag.hit_strand)

    def test_cmscan_mq_mm_fmt3(self):
        """Test parsing infernal-tab, cmscan, tabular format 3 (first query only)."""
        tab_file = get_file("IRES_5S_U2_Yeast-cmscan-fmt_3.tbl")
        qresults = parse(tab_file, FMT)

        # first qresult; format 3 additionally exposes the sequence lengths
        qresult = next(qresults)
        self.assertEqual(1, len(qresult))
        self.assertEqual("ENA|BK006936|BK006936.2", qresult.id)
        self.assertEqual("-", qresult.accession)
        self.assertEqual("cm", qresult.model)
        self.assertEqual(813184, qresult.seq_len)
        hit = qresult[0]
        self.assertEqual(1, len(hit))
        self.assertEqual("U2", hit.id)
        self.assertEqual("RF00004", hit.accession)
        self.assertEqual("U2 spliceosomal RNA", hit.description)
        self.assertEqual(193, hit.seq_len)
        # first hsp
        hsp = hit[0]
        self.assertEqual(1, len(hsp))
        self.assertEqual(1.2e-20, hsp.evalue)
        self.assertEqual(98.7, hsp.bitscore)
        self.assertEqual(0.1, hsp.bias)
        self.assertEqual(0.33, hsp.gc)
        self.assertEqual("no", hsp.truncated)
        self.assertEqual(1, hsp.pipeline_pass)
        self.assertEqual(True, hsp.is_included)
        frag = hsp[0]
        self.assertEqual(1, frag.query_start)
        self.assertEqual(193, frag.query_end)
        self.assertEqual(681747, frag.hit_start)
        self.assertEqual(681858, frag.hit_end)
        self.assertEqual(-1, frag.hit_strand)


class CmsearchCases(unittest.TestCase):
    """Test parsing cmsearch output."""

    def test_1q_0m(self):
        """Test parsing infernal-tab, cmsearch, single query, no hits."""
        tab_file = get_file("IRES_Yeast.tbl")
        qresults = parse(tab_file, FMT)

        # a table without hits must yield no query result at all
        self.assertRaises(StopIteration, next, qresults)

    def test_cmsearch_1q_1m(self):
        """Test parsing infernal-tab, cmsearch, one query, one match, one hsp."""
        tab_file = get_file("U2_Yeast-threshold.tbl")
        qresults = parse(tab_file, FMT)
        counter = 0

        qresult = next(qresults)
        counter += 1
        self.assertEqual(1, len(qresult))
        self.assertEqual("U2", qresult.id)
        self.assertEqual("RF00004", qresult.accession)
        self.assertEqual("cm", qresult.model)
        hit = qresult[0]
        self.assertEqual(1, len(hit))
        self.assertEqual("ENA|BK006936|BK006936.2", hit.id)
        self.assertEqual("-", hit.accession)
        self.assertEqual(
            "TPA_inf: Saccharomyces cerevisiae S288C chromosome II, complete sequence.",
            hit.description,
        )
        hsp = hit[0]
        self.assertEqual(1, len(hsp))
        self.assertEqual(5.9e-20, hsp.evalue)
        self.assertEqual(98.7, hsp.bitscore)
        self.assertEqual(0.1, hsp.bias)
        self.assertEqual(0.33, hsp.gc)
        self.assertEqual("no", hsp.truncated)
        self.assertEqual(1, hsp.pipeline_pass)
        self.assertEqual(True, hsp.is_included)
        frag = hsp[0]
        self.assertEqual(1, frag.query_start)
        self.assertEqual(193, frag.query_end)
        self.assertEqual(681747, frag.hit_start)
        self.assertEqual(681858, frag.hit_end)
        self.assertEqual(-1, frag.hit_strand)

        # test if we've properly finished iteration
        self.assertRaises(StopIteration, next, qresults)
        self.assertEqual(1, counter)

    def test_cmsearch_1q_mm(self):
        """Test parsing infernal-tab, cmsearch, one query, one hit, multiple hsps."""
        tab_file = get_file("5S_Yeast.tbl")
        qresults = parse(tab_file, FMT)
        counter = 0

        qresult = next(qresults)
        counter += 1
        self.assertEqual(1, len(qresult))
        self.assertEqual("5S_rRNA", qresult.id)
        self.assertEqual("RF00001", qresult.accession)
        self.assertEqual("cm", qresult.model)
        # first (and only) hit
        hit = qresult[0]
        self.assertEqual(6, len(hit))
        self.assertEqual("ENA|BK006945|BK006945.2", hit.id)
        self.assertEqual("-", hit.accession)
        self.assertEqual(
            "TPA_inf: Saccharomyces cerevisiae S288C chromosome XII, complete sequence.",
            hit.description,
        )
        # first hsp
        hsp = hit[0]
        self.assertEqual(1, len(hsp))
        self.assertEqual(1.6e-18, hsp.evalue)
        self.assertEqual(88.8, hsp.bitscore)
        self.assertEqual(0.0, hsp.bias)
        self.assertEqual(0.52, hsp.gc)
        self.assertEqual("no", hsp.truncated)
        self.assertEqual(1, hsp.pipeline_pass)
        self.assertEqual(True, hsp.is_included)
        frag = hsp[0]
        self.assertEqual(1, frag.query_start)
        self.assertEqual(119, frag.query_end)
        self.assertEqual(459676, frag.hit_start)
        self.assertEqual(459796, frag.hit_end)
        self.assertEqual(0, frag.hit_strand)
        # last hsp
        hsp = hit[-1]
        self.assertEqual(1, len(hsp))
        self.assertEqual(4.4e-17, hsp.evalue)
        self.assertEqual(83.2, hsp.bitscore)
        self.assertEqual(0.0, hsp.bias)
        self.assertEqual(0.53, hsp.gc)
        self.assertEqual("no", hsp.truncated)
        self.assertEqual(1, hsp.pipeline_pass)
        self.assertEqual(True, hsp.is_included)
        frag = hsp[0]
        self.assertEqual(1, frag.query_start)
        self.assertEqual(119, frag.query_end)
        self.assertEqual(485697, frag.hit_start)
        self.assertEqual(485817, frag.hit_end)
        self.assertEqual(0, frag.hit_strand)

        # test if we've properly finished iteration
        self.assertRaises(StopIteration, next, qresults)
        self.assertEqual(1, counter)


if __name__ == "__main__":
    runner = unittest.TextTestRunner(verbosity=2)
    unittest.main(testRunner=runner)