mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 21:53:47 +08:00
530 lines
37 KiB
Python
530 lines
37 KiB
Python
# Copyright 2024 by Samuel Prince. All rights reserved.
|
|
#
|
|
# This file is part of the Biopython distribution and governed by your
|
|
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
|
|
# Please see the LICENSE file that should have been included as part of this
|
|
# package.
|
|
|
|
|
|
"""Tests for SearchIO InfernalIO infernal-text indexing"""
|
|
|
|
|
|
import os
|
|
import unittest
|
|
|
|
from search_tests_common import CheckIndex
|
|
from search_tests_common import CheckRaw
|
|
|
|
|
|
class InfernalTabRawCases(CheckRaw):
|
|
fmt = "infernal-text"
|
|
|
|
def test_infernal_text_1q(self):
|
|
"""Test infernal-text raw string retrieval, cmsearch, one query (U2_Yeast)."""
|
|
filename = os.path.join("Infernal", "cmsearch_114_U2_Yeast.txt")
|
|
raw = """# cmsearch :: search CM(s) against a sequence database
|
|
# INFERNAL 1.1.4 (Dec 2020)
|
|
# Copyright (C) 2020 Howard Hughes Medical Institute.
|
|
# Freely distributed under the BSD open source license.
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
# query CM file: RF00004.cm
|
|
# target sequence database: GCA_000146045.2.fasta
|
|
# tabular output of hits: U2_Yeast-threshold.tbl
|
|
# sequence reporting threshold: score >= 46
|
|
# number of worker threads: 56
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
|
Query: U2 [CLEN=193]
|
|
Accession: RF00004
|
|
Description: U2 spliceosomal RNA
|
|
Hit scores:
|
|
rank E-value score bias sequence start end mdl trunc gc description
|
|
---- --------- ------ ----- ----------------------- ------ ------ --- ----- ---- -----------
|
|
(1) ! 5.9e-20 98.7 0.1 ENA|BK006936|BK006936.2 681858 681747 - cm no 0.33 TPA_inf: Saccharomyces cerevisiae S288C chromosome II,
|
|
|
|
|
|
Hit alignments:
|
|
>> ENA|BK006936|BK006936.2 TPA_inf: Saccharomyces cerevisiae S288C chromosome II, complete sequence.
|
|
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
|
|
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
|
|
(1) ! 5.9e-20 98.7 0.1 cm 1 193 [] 681858 681747 - .. 0.91 no 0.33
|
|
|
|
v NC
|
|
::::::<<<-<<<<____>>>>->>>,,,,,,,,,,,,,,,,,,,,<<<<<<________>>>>>>,<<<<<<<___>>> CS
|
|
U2 1 AUacCUUCucgGCcUUUUgGCuaaGAUCAAGUGUAGUAUCUGUUCUUauCAGUuUAAuAuCUGauAuggcccccAuuggg 80
|
|
AU+ UCU+:GCCUUUUGGC:+AGAUCAAGUGUAGUAUCUGUUCUU:UCAGU+UAA+A+CUGA:AUG: CC:CA+UG:G
|
|
ENA|BK006936|BK006936.2 681858 AUC---UCUUUGCCUUUUGGCUUAGAUCAAGUGUAGUAUCUGUUCUUUUCAGUGUAACAACUGAAAUGA-CCUCAAUGAG 681783
|
|
***...************************************************************999.********** PP
|
|
|
|
v NC
|
|
>>>>,,,.,,,,,,,,,,,,,,,,,,,,,,,,,~~~~~~~~~~~~::::::: CS
|
|
U2 81 ggccaau.uauaUUAaauuaAUUUUUggaacua*[34]**[40]*Acccuuu 193
|
|
G+:CA+U U+UUAA+UU AC +UUU
|
|
ENA|BK006936|BK006936.2 681782 GCUCAUUaCCUUUUAAUUUG-------------*[ 6]**[ 3]*ACAUUUU 681747
|
|
******86555555555443................7.....9..******* PP
|
|
|
|
|
|
|
|
Internal CM pipeline statistics summary:
|
|
----------------------------------------
|
|
Query model(s): 1 (193 consensus positions)
|
|
Target sequences: 16 (24142652 residues searched)
|
|
Target sequences re-searched for truncated hits: 16 (15424 residues re-searched)
|
|
Windows passing local HMM SSV filter: 72732 (0.7978); expected (0.35)
|
|
Windows passing local HMM Viterbi filter: 24175 (0.3233); expected (0.15)
|
|
Windows passing local HMM Viterbi bias filter: 6671 (0.09669); expected (0.15)
|
|
Windows passing local HMM Forward filter: 2037 (0.03161); expected (0.003)
|
|
Windows passing local HMM Forward bias filter: 1133 (0.01757); expected (0.003)
|
|
Windows passing glocal HMM Forward filter: 596 (0.01251); expected (0.003)
|
|
Windows passing glocal HMM Forward bias filter: 438 (0.009175); expected (0.003)
|
|
Envelopes passing glocal HMM envelope defn filter: 460 (0.00429); expected (0.003)
|
|
Envelopes passing local CM CYK filter: 38 (0.000201); expected (0.0001)
|
|
Total CM hits reported: 1 (4.636e-06); includes 0 truncated hit(s)
|
|
|
|
# CPU time: 64.67u 2.16s 00:01:06.83 Elapsed: 00:00:03.09
|
|
//
|
|
"""
|
|
self.check_raw(filename, "U2", raw)
|
|
|
|
def test_infernal_text_mq_first(self):
|
|
"""Test infernal-text raw string retrieval, cmsearch, multiple queries, first (IRES_5S_U2_Yeast)."""
|
|
filename = os.path.join("Infernal", "cmsearch_114_IRES_5S_U2_Yeast.txt")
|
|
raw = """# cmsearch :: search CM(s) against a sequence database
|
|
# INFERNAL 1.1.4 (Dec 2020)
|
|
# Copyright (C) 2020 Howard Hughes Medical Institute.
|
|
# Freely distributed under the BSD open source license.
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
# query CM file: IRES_5S_U2.cm
|
|
# target sequence database: GCA_000146045.2.fasta
|
|
# tabular output of hits: IRES_5S_U2_Yeast.tbl
|
|
# number of worker threads: 56
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
|
Query: IRES_HCV [CLEN=352]
|
|
Accession: RF00061
|
|
Description: Hepatitis C virus internal ribosome entry site
|
|
Hit scores:
|
|
rank E-value score bias sequence start end mdl trunc gc description
|
|
---- --------- ------ ----- -------- ------ ------ --- ----- ---- -----------
|
|
|
|
[No hits detected that satisfy reporting thresholds]
|
|
|
|
|
|
Hit alignments:
|
|
|
|
[No hits detected that satisfy reporting thresholds]
|
|
|
|
|
|
Internal CM pipeline statistics summary:
|
|
----------------------------------------
|
|
Query model(s): 1 (352 consensus positions)
|
|
Target sequences: 16 (24142652 residues searched)
|
|
Target sequences re-searched for truncated hits: 16 (28160 residues re-searched)
|
|
Windows passing local HMM SSV filter: 6432 (0.1374); expected (0.35)
|
|
Windows passing local HMM Viterbi filter: 1631 (0.03555); expected (0.15)
|
|
Windows passing local HMM Viterbi bias filter: 1607 (0.03502); expected (0.15)
|
|
Windows passing local HMM Forward filter: 9 (0.0001992); expected (0.003)
|
|
Windows passing local HMM Forward bias filter: 9 (0.0001992); expected (0.003)
|
|
Windows passing glocal HMM Forward filter: 1 (2.16e-05); expected (0.003)
|
|
Windows passing glocal HMM Forward bias filter: 1 (2.16e-05); expected (0.003)
|
|
Envelopes passing glocal HMM envelope defn filter: 1 (1.705e-05); expected (0.003)
|
|
Envelopes passing local CM CYK filter: 0 (0); expected (0.0001)
|
|
Total CM hits reported: 0 (0); includes 0 truncated hit(s)
|
|
|
|
# CPU time: 3.98u 0.19s 00:00:04.17 Elapsed: 00:00:00.87
|
|
//
|
|
"""
|
|
self.check_raw(filename, "IRES_HCV", raw)
|
|
|
|
def test_infernal_text_mq_middle(self):
|
|
"""Test infernal-text raw string retrieval, cmsearch, multiple queries, middle (IRES_5S_U2_Yeast)."""
|
|
filename = os.path.join("Infernal", "cmsearch_114_IRES_5S_U2_Yeast.txt")
|
|
raw = """# cmsearch :: search CM(s) against a sequence database
|
|
# INFERNAL 1.1.4 (Dec 2020)
|
|
# Copyright (C) 2020 Howard Hughes Medical Institute.
|
|
# Freely distributed under the BSD open source license.
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
# query CM file: IRES_5S_U2.cm
|
|
# target sequence database: GCA_000146045.2.fasta
|
|
# tabular output of hits: IRES_5S_U2_Yeast.tbl
|
|
# number of worker threads: 56
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
|
Query: 5S_rRNA [CLEN=119]
|
|
Accession: RF00001
|
|
Description: 5S ribosomal RNA
|
|
Hit scores:
|
|
rank E-value score bias sequence start end mdl trunc gc description
|
|
---- --------- ------ ----- ----------------------- ------ ------ --- ----- ---- -----------
|
|
(1) ! 1.6e-18 88.8 0.0 ENA|BK006945|BK006945.2 459676 459796 + cm no 0.52 TPA_inf: Saccharomyces cerevisiae S288C chromosome XII
|
|
(2) ! 1.6e-18 88.8 0.0 ENA|BK006945|BK006945.2 489349 489469 + cm no 0.52 TPA_inf: Saccharomyces cerevisiae S288C chromosome XII
|
|
(3) ! 4.4e-17 83.2 0.0 ENA|BK006945|BK006945.2 468813 468933 + cm no 0.53 TPA_inf: Saccharomyces cerevisiae S288C chromosome XII
|
|
(4) ! 4.4e-17 83.2 0.0 ENA|BK006945|BK006945.2 472465 472585 + cm no 0.53 TPA_inf: Saccharomyces cerevisiae S288C chromosome XII
|
|
(5) ! 4.4e-17 83.2 0.0 ENA|BK006945|BK006945.2 482045 482165 + cm no 0.53 TPA_inf: Saccharomyces cerevisiae S288C chromosome XII
|
|
(6) ! 4.4e-17 83.2 0.0 ENA|BK006945|BK006945.2 485697 485817 + cm no 0.53 TPA_inf: Saccharomyces cerevisiae S288C chromosome XII
|
|
------ inclusion threshold ------
|
|
(7) ? 0.56 20.9 0.0 ENA|BK006943|BK006943.2 357031 357144 + cm no 0.46 TPA_inf: Saccharomyces cerevisiae S288C chromosome X,
|
|
(8) ? 6.6 16.7 0.3 ENA|BK006947|BK006947.3 7085 6968 - cm no 0.41 TPA_inf: Saccharomyces cerevisiae S288C chromosome XIV
|
|
|
|
|
|
Hit alignments:
|
|
>> ENA|BK006945|BK006945.2 TPA_inf: Saccharomyces cerevisiae S288C chromosome XII, complete sequence.
|
|
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
|
|
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
|
|
(1) ! 1.6e-18 88.8 0.0 cm 1 119 [] 459676 459796 + .. 0.99 no 0.52
|
|
|
|
v NC
|
|
(((((((((,,,,<<-<<<<<---<<--<<<<<<______>>-->>>.>-->>---->>>>>-->><<<-<<---.-<-< CS
|
|
5S_rRNA 1 gccuGcggcCAUAccagcgcgaAagcACcgGauCCCAUCcGaACuCc.gAAguUAAGcgcgcUugggCcagggUA.GUAc 78
|
|
G::UGC:GCCAUA:C :C::GAAAGCACCG :UCCC+UCCGA C: C G AGUUAAGC::G: +G:GCC G: GUA
|
|
ENA|BK006945|BK006945.2 459676 GGUUGCGGCCAUAUCUACCAGAAAGCACCGUUUCCCGUCCGAUCAACuGUAGUUAAGCUGGUAAGAGCCUGACCGaGUAG 459755
|
|
***********************************************99***********************8756**** PP
|
|
|
|
v vv NC
|
|
<-----<<____>>----->>->-->>->>>))))))))): CS
|
|
5S_rRNA 79 uagGaUGgGuGAcCuCcUGggAAgaccagGugccgCaggcc 119
|
|
+ +UGGGUGACC+ G AA :CAGGUGC:GCA::C+
|
|
ENA|BK006945|BK006945.2 459756 UGUAGUGGGUGACCAUACGCGAAACUCAGGUGCUGCAAUCU 459796
|
|
***********************9***************** PP
|
|
|
|
>> ENA|BK006945|BK006945.2 TPA_inf: Saccharomyces cerevisiae S288C chromosome XII, complete sequence.
|
|
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
|
|
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
|
|
(2) ! 1.6e-18 88.8 0.0 cm 1 119 [] 489349 489469 + .. 0.99 no 0.52
|
|
|
|
v NC
|
|
(((((((((,,,,<<-<<<<<---<<--<<<<<<______>>-->>>.>-->>---->>>>>-->><<<-<<---.-<-< CS
|
|
5S_rRNA 1 gccuGcggcCAUAccagcgcgaAagcACcgGauCCCAUCcGaACuCc.gAAguUAAGcgcgcUugggCcagggUA.GUAc 78
|
|
G::UGC:GCCAUA:C :C::GAAAGCACCG :UCCC+UCCGA C: C G AGUUAAGC::G: +G:GCC G: GUA
|
|
ENA|BK006945|BK006945.2 489349 GGUUGCGGCCAUAUCUACCAGAAAGCACCGUUUCCCGUCCGAUCAACuGUAGUUAAGCUGGUAAGAGCCUGACCGaGUAG 489428
|
|
***********************************************99***********************8756**** PP
|
|
|
|
v vv NC
|
|
<-----<<____>>----->>->-->>->>>))))))))): CS
|
|
5S_rRNA 79 uagGaUGgGuGAcCuCcUGggAAgaccagGugccgCaggcc 119
|
|
+ +UGGGUGACC+ G AA :CAGGUGC:GCA::C+
|
|
ENA|BK006945|BK006945.2 489429 UGUAGUGGGUGACCAUACGCGAAACUCAGGUGCUGCAAUCU 489469
|
|
***********************9***************** PP
|
|
|
|
>> ENA|BK006945|BK006945.2 TPA_inf: Saccharomyces cerevisiae S288C chromosome XII, complete sequence.
|
|
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
|
|
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
|
|
(3) ! 4.4e-17 83.2 0.0 cm 1 119 [] 468813 468933 + .. 0.99 no 0.53
|
|
|
|
v NC
|
|
(((((((((,,,,<<-<<<<<---<<--<<<<<<______>>-->>>.>-->>---->>>>>-->><<<-<<---.-<-< CS
|
|
5S_rRNA 1 gccuGcggcCAUAccagcgcgaAagcACcgGauCCCAUCcGaACuCc.gAAguUAAGcgcgcUugggCcagggUA.GUAc 78
|
|
: UGC:GCCAUA:C :C::GAAAGCACCG :UCCC+UCCGA C: C G AGUUAAGC::G: +G:GCC G: GUA
|
|
ENA|BK006945|BK006945.2 468813 GGUUGCGGCCAUAUCUACCAGAAAGCACCGUUUCCCGUCCGAUCAACuGUAGUUAAGCUGGUAAGAGCCUGACCGaGUAG 468892
|
|
***********************************************99***********************8756**** PP
|
|
|
|
v vv NC
|
|
<-----<<____>>----->>->-->>->>>))))))))): CS
|
|
5S_rRNA 79 uagGaUGgGuGAcCuCcUGggAAgaccagGugccgCaggcc 119
|
|
+ +UGGGUGACC+ G AA :CAGGUGC:GCA :
|
|
ENA|BK006945|BK006945.2 468893 UGUAGUGGGUGACCAUACGCGAAACUCAGGUGCUGCAGUUG 468933
|
|
***********************9***************** PP
|
|
|
|
>> ENA|BK006945|BK006945.2 TPA_inf: Saccharomyces cerevisiae S288C chromosome XII, complete sequence.
|
|
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
|
|
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
|
|
(4) ! 4.4e-17 83.2 0.0 cm 1 119 [] 472465 472585 + .. 0.99 no 0.53
|
|
|
|
v NC
|
|
(((((((((,,,,<<-<<<<<---<<--<<<<<<______>>-->>>.>-->>---->>>>>-->><<<-<<---.-<-< CS
|
|
5S_rRNA 1 gccuGcggcCAUAccagcgcgaAagcACcgGauCCCAUCcGaACuCc.gAAguUAAGcgcgcUugggCcagggUA.GUAc 78
|
|
: UGC:GCCAUA:C :C::GAAAGCACCG :UCCC+UCCGA C: C G AGUUAAGC::G: +G:GCC G: GUA
|
|
ENA|BK006945|BK006945.2 472465 GGUUGCGGCCAUAUCUACCAGAAAGCACCGUUUCCCGUCCGAUCAACuGUAGUUAAGCUGGUAAGAGCCUGACCGaGUAG 472544
|
|
***********************************************99***********************8756**** PP
|
|
|
|
v vv NC
|
|
<-----<<____>>----->>->-->>->>>))))))))): CS
|
|
5S_rRNA 79 uagGaUGgGuGAcCuCcUGggAAgaccagGugccgCaggcc 119
|
|
+ +UGGGUGACC+ G AA :CAGGUGC:GCA :
|
|
ENA|BK006945|BK006945.2 472545 UGUAGUGGGUGACCAUACGCGAAACUCAGGUGCUGCAGUUG 472585
|
|
***********************9***************** PP
|
|
|
|
>> ENA|BK006945|BK006945.2 TPA_inf: Saccharomyces cerevisiae S288C chromosome XII, complete sequence.
|
|
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
|
|
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
|
|
(5) ! 4.4e-17 83.2 0.0 cm 1 119 [] 482045 482165 + .. 0.99 no 0.53
|
|
|
|
v NC
|
|
(((((((((,,,,<<-<<<<<---<<--<<<<<<______>>-->>>.>-->>---->>>>>-->><<<-<<---.-<-< CS
|
|
5S_rRNA 1 gccuGcggcCAUAccagcgcgaAagcACcgGauCCCAUCcGaACuCc.gAAguUAAGcgcgcUugggCcagggUA.GUAc 78
|
|
: UGC:GCCAUA:C :C::GAAAGCACCG :UCCC+UCCGA C: C G AGUUAAGC::G: +G:GCC G: GUA
|
|
ENA|BK006945|BK006945.2 482045 GGUUGCGGCCAUAUCUACCAGAAAGCACCGUUUCCCGUCCGAUCAACuGUAGUUAAGCUGGUAAGAGCCUGACCGaGUAG 482124
|
|
***********************************************99***********************8756**** PP
|
|
|
|
v vv NC
|
|
<-----<<____>>----->>->-->>->>>))))))))): CS
|
|
5S_rRNA 79 uagGaUGgGuGAcCuCcUGggAAgaccagGugccgCaggcc 119
|
|
+ +UGGGUGACC+ G AA :CAGGUGC:GCA :
|
|
ENA|BK006945|BK006945.2 482125 UGUAGUGGGUGACCAUACGCGAAACUCAGGUGCUGCAGUUG 482165
|
|
***********************9***************** PP
|
|
|
|
>> ENA|BK006945|BK006945.2 TPA_inf: Saccharomyces cerevisiae S288C chromosome XII, complete sequence.
|
|
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
|
|
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
|
|
(6) ! 4.4e-17 83.2 0.0 cm 1 119 [] 485697 485817 + .. 0.99 no 0.53
|
|
|
|
v NC
|
|
(((((((((,,,,<<-<<<<<---<<--<<<<<<______>>-->>>.>-->>---->>>>>-->><<<-<<---.-<-< CS
|
|
5S_rRNA 1 gccuGcggcCAUAccagcgcgaAagcACcgGauCCCAUCcGaACuCc.gAAguUAAGcgcgcUugggCcagggUA.GUAc 78
|
|
: UGC:GCCAUA:C :C::GAAAGCACCG :UCCC+UCCGA C: C G AGUUAAGC::G: +G:GCC G: GUA
|
|
ENA|BK006945|BK006945.2 485697 GGUUGCGGCCAUAUCUACCAGAAAGCACCGUUUCCCGUCCGAUCAACuGUAGUUAAGCUGGUAAGAGCCUGACCGaGUAG 485776
|
|
***********************************************99***********************8756**** PP
|
|
|
|
v vv NC
|
|
<-----<<____>>----->>->-->>->>>))))))))): CS
|
|
5S_rRNA 79 uagGaUGgGuGAcCuCcUGggAAgaccagGugccgCaggcc 119
|
|
+ +UGGGUGACC+ G AA :CAGGUGC:GCA :
|
|
ENA|BK006945|BK006945.2 485777 UGUAGUGGGUGACCAUACGCGAAACUCAGGUGCUGCAGUUG 485817
|
|
***********************9***************** PP
|
|
|
|
>> ENA|BK006943|BK006943.2 TPA_inf: Saccharomyces cerevisiae S288C chromosome X, complete sequence.
|
|
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
|
|
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
|
|
(7) ? 0.56 20.9 0.0 cm 1 119 [] 357031 357144 + .. 0.86 no 0.46
|
|
|
|
v vv v vv vv v vv vv NC
|
|
(((((((((,,,,<<-<<<<<---<<--<<<<<<______>>-->>>>-.->>---->>>>>-->><<<-<<----<-<< CS
|
|
5S_rRNA 1 gccuGcggcCAUAccagcgcgaAagcACcgGauCCCAUCcGaACuCcgA.AguUAAGcgcgcUugggCcagggUAGUAcu 79
|
|
::: GC:G CA A:: G :G+AA: AC:: ++ CAU C ::A A :UAAGC: CU+:: C G:G GUACU
|
|
ENA|BK006943|BK006943.2 357031 CAGGGCUGGCAGAGGUGUCGGGAAAAACAAGGAU-CAUAU--CCUUUUAcAAUUAAGCCAUCUACCACCUGAG--GUACU 357105
|
|
****************************888743.44433..555555579**********************..***** PP
|
|
|
|
v v vvv NC
|
|
-----<<____>>----->>->-->>->>>)))))).))): CS
|
|
5S_rRNA 80 agGaUGgGuGAcCuCcUGggAAgaccagGugccgCa.ggcc 119
|
|
A + G CU C GGGAA+A:C+G C:GC :::+
|
|
ENA|BK006943|BK006943.2 357106 AAAG-G-AAAGGCUACCGGGAAUAUCUGAAACAGCUgCUGU 357144
|
|
9994.3.33334778889****************9879*** PP
|
|
|
|
>> ENA|BK006947|BK006947.3 TPA_inf: Saccharomyces cerevisiae S288C chromosome XIV, complete sequence.
|
|
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
|
|
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
|
|
(8) ? 6.6 16.7 0.3 cm 1 119 [] 7085 6968 - .. 0.91 no 0.41
|
|
|
|
v v v v v v v NC
|
|
(((((((((,,,,<<-<<.<<<.---<<--<<<<.<<______>>-->>>>-->>---->>>>>-->><<<-<<----<-<<-- CS
|
|
5S_rRNA 1 gccuGcggcCAUAccagc.gcg.aAagcACcgGa.uCCCAUCcGaACuCcgAAguUAAGcgcgcUugggCcagggUAGUAcuag 81
|
|
: :: ::: AUAC + :: G:AC:::: CC AUC+G ::::AA:U AAG :: U+ GGC: :G GUA U+G
|
|
ENA|BK006947|BK006947.3 7085 GAGAUGGUAUAUACUGUAgCAUcCGUGUACGUAUgACCGAUCAGA--AUACAAGUGAAGGUGAGUAUGGCAUGUG--GUAGUGG 7006
|
|
**************976325541459999****989999999999..89**********9999999*********..******* PP
|
|
|
|
v NC
|
|
---<<____>>----->>->-->>->>>.))))))))): CS
|
|
5S_rRNA 82 GaUGgGuGAcCuCcUGggAAgaccagGu.gccgCaggcc 119
|
|
GAU :G G : GG AAG+: A:GU ::: :: : C
|
|
ENA|BK006947|BK006947.3 7005 GAUUAGAG-UGGUAGGGUAAGUAUAUGUgUAUUAUUUAC 6968
|
|
***99988.689999************************ PP
|
|
|
|
|
|
|
|
Internal CM pipeline statistics summary:
|
|
----------------------------------------
|
|
Query model(s): 1 (119 consensus positions)
|
|
Target sequences: 16 (24142652 residues searched)
|
|
Target sequences re-searched for truncated hits: 16 (12416 residues re-searched)
|
|
Windows passing local HMM SSV filter: 24991 (0.2423); expected (0.35)
|
|
Windows passing local HMM Viterbi filter: 8464 (0.08504); expected (0.15)
|
|
Windows passing local HMM Viterbi bias filter: 8432 (0.08473); expected (0.15)
|
|
Windows passing local HMM Forward filter: 135 (0.001502); expected (0.003)
|
|
Windows passing local HMM Forward bias filter: 134 (0.001493); expected (0.003)
|
|
Windows passing glocal HMM Forward filter: 65 (0.0007404); expected (0.003)
|
|
Windows passing glocal HMM Forward bias filter: 65 (0.0007404); expected (0.003)
|
|
Envelopes passing glocal HMM envelope defn filter: 61 (0.0003446); expected (0.003)
|
|
Envelopes passing local CM CYK filter: 15 (6.52e-05); expected (0.0001)
|
|
Total CM hits reported: 8 (3.966e-05); includes 0 truncated hit(s)
|
|
|
|
# CPU time: 4.17u 0.21s 00:00:04.38 Elapsed: 00:00:00.40
|
|
//
|
|
"""
|
|
self.check_raw(filename, "5S_rRNA", raw)
|
|
|
|
def test_infernal_text_mq_last(self):
|
|
"""Test infernal-text raw string retrieval, cmsearch, multiple queries, last (IRES_5S_U2_Yeast)."""
|
|
filename = os.path.join("Infernal", "cmsearch_114_IRES_5S_U2_Yeast.txt")
|
|
raw = """# cmsearch :: search CM(s) against a sequence database
|
|
# INFERNAL 1.1.4 (Dec 2020)
|
|
# Copyright (C) 2020 Howard Hughes Medical Institute.
|
|
# Freely distributed under the BSD open source license.
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
# query CM file: IRES_5S_U2.cm
|
|
# target sequence database: GCA_000146045.2.fasta
|
|
# tabular output of hits: IRES_5S_U2_Yeast.tbl
|
|
# number of worker threads: 56
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
|
Query: U2 [CLEN=193]
|
|
Accession: RF00004
|
|
Description: U2 spliceosomal RNA
|
|
Hit scores:
|
|
rank E-value score bias sequence start end mdl trunc gc description
|
|
---- --------- ------ ----- ----------------------- ------ ------ --- ----- ---- -----------
|
|
(1) ! 5.9e-20 98.7 0.1 ENA|BK006936|BK006936.2 681858 681747 - cm no 0.33 TPA_inf: Saccharomyces cerevisiae S288C chromosome II,
|
|
------ inclusion threshold ------
|
|
(2) ? 0.49 19.8 0.0 ENA|BK006948|BK006948.2 737498 737324 - cm no 0.39 TPA_inf: Saccharomyces cerevisiae S288C chromosome XV,
|
|
(3) ? 5.7 15.3 0.0 ENA|BK006947|BK006947.3 266059 266208 + cm no 0.39 TPA_inf: Saccharomyces cerevisiae S288C chromosome XIV
|
|
(4) ? 6.6 15.1 0.4 ENA|BK006949|BK006949.2 443393 443253 - cm no 0.32 TPA_inf: Saccharomyces cerevisiae S288C chromosome XVI
|
|
(5) ? 7.1 14.9 0.0 ENA|BK006939|BK006939.2 190882 191043 + cm no 0.41 TPA_inf: Saccharomyces cerevisiae S288C chromosome V,
|
|
|
|
|
|
Hit alignments:
|
|
>> ENA|BK006936|BK006936.2 TPA_inf: Saccharomyces cerevisiae S288C chromosome II, complete sequence.
|
|
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
|
|
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
|
|
(1) ! 5.9e-20 98.7 0.1 cm 1 193 [] 681858 681747 - .. 0.91 no 0.33
|
|
|
|
v NC
|
|
::::::<<<-<<<<____>>>>->>>,,,,,,,,,,,,,,,,,,,,<<<<<<________>>>>>>,<<<<<<<___>>> CS
|
|
U2 1 AUacCUUCucgGCcUUUUgGCuaaGAUCAAGUGUAGUAUCUGUUCUUauCAGUuUAAuAuCUGauAuggcccccAuuggg 80
|
|
AU+ UCU+:GCCUUUUGGC:+AGAUCAAGUGUAGUAUCUGUUCUU:UCAGU+UAA+A+CUGA:AUG: CC:CA+UG:G
|
|
ENA|BK006936|BK006936.2 681858 AUC---UCUUUGCCUUUUGGCUUAGAUCAAGUGUAGUAUCUGUUCUUUUCAGUGUAACAACUGAAAUGA-CCUCAAUGAG 681783
|
|
***...************************************************************999.********** PP
|
|
|
|
v NC
|
|
>>>>,,,.,,,,,,,,,,,,,,,,,,,,,,,,,~~~~~~~~~~~~::::::: CS
|
|
U2 81 ggccaau.uauaUUAaauuaAUUUUUggaacua*[34]**[40]*Acccuuu 193
|
|
G+:CA+U U+UUAA+UU AC +UUU
|
|
ENA|BK006936|BK006936.2 681782 GCUCAUUaCCUUUUAAUUUG-------------*[ 6]**[ 3]*ACAUUUU 681747
|
|
******86555555555443................7.....9..******* PP
|
|
|
|
>> ENA|BK006948|BK006948.2 TPA_inf: Saccharomyces cerevisiae S288C chromosome XV, complete sequence.
|
|
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
|
|
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
|
|
(2) ? 0.49 19.8 0.0 cm 1 193 [] 737498 737324 - .. 0.96 no 0.39
|
|
|
|
NC
|
|
::::::<<<-<<<<____>>>>->>>,,,,,,,,,,,,,,,,,,,,<<<<<<~~~~~~>>>>>>,<<<<<<<___>>>>> CS
|
|
U2 1 AUacCUUCucgGCcUUUUgGCuaaGAUCAAGUGUAGUAUCUGUUCUUauCAG*[ 8]*CUGauAuggcccccAuuggggg 82
|
|
AU+CC U U+ GCC U GGC +A AU AAGU UA UA C GUUCU:A::A U::U: ::::::A U:::::
|
|
ENA|BK006948|BK006948.2 737498 AUCCCAUAUUUGCCAUC-GGCAUAUAUUAAGUAUAUUAGCAGUUCUAAUUAC*[88]*GUAGUUGGAAGGAUACUAUCCU 737338
|
|
**************999.*******************************996...*..6999999999999999999999 PP
|
|
|
|
NC
|
|
>>,,,,,,,,,,,,,,,,,,,,,,,,,,,,~~~~~~~~~~~~::::::: CS
|
|
U2 83 ccaauuauaUUAaauuaAUUUUUggaacua*[34]**[40]*Acccuuu 193
|
|
: A+ A CC++U
|
|
ENA|BK006948|BK006948.2 737337 UUAU--------------------------*[ 2]**[ 1]*AUCCCCU 737324
|
|
9987.............................6.....9..******* PP
|
|
|
|
>> ENA|BK006947|BK006947.3 TPA_inf: Saccharomyces cerevisiae S288C chromosome XIV, complete sequence.
|
|
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
|
|
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
|
|
(3) ? 5.7 15.3 0.0 cm 1 193 [] 266059 266208 + .. 0.91 no 0.39
|
|
|
|
v v v NC
|
|
::::::.<<<.-<<<<____>>>>->>>,,,,.,,,,,,~~~~~~,,,,,,,,,,,,,,,,,,,,,,,,,,,,<<<<<<< CS
|
|
U2 1 AUacCU.UCu.cgGCcUUUUgGCuaaGAUCAA.GUGUAG*[48]*aauuauaUUAaauuaAUUUUUggaacuaGuggggg 119
|
|
AU UCU + G C UUG C AGAU A GUGUAG UUAUAU +UU AU UUU G +A:: : G:
|
|
ENA|BK006947|BK006947.3 266059 AUGUUGaUCUaUCGUCAAUUGACCCAGAUGAUaGUGUAG*[ 1]*-GUUAUAUAGUUUUGAUAUUUUGGCGAAAAGUUGA 266132
|
|
*****9****999****************9988999987...5...3377778888888888888888888888888888 PP
|
|
|
|
v v v v v v v v v NC
|
|
<----.<<<<<__>>>>>-..->>>>>>>>,,<<<<<<-<<<<<<___________>>>>>>-->>>>>>::::::: CS
|
|
U2 120 cauuu.uggGCUUGCccau..ugcccccaCacggguugaccuggcaUUGCAcUaccgccagguucagcccAcccuuu 193
|
|
:A+U+ U :GCUUGC: AU +::C : :: G: :AC: G U GCA UA+ C :GU+: :C +U +
|
|
ENA|BK006947|BK006947.3 266133 GAAUAuUGCGCUUGCGUAUauAUUCCAUUUGAGGUGGCACUAGAGCUCGCAUUAU-UACCAGUAGUGGCAGGAUUGC 266208
|
|
888888**************99999******************************.99******************* PP
|
|
|
|
>> ENA|BK006949|BK006949.2 TPA_inf: Saccharomyces cerevisiae S288C chromosome XVI, complete sequence.
|
|
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
|
|
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
|
|
(4) ? 6.6 15.1 0.4 cm 1 193 [] 443393 443253 - .. 0.71 no 0.32
|
|
|
|
v v v v NC
|
|
::::::<<<-<<<<____>>>>->>>,,,,,,,,,,,,,,,,,,,,~~~~~~,<<<<<<<___>>>>>>>,,,..,,,,, CS
|
|
U2 1 AUacCUUCucgGCcUUUUgGCuaaGAUCAAGUGUAGUAUCUGUUCU*[20]*uggcccccAuugggggccaau..uauaU 92
|
|
A U C:+G C G UA:G UCAAG UAGUAU UGUUCU ::: C A+ G :::++U
|
|
ENA|BK006949|BK006949.2 443393 CCAUUUACAUGAACCCCAGUUUAUGUUCAAG--UAGUAUAUGUUCU*[ 4]*-UCAACGCAAACUGCUGAUUUcaCC--- 443322
|
|
***************999************6..7**********98...4...222222222222222222222222... PP
|
|
|
|
v v v v v v v NC
|
|
,,,,,,,,,,,,,,,,,,,,<<<<<<<<----<<<<<__>>>>>-->>>>>>>>,,<<<<<<-<<<<<<___________ CS
|
|
U2 93 UAaauuaAUUUUUggaacuaGugggggcauuuuggGCUUGCccauugcccccaCacggguugaccuggcaUUGCAcUacc 172
|
|
:U : :::+U U UU+ ::: : A:A+ ::: G+C: : :A U CAC AC
|
|
ENA|BK006949|BK006949.2 443321 --------------------AUUGAAUAUUGU-----UUA------UAUAUGAUAUAUACCGUCAAAUUACUUCACGAC- 443274
|
|
....................222222222222.....221......3333345599***********************. PP
|
|
|
|
v v v NC
|
|
>>>>>>-->>>>>>::::::: CS
|
|
U2 173 gccagguucagcccAcccuuu 193
|
|
: : :G++C ::: + U+
|
|
ENA|BK006949|BK006949.2 443273 AGUGUGAACUGUGAUAAAUCA 443253
|
|
********************* PP
|
|
|
|
>> ENA|BK006939|BK006939.2 TPA_inf: Saccharomyces cerevisiae S288C chromosome V, complete sequence.
|
|
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
|
|
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
|
|
(5) ? 7.1 14.9 0.0 cm 1 193 [] 190882 191043 + .. 0.92 no 0.41
|
|
|
|
v v NC
|
|
::::::<<<.-<<<<____>>>>->>>,,,,,,,,,,,,,,,,,,,,~~~~~~~~~~~~,,,,,,,,,,,,,,,,,,,,, CS
|
|
U2 1 AUacCUUCu.cgGCcUUUUgGCuaaGAUCAAGUGUAGUAUCUGUUCU*[20]**[18]*aauuauaUUAaauuaAUUUUU 105
|
|
AU CC U : +: C: UUU:G : : AUCA AA ++U+UUAAAU AA UUUU
|
|
ENA|BK006939|BK006939.2 190882 AUUCCAUGAuUUCCUGUUUAGCU-UCAUCA-----------------*[ 4]**[ 4]*AACAUUUUUAAAUGAAAUUUU 190939
|
|
*******9955678899999976.799997....................9.....9..77699**************** PP
|
|
|
|
vv v v vv v NC
|
|
,,,,,,,<<<<<<<<----.<<<<<_._>>>>>-.........->>>>>>>>,,<<<<<<-<<<<<<__________... CS
|
|
U2 106 ggaacuaGugggggcauuu.uggGCU.UGCccau.........ugcccccaCacggguugaccuggcaUUGCAcUac... 171
|
|
+ + +GU:: : AUU :G:GCU UGC:C: + : ::ACA+ :: :: : :::: U CAC AC
|
|
ENA|BK006939|BK006939.2 190940 AAUGUCUGUUUCCUUAUUGaAGAGCUuUGCUCUGgauuuuccaACAUUAAACAUGCCGCCGAGGCCUCCUCCACCACcac 191019
|
|
***********9999998899999964799999999999999999999**************************999888 PP
|
|
|
|
v NC
|
|
.._>>>>>>-->>>>>>::::::: CS
|
|
U2 172 ..cgccagguucagcccAcccuuu 193
|
|
+:::: :UU:: :: + CU+U
|
|
ENA|BK006939|BK006939.2 191020 caUUGGCAUUUGGUGGUGAACUAU 191043
|
|
988********************* PP
|
|
|
|
|
|
|
|
Internal CM pipeline statistics summary:
|
|
----------------------------------------
|
|
Query model(s): 1 (193 consensus positions)
|
|
Target sequences: 16 (24142652 residues searched)
|
|
Target sequences re-searched for truncated hits: 16 (15424 residues re-searched)
|
|
Windows passing local HMM SSV filter: 72732 (0.7978); expected (0.35)
|
|
Windows passing local HMM Viterbi filter: 24175 (0.3233); expected (0.15)
|
|
Windows passing local HMM Viterbi bias filter: 6671 (0.09669); expected (0.15)
|
|
Windows passing local HMM Forward filter: 2037 (0.03161); expected (0.003)
|
|
Windows passing local HMM Forward bias filter: 1133 (0.01757); expected (0.003)
|
|
Windows passing glocal HMM Forward filter: 596 (0.01251); expected (0.003)
|
|
Windows passing glocal HMM Forward bias filter: 438 (0.009175); expected (0.003)
|
|
Envelopes passing glocal HMM envelope defn filter: 460 (0.00429); expected (0.003)
|
|
Envelopes passing local CM CYK filter: 38 (0.000201); expected (0.0001)
|
|
Total CM hits reported: 5 (3.063e-05); includes 0 truncated hit(s)
|
|
|
|
# CPU time: 67.17u 2.23s 00:01:09.40 Elapsed: 00:00:03.24
|
|
//
|
|
"""
|
|
self.check_raw(filename, "U2", raw)
|
|
|
|
|
|
class Hmmer3TextIndexCases(CheckIndex):
    """Indexing checks for cmsearch output parsed as ``infernal-text``.

    Each test hands one sample file from the ``Infernal`` directory to
    ``check_index`` together with the format name stored in ``fmt``.

    NOTE(review): the ``Hmmer3`` prefix looks like a leftover from another
    test module; the name is kept so existing test selectors keep working.
    """

    fmt = "infernal-text"

    def test_infernal_text_1q_0m(self):
        """Test infernal-text indexing, cmsearch, one query, no hits."""
        self.check_index(
            os.path.join("Infernal", "cmsearch_114_IRES_Yeast.txt"), self.fmt
        )

    def test_infernal_text_1q_mm(self):
        """Test infernal-text indexing, cmsearch, one query, multiple hits."""
        self.check_index(
            os.path.join("Infernal", "cmsearch_114_5S_Yeast.txt"), self.fmt
        )

    def test_infernal_text_mq(self):
        """Test infernal-text indexing, cmsearch, multiple queries."""
        self.check_index(
            os.path.join("Infernal", "cmsearch_114_IRES_5S_U2_Yeast.txt"), self.fmt
        )
|
|
|
|
|
|
if __name__ == "__main__":
    # Run every test case in this module with per-test (verbose) reporting.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))
|