mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 13:43:47 +08:00
Added support for cmsearch output in which hits to the same target are non-consecutive
This commit is contained in:
committed by
Wibowo Arindrarto
parent
6c753c0945
commit
2f2fef1b1d
@ -1,13 +1,12 @@
|
||||
# cmsearch :: search CM(s) against a sequence database
|
||||
# INFERNAL 1.1.4 (Dec 2020)
|
||||
# Copyright (C) 2020 Howard Hughes Medical Institute.
|
||||
# INFERNAL 1.1.5 (Sep 2023)
|
||||
# Copyright (C) 2023 Howard Hughes Medical Institute.
|
||||
# Freely distributed under the BSD open source license.
|
||||
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||
# query CM file: RF00004.cm
|
||||
# target sequence database: GCA_000146045.2.fasta
|
||||
# show alignments in output: no
|
||||
# sequence reporting threshold: score >= 46
|
||||
# number of worker threads: 56
|
||||
# number of worker threads: 4
|
||||
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||
|
||||
Query: U2 [CLEN=193]
|
||||
@ -17,6 +16,11 @@ Hit scores:
|
||||
rank E-value score bias sequence start end mdl trunc gc description
|
||||
---- --------- ------ ----- ----------------------- ------ ------ --- ----- ---- -----------
|
||||
(1) ! 5.9e-20 98.7 0.1 ENA|BK006936|BK006936.2 681858 681747 - cm no 0.33 TPA_inf: Saccharomyces cerevisiae S288C chromosome II,
|
||||
------ inclusion threshold ------
|
||||
(2) ? 0.49 19.8 0.0 ENA|BK006948|BK006948.2 737498 737324 - cm no 0.39 TPA_inf: Saccharomyces cerevisiae S288C chromosome XV,
|
||||
(3) ? 5.7 15.3 0.0 ENA|BK006947|BK006947.3 266059 266208 + cm no 0.39 TPA_inf: Saccharomyces cerevisiae S288C chromosome XIV
|
||||
(4) ? 6.6 15.1 0.4 ENA|BK006949|BK006949.2 443393 443253 - cm no 0.32 TPA_inf: Saccharomyces cerevisiae S288C chromosome XVI
|
||||
(5) ? 7.1 14.9 0.0 ENA|BK006939|BK006939.2 190882 191043 + cm no 0.41 TPA_inf: Saccharomyces cerevisiae S288C chromosome V,
|
||||
|
||||
|
||||
Internal CM pipeline statistics summary:
|
||||
@ -33,8 +37,8 @@ Windows passing glocal HMM Forward filter: 596 (0.01251); e
|
||||
Windows passing glocal HMM Forward bias filter: 438 (0.009175); expected (0.003)
|
||||
Envelopes passing glocal HMM envelope defn filter: 460 (0.00429); expected (0.003)
|
||||
Envelopes passing local CM CYK filter: 38 (0.000201); expected (0.0001)
|
||||
Total CM hits reported: 1 (4.636e-06); includes 0 truncated hit(s)
|
||||
Total CM hits reported: 5 (3.063e-05); includes 0 truncated hit(s)
|
||||
|
||||
# CPU time: 65.01u 2.28s 00:01:07.29 Elapsed: 00:00:03.20
|
||||
# CPU time: 40.93u 0.55s 00:00:41.48 Elapsed: 00:00:10.59
|
||||
//
|
||||
[ok]
|
||||
|
18
Tests/Infernal/U2_Yeast-shuf.tbl
Normal file
18
Tests/Infernal/U2_Yeast-shuf.tbl
Normal file
@ -0,0 +1,18 @@
|
||||
#target name accession query name accession mdl mdl from mdl to seq from seq to strand trunc pass gc bias score E-value inc description of target
|
||||
#---------------------- --------- -------------------- --------- --- -------- -------- -------- -------- ------ ----- ---- ---- ----- ------ --------- --- ---------------------
|
||||
ENA|BK006936|BK006936.2 - U2 RF00004 cm 1 193 681858 681747 - no 1 0.33 0.1 98.7 1.3e-20 ! -
|
||||
ENA|BK006948|BK006948.2 - U2 RF00004 cm 1 193 737498 737324 - no 1 0.39 0.0 19.8 0.11 ? -
|
||||
ENA|BK006936|BK006936.2 - U2 RF00004 cm 1 193 1370418 1370563 + no 1 0.34 0.1 15.6 1.1 ? -
|
||||
ENA|BK006936|BK006936.2 - U2 RF00004 cm 1 193 1079243 1079392 + no 1 0.39 0.0 15.3 1.3 ? -
|
||||
ENA|BK006948|BK006948.2 - U2 RF00004 cm 1 193 425490 425693 + no 1 0.34 0.9 13.7 3.1 ? -
|
||||
ENA|BK006948|BK006948.2 - U2 RF00004 cm 1 193 1073786 1073950 + no 1 0.33 0.5 11.9 8.3 ? -
|
||||
#
|
||||
# Program: cmsearch
|
||||
# Version: 1.1.5 (Sep 2023)
|
||||
# Pipeline mode: SEARCH
|
||||
# Query file: RF00004.cm
|
||||
# Target file: BK006936_7-8.fasta
|
||||
# Option settings: cmsearch --tblout U2_Yeast-shuf.tbl RF00004.cm BK006936_7-8.fasta
|
||||
# Current dir: /analysis/BioPython/Testing
|
||||
# Date: Tue Sep 17 08:23:50 2024
|
||||
# [ok]
|
182
Tests/Infernal/U2_Yeast-shuf.txt
Normal file
182
Tests/Infernal/U2_Yeast-shuf.txt
Normal file
@ -0,0 +1,182 @@
|
||||
# cmsearch :: search CM(s) against a sequence database
|
||||
# INFERNAL 1.1.5 (Sep 2023)
|
||||
# Copyright (C) 2023 Howard Hughes Medical Institute.
|
||||
# Freely distributed under the BSD open source license.
|
||||
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||
# query CM file: RF00004.cm
|
||||
# target sequence database: BK006936_7-8.fasta
|
||||
# number of worker threads: 4
|
||||
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||
|
||||
Query: U2 [CLEN=193]
|
||||
Accession: RF00004
|
||||
Description: U2 spliceosomal RNA
|
||||
Hit scores:
|
||||
rank E-value score bias sequence start end mdl trunc gc description
|
||||
---- --------- ------ ----- ----------------------- ------- ------- --- ----- ---- -----------
|
||||
(1) ! 1.3e-20 98.7 0.1 ENA|BK006936|BK006936.2 681858 681747 - cm no 0.33 -
|
||||
------ inclusion threshold ------
|
||||
(2) ? 0.11 19.8 0.0 ENA|BK006948|BK006948.2 737498 737324 - cm no 0.39 -
|
||||
(3) ? 1.1 15.6 0.1 ENA|BK006936|BK006936.2 1370418 1370563 + cm no 0.34 -
|
||||
(4) ? 1.3 15.3 0.0 ENA|BK006936|BK006936.2 1079243 1079392 + cm no 0.39 -
|
||||
(5) ? 3.1 13.7 0.9 ENA|BK006948|BK006948.2 425490 425693 + cm no 0.34 -
|
||||
(6) ? 8.3 11.9 0.5 ENA|BK006948|BK006948.2 1073786 1073950 + cm no 0.33 -
|
||||
|
||||
|
||||
Hit alignments:
|
||||
>> ENA|BK006936|BK006936.2
|
||||
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
|
||||
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
|
||||
(1) ! 1.3e-20 98.7 0.1 cm 1 193 [] 681858 681747 - .. 0.91 no 0.33
|
||||
|
||||
v NC
|
||||
::::::<<<-<<<<____>>>>->>>,,,,,,,,,,,,,,,,,,,,<<<<<<________>>>>>>,<<<<<<<___>>> CS
|
||||
U2 1 AUacCUUCucgGCcUUUUgGCuaaGAUCAAGUGUAGUAUCUGUUCUUauCAGUuUAAuAuCUGauAuggcccccAuuggg 80
|
||||
AU+ UCU+:GCCUUUUGGC:+AGAUCAAGUGUAGUAUCUGUUCUU:UCAGU+UAA+A+CUGA:AUG: CC:CA+UG:G
|
||||
ENA|BK006936|BK006936.2 681858 AUC---UCUUUGCCUUUUGGCUUAGAUCAAGUGUAGUAUCUGUUCUUUUCAGUGUAACAACUGAAAUGA-CCUCAAUGAG 681783
|
||||
***...************************************************************999.********** PP
|
||||
|
||||
v NC
|
||||
>>>>,,,.,,,,,,,,,,,,,,,,,,,,,,,,,~~~~~~~~~~~~::::::: CS
|
||||
U2 81 ggccaau.uauaUUAaauuaAUUUUUggaacua*[34]**[40]*Acccuuu 193
|
||||
G+:CA+U U+UUAA+UU AC +UUU
|
||||
ENA|BK006936|BK006936.2 681782 GCUCAUUaCCUUUUAAUUUG-------------*[ 6]**[ 3]*ACAUUUU 681747
|
||||
******86555555555443................7.....9..******* PP
|
||||
|
||||
>> ENA|BK006948|BK006948.2
|
||||
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
|
||||
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
|
||||
(2) ? 0.11 19.8 0.0 cm 1 193 [] 737498 737324 - .. 0.96 no 0.39
|
||||
|
||||
NC
|
||||
::::::<<<-<<<<____>>>>->>>,,,,,,,,,,,,,,,,,,,,<<<<<<~~~~~~>>>>>>,<<<<<<<___>>>>> CS
|
||||
U2 1 AUacCUUCucgGCcUUUUgGCuaaGAUCAAGUGUAGUAUCUGUUCUUauCAG*[ 8]*CUGauAuggcccccAuuggggg 82
|
||||
AU+CC U U+ GCC U GGC +A AU AAGU UA UA C GUUCU:A::A U::U: ::::::A U:::::
|
||||
ENA|BK006948|BK006948.2 737498 AUCCCAUAUUUGCCAUC-GGCAUAUAUUAAGUAUAUUAGCAGUUCUAAUUAC*[88]*GUAGUUGGAAGGAUACUAUCCU 737338
|
||||
**************999.*******************************996...*..6999999999999999999999 PP
|
||||
|
||||
NC
|
||||
>>,,,,,,,,,,,,,,,,,,,,,,,,,,,,~~~~~~~~~~~~::::::: CS
|
||||
U2 83 ccaauuauaUUAaauuaAUUUUUggaacua*[34]**[40]*Acccuuu 193
|
||||
: A+ A CC++U
|
||||
ENA|BK006948|BK006948.2 737337 UUAU--------------------------*[ 2]**[ 1]*AUCCCCU 737324
|
||||
9987.............................6.....9..******* PP
|
||||
|
||||
>> ENA|BK006936|BK006936.2
|
||||
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
|
||||
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
|
||||
(3) ? 1.1 15.6 0.1 cm 1 193 [] 1370418 1370563 + .. 0.77 no 0.34
|
||||
|
||||
vv vv v NC
|
||||
::::::<<<-<<<<____>>>>->>>,,,,,~~~~~~,,,,,,,,,,,,,,,,,,,,,,,,,,,,<<<<<<<<----. CS
|
||||
U2 1 AUacCUUCucgGCcUUUUgGCuaaGAUCAAG*[53]*aauuauaUUAaauuaAUUUUUggaacuaGugggggcauuu. 124
|
||||
U UUCU :G UUU C:AAGAUCAAG AA AUAUUAA+ AA UUUUG+A ++A::::: :: +
|
||||
ENA|BK006936|BK006936.2 1370418 UUUUGUUCUAUGUAAUUUGCCUAAGAUCAAG*[ 8]*AA-CAUAUUAAUAGAACUUUUGAAGUGACAAUCGCGCGAAg 1370496
|
||||
******************99*********98...7..44.99**********************99999988877666 PP
|
||||
|
||||
v v v v vvvvv vvvvv v NC
|
||||
.....<<~~~~~>>-.->>>>>>>>,,<<<<<<-<<<<<<___________>>>>>>-->>>>>>::::::: CS
|
||||
U2 125 .....ug*[8]*cau.ugcccccaCacggguugaccuggcaUUGCAcUaccgccagguucagcccAcccuuu 193
|
||||
U +:: ::::: +: ::U A U+ :AUU UAC+: U A:: :ACC+ U
|
||||
ENA|BK006936|BK006936.2 1370497 uuuccAG*[8]*UAUaAUGGGAUUGUUUGCCUUAGGUACAAUUA---UACUU-----GUGAGGGGACCUAGU 1370563
|
||||
6644433..4..44467899999999988888754444444433...22222.....46888899******* PP
|
||||
|
||||
>> ENA|BK006936|BK006936.2
|
||||
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
|
||||
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
|
||||
(4) ? 1.3 15.3 0.0 cm 1 193 [] 1079243 1079392 + .. 0.91 no 0.39
|
||||
|
||||
v v v NC
|
||||
::::::.<<<.-<<<<____>>>>->>>,,,,.,,,,,,~~~~~~,,,,,,,,,,,,,,,,,,,,,,,,,,,,<<<<< CS
|
||||
U2 1 AUacCU.UCu.cgGCcUUUUgGCuaaGAUCAA.GUGUAG*[48]*aauuauaUUAaauuaAUUUUUggaacuaGuggg 117
|
||||
AU UCU + G C UUG C AGAU A GUGUAG UUAUAU +UU AU UUU G +A:: :
|
||||
ENA|BK006936|BK006936.2 1079243 AUGUUGaUCUaUCGUCAAUUGACCCAGAUGAUaGUGUAG*[ 1]*-GUUAUAUAGUUUUGAUAUUUUGGCGAAAAGUU 1079314
|
||||
*****9****999****************9988999987...5...33777788888888888888888888888888 PP
|
||||
|
||||
v v v v v v v v v NC
|
||||
<<<----.<<<<<__>>>>>-..->>>>>>>>,,<<<<<<-<<<<<<___________>>>>>>-->>>>>>:::::: CS
|
||||
U2 118 ggcauuu.uggGCUUGCccau..ugcccccaCacggguugaccuggcaUUGCAcUaccgccagguucagcccAcccuu 192
|
||||
G::A+U+ U :GCUUGC: AU +::C : :: G: :AC: G U GCA UA+ C :GU+: :C +U
|
||||
ENA|BK006936|BK006936.2 1079315 GAGAAUAuUGCGCUUGCGUAUauAUUCCAUUUGAGGUGGCACUAGAGCUCGCAUUAU-UACCAGUAGUGGCAGGAUUG 1079391
|
||||
88888888**************99999******************************.99****************** PP
|
||||
|
||||
NC
|
||||
: CS
|
||||
U2 193 u 193
|
||||
+
|
||||
ENA|BK006936|BK006936.2 1079392 C 1079392
|
||||
* PP
|
||||
|
||||
>> ENA|BK006948|BK006948.2
|
||||
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
|
||||
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
|
||||
(5) ? 3.1 13.7 0.9 cm 1 193 [] 425490 425693 + .. 0.72 no 0.34
|
||||
|
||||
v v v NC
|
||||
::::::<<<-~~~~~~->>>,,,,,,,,,,,,,,,,,,,,<<<<<<________>>>>>>,<<<<<<<__~~~~~~>>>> CS
|
||||
U2 1 AUacCUUCuc*[12]*aaGAUCAAGUGUAGUAUCUGUUCUUauCAGUuUAAuAuCUGauAuggcccccAu*[ 1]*gggg 81
|
||||
A +CUUCU+ AAGAUCAAGU U UAUC U U:UC : U A AU: GA:AU:: ::CCA+ GG::
|
||||
ENA|BK006948|BK006948.2 425490 AAUGCUUCUU*[21]*AAGAUCAAGUUUUUUAUCCUUCGAUUUCAAAUGGAGAUUGGAAAUAUAUUCCAA*[11]*GGAA 425589
|
||||
*********7...*..5*****************877778888888888899989999999999999986...5..4444 PP
|
||||
|
||||
v vvvvvvv vvvvvvv v vvvvv NC
|
||||
>>>,,,.,,,,,,,,,,,,,,,,,,,,,,,,,..<<<<<<<<----~~~~~~->>>>>>>>,,<<<<<<-<<<<<<____ CS
|
||||
U2 82 gccaau.uauaUUAaauuaAUUUUUggaacua..Gugggggcauuu*[13]*ugcccccaCacggguugaccuggcaUUG 165
|
||||
::+AU U+U UAAA +A UUUU GAA+U+ G + C+++++C ::UU :AU G
|
||||
ENA|BK006948|BK006948.2 425590 AAUUAUcUUUGCUAAAACUAGUUUUAGAAAUUggG-----------*[19]*ACCUAAUUCGACUCUUUC-----GAUAG 425666
|
||||
44444474444444444444444444444444233..............5..344555555555555555.....89*** PP
|
||||
|
||||
vvvvv v NC
|
||||
_______>>>>>>-->>>>>>::::::: CS
|
||||
U2 166 CAcUaccgccagguucagcccAcccuuu 193
|
||||
CACU++ :++A+++U AG:: AC ++UU
|
||||
ENA|BK006948|BK006948.2 425667 CACUUU-CAAAAAAUGAGGAUACAUCUU 425693
|
||||
******.56666679************* PP
|
||||
|
||||
>> ENA|BK006948|BK006948.2
|
||||
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
|
||||
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
|
||||
(6) ? 8.3 11.9 0.5 cm 1 193 [] 1073786 1073950 + .. 0.87 no 0.33
|
||||
|
||||
v NC
|
||||
::::::~~~~~~,,,,,,,,,,,,,,,,,,,,~~~~~~~~~~~~,,,.,,,,,,,,,,,,,,,,,,,,,,,,,<<<<< CS
|
||||
U2 1 AUacCU*[20]*UCAAGUGUAGUAUCUGUUCU*[20]**[18]*aau.uauaUUAaauuaAUUUUUggaacuaGuggg 117
|
||||
AUA CU AU AUAUUA AAUUUUU GA ++A : :
|
||||
ENA|BK006948|BK006948.2 1073786 AUAUCU*[25]*--------------------*[10]**[10]*-AUgCAUAUUAUCGAAAUUUUUAGAGAAAGGUUC 1073869
|
||||
***777...9.........................8.....8...336*************************99999 PP
|
||||
|
||||
vv v v vv v v v vv v v vv v v NC
|
||||
<<<----....<<<<<__>>>>>-.->>>>>>>>,,<<<<<<-<<<<<<__________.._>>>>>>-->>>>>>:: CS
|
||||
U2 118 ggcauuu....uggGCUUGCccau.ugcccccaCacggguugaccuggcaUUGCAcUac..cgccagguucagcccAc 188
|
||||
: AUU U: G:UU:C :AU U :: : AC G::U ++: :: UUGCA U +:: : UU A::C AC
|
||||
ENA|BK006948|BK006948.2 1073870 ACGAUUCaagcUUUGUUUACCAAUgUGAUGUAUUACAGUGUUCAUGUCACUUGCAUUGAcaUUGUC--UUUACACCAC 1073945
|
||||
988888899***************888899999*********988888889****98774334433..4699****** PP
|
||||
|
||||
NC
|
||||
::::: CS
|
||||
U2 189 ccuuu 193
|
||||
+U+
|
||||
ENA|BK006948|BK006948.2 1073946 AUUAA 1073950
|
||||
***** PP
|
||||
|
||||
|
||||
|
||||
Internal CM pipeline statistics summary:
|
||||
----------------------------------------
|
||||
Query model(s): 1 (193 consensus positions)
|
||||
Target sequences: 2 (5377616 residues searched)
|
||||
Target sequences re-searched for truncated hits: 2 (1928 residues re-searched)
|
||||
Windows passing local HMM SSV filter: 16114 (0.7946); expected (0.35)
|
||||
Windows passing local HMM Viterbi filter: (off)
|
||||
Windows passing local HMM Viterbi bias filter: (off)
|
||||
Windows passing local HMM Forward filter: 6428 (0.38); expected (0.005)
|
||||
Windows passing local HMM Forward bias filter: 943 (0.06533); expected (0.005)
|
||||
Windows passing glocal HMM Forward filter: 489 (0.04832); expected (0.005)
|
||||
Windows passing glocal HMM Forward bias filter: 276 (0.02619); expected (0.005)
|
||||
Envelopes passing glocal HMM envelope defn filter: 298 (0.01273); expected (0.005)
|
||||
Envelopes passing local CM CYK filter: 19 (0.0004309); expected (0.0001)
|
||||
Total CM hits reported: 6 (0.000177); includes 0 truncated hit(s)
|
||||
|
||||
# CPU time: 30.84u 0.50s 00:00:31.34 Elapsed: 00:00:08.41
|
||||
//
|
||||
[ok]
|
@ -269,15 +269,17 @@ class CmsearchCases(unittest.TestCase):
|
||||
self.assertEqual("ENA|BK006936|BK006936.2", hit.id)
|
||||
self.assertEqual("-", hit.accession)
|
||||
self.assertEqual("TPA_inf: Saccharomyces cerevisiae S288C chromosome II, complete sequence.", hit.description)
|
||||
|
||||
hsp = hit[0]
|
||||
self.assertEqual(1, len(hsp))
|
||||
self.assertEqual(5.9e-20, hsp.evalue)
|
||||
self.assertEqual(98.7, hsp.bitscore)
|
||||
self.assertEqual(0.1, hsp.bias)
|
||||
self.assertEqual(True, hsp.is_included)
|
||||
|
||||
self.assertEqual(0.33, hsp.gc)
|
||||
self.assertEqual("no", hsp.truncated)
|
||||
self.assertEqual(1, hsp.pipeline_pass)
|
||||
self.assertEqual(True, hsp.is_included)
|
||||
frag = hsp[0]
|
||||
self.assertEqual(1, frag.query_start)
|
||||
self.assertEqual(193, frag.query_end)
|
||||
@ -345,6 +347,72 @@ class CmsearchCases(unittest.TestCase):
|
||||
self.assertEqual(1, counter)
|
||||
|
||||
|
||||
def test_cmsearch_1q_mm(self):
    """Test parsing infernal-tab, cmsearch, one query, two hits, three hsps each.

    In U2_Yeast-shuf.tbl the rows of the two target sequences are
    interleaved (ranks 1, 3, 4 versus ranks 2, 5, 6). The assertions
    below expect them grouped into one hit per target, with the hsps
    kept in rank order inside each hit.
    """
    qresults = parse(get_file("U2_Yeast-shuf.tbl"), FMT)
    counter = 0

    qresult = next(qresults)
    counter += 1
    self.assertEqual(2, len(qresult))
    self.assertEqual(qresult.id, "U2")
    self.assertEqual(qresult.accession, "RF00004")

    # One entry per expected hit, in qresult order. Each hsp tuple is
    # (query_start, query_end, hit_start, hit_end); the trailing comment
    # gives the rank of the row in the table.
    expected_hits = (
        (
            "ENA|BK006936|BK006936.2",
            (
                (1, 193, 681747, 681858),  # rank 1
                (1, 193, 1370418, 1370563),  # rank 3
                (1, 193, 1079243, 1079392),  # rank 4
            ),
        ),
        (
            "ENA|BK006948|BK006948.2",
            (
                (1, 193, 737324, 737498),  # rank 2
                (1, 193, 425490, 425693),  # rank 5
                (1, 193, 1073786, 1073950),  # rank 6
            ),
        ),
    )

    for hit_index, (hit_id, expected_hsps) in enumerate(expected_hits):
        hit = qresult[hit_index]
        self.assertEqual(len(expected_hsps), len(hit))
        self.assertEqual(hit.id, hit_id)
        self.assertEqual(hit.description, "-")
        self.assertEqual(hit.query_id, "U2")
        for hsp_index, coords in enumerate(expected_hsps):
            q_start, q_end, h_start, h_end = coords
            hsp = hit[hsp_index]
            self.assertEqual(hsp.query_start, q_start)
            self.assertEqual(hsp.query_end, q_end)
            self.assertEqual(hsp.hit_start, h_start)
            self.assertEqual(hsp.hit_end, h_end)

    # test if we've properly finished iteration
    self.assertRaises(StopIteration, next, qresults)
    self.assertEqual(1, counter)
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
runner = unittest.TextTestRunner(verbosity=2)
|
||||
|
@ -264,6 +264,75 @@ class CmsearchCases(unittest.TestCase):
|
||||
self.assertEqual(1, count)
|
||||
|
||||
|
||||
def test_cmsearch_1q_mm_1h_mf_shuf(self):
    """Test parsing infernal-text, cmsearch, one query, two hits, three hsps each.

    In U2_Yeast-shuf.txt the alignment sections of the two target
    sequences are interleaved (ranks 1, 3, 4 versus ranks 2, 5, 6). The
    assertions below expect them grouped into one hit per target.
    """
    qresults = parse(get_file("U2_Yeast-shuf.txt"), FMT)
    counter = itertools.count(start=1)

    qresult, count = next_result(qresults, counter)
    self.assertEqual(2, len(qresult))
    # Query-level metadata, checked in the same order as before.
    for attribute, value in (
        ("id", "U2"),
        ("seq_len", 193),
        ("accession", "RF00004"),
        ("description", "U2 spliceosomal RNA"),
        ("program", "cmsearch"),
        ("version", "1.1.5"),
        ("target", "BK006936_7-8.fasta"),
    ):
        self.assertEqual(getattr(qresult, attribute), value)

    # One entry per expected hit, in qresult order. Each hsp tuple is
    # (query_start, query_end, hit_start, hit_end); the trailing comment
    # gives the rank of the hit in the report.
    expected_hits = (
        (
            "ENA|BK006936|BK006936.2",
            (
                (1, 193, 681747, 681858),  # rank 1
                (1, 193, 1370418, 1370563),  # rank 3
                (1, 193, 1079243, 1079392),  # rank 4
            ),
        ),
        (
            "ENA|BK006948|BK006948.2",
            (
                (1, 193, 737324, 737498),  # rank 2
                (1, 193, 425490, 425693),  # rank 5
                (1, 193, 1073786, 1073950),  # rank 6
            ),
        ),
    )

    for hit_index, (hit_id, expected_hsps) in enumerate(expected_hits):
        hit = qresult[hit_index]
        self.assertEqual(len(expected_hsps), len(hit))
        self.assertEqual(hit.id, hit_id)
        self.assertEqual(hit.description, "")
        self.assertEqual(hit.query_id, "U2")
        for hsp_index, coords in enumerate(expected_hsps):
            q_start, q_end, h_start, h_end = coords
            hsp = hit[hsp_index]
            self.assertEqual(hsp.query_start, q_start)
            self.assertEqual(hsp.query_end, q_end)
            self.assertEqual(hsp.hit_start, h_start)
            self.assertEqual(hsp.hit_end, h_end)

    # test if we've properly finished iteration
    self.assertRaises(StopIteration, next, qresults)
    self.assertEqual(1, count)
|
||||
|
||||
|
||||
def test_cmsearch_1q_mm_mh_1f(self):
|
||||
"""Test parsing infernal-text, cmsearch, one queries, one hit, multiple hsp, one fragment"""
|
||||
tab_file = get_file("5S_Yeast.txt")
|
||||
@ -419,6 +488,90 @@ class CmsearchCases(unittest.TestCase):
|
||||
self.assertEqual(1, count)
|
||||
|
||||
|
||||
def test_cmsearch_1q_1m_mh_noali_inc(self):
    """Test parsing infernal-text, cmsearch, one query, five hits, noali, inclusion threshold.

    U2_Yeast-noali.txt has no alignment section and contains an
    "inclusion threshold" separator after the first hit. The first,
    second and last hits are checked in detail.
    """
    qresults = parse(get_file("U2_Yeast-noali.txt"), FMT)
    counter = itertools.count(start=1)

    qresult, count = next_result(qresults, counter)
    self.assertEqual(5, len(qresult))
    # Query-level metadata, checked in the same order as before.
    for attribute, value in (
        ("id", "U2"),
        ("seq_len", 193),
        ("accession", "RF00004"),
        ("description", "U2 spliceosomal RNA"),
        ("program", "cmsearch"),
        ("version", "1.1.5"),
        ("target", "GCA_000146045.2.fasta"),
    ):
        self.assertEqual(getattr(qresult, attribute), value)

    # Columns: index into qresult, hit id, hit description, gc, evalue,
    # bitscore, bias, is_included, hit_start, hit_end, hit_strand.
    # The last row is a "+" strand hit in the fixture; the expected
    # hit_strand for it is 0, exactly as the assertions were written.
    expected_hits = (
        (
            0,
            "ENA|BK006936|BK006936.2",
            "TPA_inf: Saccharomyces cerevisiae S288C chromosome II,",
            0.33, 5.9e-20, 98.7, 0.1, True, 681747, 681858, -1,
        ),
        (
            1,
            "ENA|BK006948|BK006948.2",
            "TPA_inf: Saccharomyces cerevisiae S288C chromosome XV,",
            0.39, 0.49, 19.8, 0.0, False, 737324, 737498, -1,
        ),
        (
            -1,
            "ENA|BK006939|BK006939.2",
            "TPA_inf: Saccharomyces cerevisiae S288C chromosome V,",
            0.41, 7.1, 14.9, 0.0, False, 190882, 191043, 0,
        ),
    )

    for row in expected_hits:
        (index, hit_id, description, gc, evalue, bitscore, bias,
         included, start, end, strand) = row

        hit = qresult[index]
        self.assertEqual(1, len(hit))
        self.assertEqual(hit.id, hit_id)
        self.assertEqual(hit.description, description)
        self.assertEqual(hit.query_id, "U2")

        hsp = hit[0]
        self.assertEqual(1, len(hsp))
        self.assertEqual(hsp.model, "cm")
        self.assertEqual(hsp.truncated, "no")
        self.assertEqual(hsp.gc, gc)
        self.assertEqual(hsp.evalue, evalue)
        self.assertEqual(hsp.bitscore, bitscore)
        self.assertEqual(hsp.bias, bias)
        self.assertEqual(hsp.is_included, included)
        self.assertEqual(hsp.hit_start, start)
        self.assertEqual(hsp.hit_end, end)

        frag = hsp[0]
        self.assertEqual(frag.hit_start, start)
        self.assertEqual(frag.hit_end, end)
        self.assertEqual(frag.hit_strand, strand)

    # test if we've properly finished iteration
    self.assertRaises(StopIteration, next, qresults)
    self.assertEqual(1, count)
|
||||
|
||||
|
||||
def test_cmsearch_1q_1m_1h_1f_hmmonly(self):
|
||||
"""Test parsing infernal-text, cmsearch, one queries, one hit, one hsp, one fragments, hmmonly"""
|
||||
tab_file = get_file("U2_Yeast-hmmonly.txt")
|
||||
|
Reference in New Issue
Block a user