mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 13:43:47 +08:00
15 Nov 00 Brad
* Added SubsMat tests, sample files and expected outputs. * Updated alignment test to use SubsMat.FreqTable.
This commit is contained in:
20
Tests/SubsMat/aaDistrib.txt
Normal file
20
Tests/SubsMat/aaDistrib.txt
Normal file
@ -0,0 +1,20 @@
|
||||
A 2377 0.0852491
|
||||
C 383 0.013736
|
||||
D 1669 0.0598573
|
||||
E 1655 0.0593552
|
||||
F 1167 0.0418535
|
||||
G 2185 0.0783632
|
||||
H 627 0.0224868
|
||||
I 1455 0.0521823
|
||||
K 1614 0.0578847
|
||||
L 2360 0.0846394
|
||||
M 651 0.0233476
|
||||
N 1387 0.0497436
|
||||
P 1191 0.0427142
|
||||
Q 1022 0.0366532
|
||||
R 1282 0.0459778
|
||||
S 1664 0.0596779
|
||||
T 1599 0.0573468
|
||||
V 2052 0.0735932
|
||||
W 421 0.0150988
|
||||
Y 1119 0.040132
|
BIN
Tests/SubsMat/acc_rep_mat.pik
Normal file
BIN
Tests/SubsMat/acc_rep_mat.pik
Normal file
Binary file not shown.
20
Tests/SubsMat/protein_count.txt
Normal file
20
Tests/SubsMat/protein_count.txt
Normal file
@ -0,0 +1,20 @@
|
||||
A 2377
|
||||
C 383
|
||||
D 1669
|
||||
E 1655
|
||||
F 1167
|
||||
G 2185
|
||||
H 627
|
||||
I 1455
|
||||
K 1614
|
||||
L 2360
|
||||
M 651
|
||||
N 1387
|
||||
P 1191
|
||||
Q 1022
|
||||
R 1282
|
||||
S 1664
|
||||
T 1599
|
||||
V 2052
|
||||
W 421
|
||||
Y 1119
|
20
Tests/SubsMat/protein_freq.txt
Normal file
20
Tests/SubsMat/protein_freq.txt
Normal file
@ -0,0 +1,20 @@
|
||||
A 0.0852491
|
||||
C 0.013736
|
||||
D 0.0598573
|
||||
E 0.0593552
|
||||
F 0.0418535
|
||||
G 0.0783632
|
||||
H 0.0224868
|
||||
I 0.0521823
|
||||
K 0.0578847
|
||||
L 0.0846394
|
||||
M 0.0233476
|
||||
N 0.0497436
|
||||
P 0.0427142
|
||||
Q 0.0366532
|
||||
R 0.0459778
|
||||
S 0.0596779
|
||||
T 0.0573468
|
||||
V 0.0735932
|
||||
W 0.0150988
|
||||
Y 0.040132
|
7
Tests/SubsMat/redAADistrib.txt
Normal file
7
Tests/SubsMat/redAADistrib.txt
Normal file
@ -0,0 +1,7 @@
|
||||
F 9278 0.332748
|
||||
R 3334 0.119571
|
||||
O 5672 0.203422
|
||||
T 2896 0.103862
|
||||
N 3324 0.119212
|
||||
G 2185 0.0783632
|
||||
P 1191 0.0427142
|
8
Tests/SubsMat/redAltAADistrib.txt
Normal file
8
Tests/SubsMat/redAltAADistrib.txt
Normal file
@ -0,0 +1,8 @@
|
||||
A 3411 0.2104153
|
||||
F 5867 0.1223327
|
||||
R 3334 0.119571
|
||||
O 5672 0.203422
|
||||
T 2896 0.103862
|
||||
N 3324 0.119212
|
||||
G 2185 0.0783632
|
||||
P 1191 0.0427142
|
1140
Tests/output/test_SubsMat
Normal file
1140
Tests/output/test_SubsMat
Normal file
File diff suppressed because it is too large
Load Diff
40
Tests/test_SubsMat.py
Normal file
40
Tests/test_SubsMat.py
Normal file
@ -0,0 +1,40 @@
|
||||
import cPickle
|
||||
import sys
|
||||
from Bio.SubsMat import *
|
||||
|
||||
def _read_with(reader, path, mode='r'):
    """Open path, pass the file object to reader, and always close it.

    Returns whatever reader returns. try/finally is used instead of a
    with-statement so the script stays usable on the old interpreters
    implied by the cPickle import.
    NOTE(review): assumes reader consumes the file before returning --
    confirm for FreqTable.read_count / FreqTable.read_freq.
    """
    handle = open(path, mode)
    try:
        return reader(handle)
    finally:
        handle.close()


# Everything is written to stdout; presumably the test runner compares it
# with the stored expected output -- confirm against the test framework.
f = sys.stdout

# One table is read from raw counts, the other from precomputed frequencies;
# the loop below prints their per-letter absolute difference.
ftab_prot = _read_with(FreqTable.read_count, 'SubsMat/protein_count.txt')
ctab_prot = _read_with(FreqTable.read_freq, 'SubsMat/protein_freq.txt')
f.write("Check differences between derived and true frequencies for each\n")
f.write("letter. Differences should be very small\n")
for i in ftab_prot.alphabet.letters:
    f.write("%s %f\n" % (i, abs(ftab_prot[i] - ctab_prot[i])))

# The fixture is a pickle shipped with the test suite, so it is read in
# binary mode. Never point this at untrusted data: unpickling can execute
# arbitrary code.
acc_rep_mat = _read_with(cPickle.load, 'SubsMat/acc_rep_mat.pik', 'rb')
acc_rep_mat = SubsMat.SeqMat(acc_rep_mat)
obs_freq_mat = SubsMat._build_obs_freq_mat(acc_rep_mat)
ftab_prot2 = SubsMat._exp_freq_table_from_obs_freq(obs_freq_mat)
obs_freq_mat.print_mat(f=f,format=" %4.3f")


f.write("Diff between supplied and matrix-derived frequencies, should be small\n")
# Iteration order is deliberately left as .keys() returns it, so the lines
# are emitted in the same order as before.
for i in ftab_prot.keys():
    f.write("%s %.2f\n" % (i,abs(ftab_prot[i] - ftab_prot2[i])))

# Running total of the per-letter sums printed below.
s = 0.
f.write("Calculating sum of letters for an observed frequency matrix\n")
# all_letters_sum() is called before sum_letters is read; presumably it is
# what fills that mapping -- confirm in Bio.SubsMat.
obs_freq_mat.all_letters_sum()
for i in obs_freq_mat.sum_letters.keys():
    f.write("%s\t%.2f\n" % (i, obs_freq_mat.sum_letters[i]))
    s += obs_freq_mat.sum_letters[i]
f.write("Total sum %.2f should be 1.0\n" % (s))

lo_mat_prot = SubsMat.make_log_odds_matrix(acc_rep_mat=acc_rep_mat,
                                           round_digit=1) #,ftab_prot
f.write("\nLog odds matrix\n")
lo_mat_prot.print_mat(f=f,format=" %.2f",alphabet='AVILMCFWYHSTNQKRDEGP')

f.write("\nTesting MatrixInfo\n")
# Wrap every matrix listed in MatrixInfo.available_matrices in a SeqMat and
# print it under its own heading.
for i in MatrixInfo.available_matrices:
    mat = SubsMat.SeqMat(getattr(MatrixInfo,i))
    f.write("\n%s\n------------\n" % i)
    mat.print_mat(f=f)
|
@ -12,10 +12,12 @@ o Converting between formats"""
|
||||
import os
|
||||
|
||||
# biopython
|
||||
from Bio.Alphabet import IUPAC
|
||||
from Bio.Clustalw import Clustalw
|
||||
from Bio.Align.FormatConvert import FormatConverter
|
||||
from Bio.Align import AlignInfo
|
||||
from Bio.Fasta import FastaAlign
|
||||
from Bio.SubsMat import FreqTable
|
||||
|
||||
print "testing reading and writing clustal format..."
|
||||
test_dir = os.path.join(os.getcwd(), 'Clustalw')
|
||||
@ -74,8 +76,11 @@ e_freq = {'G' : 0.25,
|
||||
'A' : 0.25,
|
||||
'T' : 0.25}
|
||||
|
||||
e_freq_table = FreqTable.FreqTable(e_freq, FreqTable.FREQ,
|
||||
IUPAC.unambiguous_dna)
|
||||
|
||||
print 'relative information:', align_info.information_content(
|
||||
expected_freqs = e_freq,
|
||||
e_freq_table = e_freq_table,
|
||||
chars_to_ignore = ['N'])
|
||||
|
||||
print "testing reading and writing fasta format..."
|
||||
|
Reference in New Issue
Block a user