15 Nov 00 Brad

* Added SubsMat tests, sample files and expected outputs.
* Updated alignment test to use SubsMat.FreqTable.
This commit is contained in:
chapmanb
2000-11-15 18:34:43 +00:00
parent 19dc3415e3
commit 431dec3764
9 changed files with 1261 additions and 1 deletions

View File

@ -0,0 +1,20 @@
A 2377 0.0852491
C 383 0.013736
D 1669 0.0598573
E 1655 0.0593552
F 1167 0.0418535
G 2185 0.0783632
H 627 0.0224868
I 1455 0.0521823
K 1614 0.0578847
L 2360 0.0846394
M 651 0.0233476
N 1387 0.0497436
P 1191 0.0427142
Q 1022 0.0366532
R 1282 0.0459778
S 1664 0.0596779
T 1599 0.0573468
V 2052 0.0735932
W 421 0.0150988
Y 1119 0.040132

Binary file not shown.

View File

@ -0,0 +1,20 @@
A 2377
C 383
D 1669
E 1655
F 1167
G 2185
H 627
I 1455
K 1614
L 2360
M 651
N 1387
P 1191
Q 1022
R 1282
S 1664
T 1599
V 2052
W 421
Y 1119

View File

@ -0,0 +1,20 @@
A 0.0852491
C 0.013736
D 0.0598573
E 0.0593552
F 0.0418535
G 0.0783632
H 0.0224868
I 0.0521823
K 0.0578847
L 0.0846394
M 0.0233476
N 0.0497436
P 0.0427142
Q 0.0366532
R 0.0459778
S 0.0596779
T 0.0573468
V 0.0735932
W 0.0150988
Y 0.040132

View File

@ -0,0 +1,7 @@
F 9278 0.332748
R 3334 0.119571
O 5672 0.203422
T 2896 0.103862
N 3324 0.119212
G 2185 0.0783632
P 1191 0.0427142

View File

@ -0,0 +1,8 @@
A 3411 0.2104153
F 5867 0.1223327
R 3334 0.119571
O 5672 0.203422
T 2896 0.103862
N 3324 0.119212
G 2185 0.0783632
P 1191 0.0427142

1140
Tests/output/test_SubsMat Normal file

File diff suppressed because it is too large Load Diff

40
Tests/test_SubsMat.py Normal file
View File

@ -0,0 +1,40 @@
import cPickle
import sys
from Bio.SubsMat import *
# Regression script for Bio.SubsMat.  All results are written to stdout
# (presumably compared by the test harness against Tests/output/test_SubsMat
# -- confirm against the runner).
f = sys.stdout

# --- Frequency tables ------------------------------------------------------
# NOTE: despite the prefixes, ftab_prot is built from the raw *count* file
# and ctab_prot from the precomputed *frequency* file.
handle = open('SubsMat/protein_count.txt')
try:
    ftab_prot = FreqTable.read_count(handle)
finally:
    # Close explicitly instead of relying on garbage collection.
    handle.close()

handle = open('SubsMat/protein_freq.txt')
try:
    ctab_prot = FreqTable.read_freq(handle)
finally:
    handle.close()

f.write("Check differences between derived and true frequencies for each\n")
f.write("letter. Differences should be very small\n")
for i in ftab_prot.alphabet.letters:
    f.write("%s %f\n" % (i, abs(ftab_prot[i] - ctab_prot[i])))

# --- Accepted replacement matrix -------------------------------------------
# The pickle fixture is a binary file, so it has to be opened in binary mode;
# text mode can corrupt the stream on platforms that translate line endings.
handle = open('SubsMat/acc_rep_mat.pik', 'rb')
try:
    acc_rep_mat = cPickle.load(handle)
finally:
    handle.close()
acc_rep_mat = SubsMat.SeqMat(acc_rep_mat)

# Derive the observed frequency matrix and, from it, a second frequency
# table that is compared with the one read from the count file.
obs_freq_mat = SubsMat._build_obs_freq_mat(acc_rep_mat)
ftab_prot2 = SubsMat._exp_freq_table_from_obs_freq(obs_freq_mat)
obs_freq_mat.print_mat(f=f, format=" %4.3f")

f.write("Diff between supplied and matrix-derived frequencies, should be small\n")
for i in ftab_prot.keys():
    f.write("%s %.2f\n" % (i, abs(ftab_prot[i] - ftab_prot2[i])))

# --- Per-letter sums of the observed frequency matrix ----------------------
s = 0.
f.write("Calculating sum of letters for an observed frequency matrix\n")
# all_letters_sum() is called for its side effect: the loop below reads the
# sum_letters attribute afterwards.
obs_freq_mat.all_letters_sum()
for i in obs_freq_mat.sum_letters.keys():
    f.write("%s\t%.2f\n" % (i, obs_freq_mat.sum_letters[i]))
    s += obs_freq_mat.sum_letters[i]
f.write("Total sum %.2f should be 1.0\n" % (s))

# --- Log odds matrix -------------------------------------------------------
# NOTE: ftab_prot is not passed here; the original script left that extra
# argument commented out.
lo_mat_prot = SubsMat.make_log_odds_matrix(acc_rep_mat=acc_rep_mat,
                                           round_digit=1)
f.write("\nLog odds matrix\n")
lo_mat_prot.print_mat(f=f, format=" %.2f", alphabet='AVILMCFWYHSTNQKRDEGP')

# --- Bundled matrices ------------------------------------------------------
# Wrap every matrix listed in MatrixInfo.available_matrices in a SeqMat and
# print it under a heading with its name.
f.write("\nTesting MatrixInfo\n")
for i in MatrixInfo.available_matrices:
    mat = SubsMat.SeqMat(getattr(MatrixInfo, i))
    f.write("\n%s\n------------\n" % i)
    mat.print_mat(f=f)

View File

@ -12,10 +12,12 @@ o Converting between formats"""
import os
# biopython
from Bio.Alphabet import IUPAC
from Bio.Clustalw import Clustalw
from Bio.Align.FormatConvert import FormatConverter
from Bio.Align import AlignInfo
from Bio.Fasta import FastaAlign
from Bio.SubsMat import FreqTable
print "testing reading and writing clustal format..."
test_dir = os.path.join(os.getcwd(), 'Clustalw')
@ -74,8 +76,11 @@ e_freq = {'G' : 0.25,
'A' : 0.25,
'T' : 0.25}
e_freq_table = FreqTable.FreqTable(e_freq, FreqTable.FREQ,
IUPAC.unambiguous_dna)
print 'relative information:', align_info.information_content(
expected_freqs = e_freq,
e_freq_table = e_freq_table,
chars_to_ignore = ['N'])
print "testing reading and writing fasta format..."