mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 13:43:47 +08:00
Add support for reading GAP and WEIGHT parameters from Cluster Buster motif file.
This commit is contained in:
committed by
Peter Cock
parent
f02217c40e
commit
bc0d58b4c4
@ -23,12 +23,14 @@ class Record(list):
|
||||
def read(handle):
|
||||
"""Read motifs in Cluster Buster position frequency matrix format from a file handle.
|
||||
|
||||
Cluster Buster motif format: http://zlab.bu.edu/cluster-buster/help/cis-format.html
|
||||
Cluster Buster motif format: https://bu.wenglab.org/cluster-buster/help/cis-format.html
|
||||
"""
|
||||
motif_nbr = 0
|
||||
record = Record()
|
||||
nucleotide_counts = {"A": [], "C": [], "G": [], "T": []}
|
||||
motif_name = ""
|
||||
motif_gap = None
|
||||
motif_weight = None
|
||||
|
||||
for line in handle:
|
||||
line = line.strip()
|
||||
@ -37,13 +39,23 @@ def read(handle):
|
||||
if motif_nbr != 0:
|
||||
motif = motifs.Motif(alphabet="GATC", counts=nucleotide_counts)
|
||||
motif.name = motif_name
|
||||
motif.gap = motif_gap
|
||||
motif.weight = motif_weight
|
||||
record.append(motif)
|
||||
|
||||
motif_name = line[1:].strip()
|
||||
nucleotide_counts = {"A": [], "C": [], "G": [], "T": []}
|
||||
motif_gap = None
|
||||
motif_weight = None
|
||||
motif_nbr += 1
|
||||
else:
|
||||
if line.startswith("#"):
|
||||
if line.startswith("# GAP"):
|
||||
motif_gap = float(line.split()[2])
|
||||
continue
|
||||
elif line.startswith("# WEIGHT"):
|
||||
motif_weight = float(line.split()[2])
|
||||
continue
|
||||
elif line.startswith("#"):
|
||||
continue
|
||||
|
||||
matrix_columns = line.split()
|
||||
@ -58,6 +70,8 @@ def read(handle):
|
||||
|
||||
motif = motifs.Motif(alphabet="GATC", counts=nucleotide_counts)
|
||||
motif.name = motif_name
|
||||
motif.gap = motif_gap
|
||||
motif.weight = motif_weight
|
||||
record.append(motif)
|
||||
|
||||
return record
|
||||
@ -67,8 +81,11 @@ def write(motifs):
|
||||
"""Return the representation of motifs in Cluster Buster position frequency matrix format."""
|
||||
lines = []
|
||||
for m in motifs:
|
||||
line = f">{m.name}\n"
|
||||
lines.append(line)
|
||||
lines.append(f">{m.name}\n")
|
||||
if m.weight:
|
||||
lines.append(f"# WEIGHT: {m.weight}\n")
|
||||
if m.gap:
|
||||
lines.append(f"# GAP: {m.gap}\n")
|
||||
for ACGT_counts in zip(
|
||||
m.counts["A"], m.counts["C"], m.counts["G"], m.counts["T"]
|
||||
):
|
||||
|
@ -13,6 +13,8 @@
|
||||
0 0 0 24
|
||||
0 0 24 0
|
||||
>MA0008.1
|
||||
# WEIGHT: 3.0
|
||||
# GAP: 10.0
|
||||
3 13 4 5
|
||||
21 1 0 3
|
||||
25 0 0 0
|
||||
@ -20,4 +22,4 @@
|
||||
0 5 0 20
|
||||
24 0 1 0
|
||||
1 0 0 24
|
||||
0 0 2 23
|
||||
0 0 2 23
|
||||
|
@ -1659,6 +1659,8 @@ class TestClusterBuster(unittest.TestCase):
|
||||
)
|
||||
self.assertEqual(motif[1:-2].consensus, "ACG")
|
||||
self.assertEqual(motif.length, 6)
|
||||
self.assertIsNone(motif.weight)
|
||||
self.assertIsNone(motif.gap)
|
||||
self.assertAlmostEqual(motif.counts["G", 0], 0.0)
|
||||
self.assertAlmostEqual(motif.counts["G", 1], 1.0)
|
||||
self.assertAlmostEqual(motif.counts["G", 2], 0.0)
|
||||
@ -1705,6 +1707,8 @@ class TestClusterBuster(unittest.TestCase):
|
||||
)
|
||||
self.assertEqual(motif[1:-2].consensus, "GCG")
|
||||
self.assertEqual(motif.length, 6)
|
||||
self.assertIsNone(motif.weight)
|
||||
self.assertIsNone(motif.gap)
|
||||
self.assertAlmostEqual(motif.counts["G", 0], 2.0)
|
||||
self.assertAlmostEqual(motif.counts["G", 1], 23.0)
|
||||
self.assertAlmostEqual(motif.counts["G", 2], 0.0)
|
||||
@ -1753,6 +1757,8 @@ class TestClusterBuster(unittest.TestCase):
|
||||
)
|
||||
self.assertEqual(motif[1:-2].consensus, "AATTA")
|
||||
self.assertEqual(motif.length, 8)
|
||||
self.assertEqual(motif.weight, 3.0)
|
||||
self.assertEqual(motif.gap, 10.0)
|
||||
self.assertAlmostEqual(motif.counts["G", 0], 4.0)
|
||||
self.assertAlmostEqual(motif.counts["G", 1], 0.0)
|
||||
self.assertAlmostEqual(motif.counts["G", 2], 0.0)
|
||||
|
Reference in New Issue
Block a user