From bc0d58b4c43152e598b8c4c5ceaba4c31c1c10b7 Mon Sep 17 00:00:00 2001
From: Gert Hulselmans <gert.hulselmans@kuleuven.be>
Date: Fri, 9 Aug 2024 16:53:03 +0200
Subject: [PATCH] Add support for reading and writing GAP and WEIGHT
 parameters in Cluster Buster motif files.

---
 Bio/motifs/clusterbuster.py    | 25 +++++++++++++++++++++----
 Tests/motifs/clusterbuster.pfm |  4 +++-
 Tests/test_motifs.py           |  6 ++++++
 3 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/Bio/motifs/clusterbuster.py b/Bio/motifs/clusterbuster.py
index 210bc69b6..3b599359f 100644
--- a/Bio/motifs/clusterbuster.py
+++ b/Bio/motifs/clusterbuster.py
@@ -23,12 +23,14 @@ class Record(list):
 def read(handle):
     """Read motifs in Cluster Buster position frequency matrix format from a file handle.
 
-    Cluster Buster motif format: http://zlab.bu.edu/cluster-buster/help/cis-format.html
+    Cluster Buster motif format: https://bu.wenglab.org/cluster-buster/help/cis-format.html
     """
     motif_nbr = 0
     record = Record()
     nucleotide_counts = {"A": [], "C": [], "G": [], "T": []}
     motif_name = ""
+    motif_gap = None
+    motif_weight = None
 
     for line in handle:
         line = line.strip()
@@ -37,13 +39,23 @@ def read(handle):
                 if motif_nbr != 0:
                     motif = motifs.Motif(alphabet="GATC", counts=nucleotide_counts)
                     motif.name = motif_name
+                    motif.gap = motif_gap
+                    motif.weight = motif_weight
                     record.append(motif)
 
                 motif_name = line[1:].strip()
                 nucleotide_counts = {"A": [], "C": [], "G": [], "T": []}
+                motif_gap = None
+                motif_weight = None
                 motif_nbr += 1
             else:
-                if line.startswith("#"):
+                if line.startswith("# GAP"):
+                    motif_gap = float(line.split()[2])
+                    continue
+                elif line.startswith("# WEIGHT"):
+                    motif_weight = float(line.split()[2])
+                    continue
+                elif line.startswith("#"):
                     continue
 
                 matrix_columns = line.split()
@@ -58,6 +70,8 @@ def read(handle):
 
     motif = motifs.Motif(alphabet="GATC", counts=nucleotide_counts)
     motif.name = motif_name
+    motif.gap = motif_gap
+    motif.weight = motif_weight
     record.append(motif)
 
     return record
@@ -67,8 +81,11 @@ def write(motifs):
     """Return the representation of motifs in Cluster Buster position frequency matrix format."""
     lines = []
     for m in motifs:
-        line = f">{m.name}\n"
-        lines.append(line)
+        lines.append(f">{m.name}\n")
+        if m.weight:
+            lines.append(f"# WEIGHT: {m.weight}\n")
+        if m.gap:
+            lines.append(f"# GAP: {m.gap}\n")
         for ACGT_counts in zip(
             m.counts["A"], m.counts["C"], m.counts["G"], m.counts["T"]
         ):
diff --git a/Tests/motifs/clusterbuster.pfm b/Tests/motifs/clusterbuster.pfm
index ba7ba51e5..265d5ead3 100644
--- a/Tests/motifs/clusterbuster.pfm
+++ b/Tests/motifs/clusterbuster.pfm
@@ -13,6 +13,8 @@
 0   0   0   24
 0   0   24  0
 >MA0008.1
+# WEIGHT: 3.0
+# GAP: 10.0
 3   13  4   5
 21  1   0   3
 25  0   0   0
@@ -20,4 +22,4 @@
 0   5   0   20
 24  0   1   0
 1   0   0   24
-0   0   2   23
\ No newline at end of file
+0   0   2   23
diff --git a/Tests/test_motifs.py b/Tests/test_motifs.py
index 501bc9f7f..7b5311efc 100644
--- a/Tests/test_motifs.py
+++ b/Tests/test_motifs.py
@@ -1659,6 +1659,8 @@ class TestClusterBuster(unittest.TestCase):
             )
             self.assertEqual(motif[1:-2].consensus, "ACG")
             self.assertEqual(motif.length, 6)
+            self.assertIsNone(motif.weight)
+            self.assertIsNone(motif.gap)
             self.assertAlmostEqual(motif.counts["G", 0], 0.0)
             self.assertAlmostEqual(motif.counts["G", 1], 1.0)
             self.assertAlmostEqual(motif.counts["G", 2], 0.0)
@@ -1705,6 +1707,8 @@ class TestClusterBuster(unittest.TestCase):
             )
             self.assertEqual(motif[1:-2].consensus, "GCG")
             self.assertEqual(motif.length, 6)
+            self.assertIsNone(motif.weight)
+            self.assertIsNone(motif.gap)
             self.assertAlmostEqual(motif.counts["G", 0], 2.0)
             self.assertAlmostEqual(motif.counts["G", 1], 23.0)
             self.assertAlmostEqual(motif.counts["G", 2], 0.0)
@@ -1753,6 +1757,8 @@ class TestClusterBuster(unittest.TestCase):
             )
             self.assertEqual(motif[1:-2].consensus, "AATTA")
             self.assertEqual(motif.length, 8)
+            self.assertEqual(motif.weight, 3.0)
+            self.assertEqual(motif.gap, 10.0)
             self.assertAlmostEqual(motif.counts["G", 0], 4.0)
             self.assertAlmostEqual(motif.counts["G", 1], 0.0)
             self.assertAlmostEqual(motif.counts["G", 2], 0.0)