Locale independent date serialization (#4977)

Locale independent date serialization, also accept datetime.date

Add warning when date is invalid/not in the expected format

Closes #4972
This commit is contained in:
bryan brancotte
2025-03-21 16:39:57 +01:00
committed by GitHub
parent 062eb515f5
commit 79759c8394
3 changed files with 85 additions and 5 deletions

View File

@ -31,7 +31,7 @@ http://www.ebi.ac.uk/imgt/hla/docs/manual.html
"""
import warnings
from datetime import datetime
from datetime import datetime, date as datetime_date
from string import ascii_letters
from string import digits
@ -633,9 +633,6 @@ class GenBankWriter(_InsdcWriter):
# Cope with a list of one string:
if isinstance(date, list) and len(date) == 1:
date = date[0]
if isinstance(date, datetime):
date = date.strftime("%d-%b-%Y").upper()
months = [
"JAN",
"FEB",
@ -650,11 +647,21 @@ class GenBankWriter(_InsdcWriter):
"NOV",
"DEC",
]
if isinstance(date, datetime_date):
date = f"{date.day:02d}-{months[date.month - 1]}-{date.year}"
if not isinstance(date, str) or len(date) != 11:
warnings.warn(
f"Invalid date format provided {record.annotations['date']!r}, using default {default!r}",
BiopythonWarning,
)
return default
try:
datetime(int(date[-4:]), months.index(date[3:6]) + 1, int(date[0:2]))
except ValueError:
warnings.warn(
f"Invalid date provided {record.annotations['date']!r}, using default {default!r}",
BiopythonWarning,
)
date = default
return date

View File

@ -74,6 +74,7 @@ please open an issue on GitHub or mention it on the mailing list.
- Brandon Carter <https://github.com/b-carter>
- Brandon Invergo <https://github.com/brandoninvergo>
- Brian Osborne <https://github.com/bosborne>
- Bryan Brancotte <https://github.com/bryan-brancotte>
- Bryan Lunt <https://github.com/bryan-lunt>
- Caio Fontes <https://github.com/Caiofcas>
- Cam McMenamie <https://github.com/kamurani>

View File

@ -7,7 +7,8 @@
Initially this takes matched tests of GenBank and FASTA files from the NCBI
and confirms they are consistent using our different parsers.
"""
import datetime
import locale
import os
import unittest
import warnings
@ -208,6 +209,27 @@ class SeqIOFeatureTestBaseClass(SeqIOTestBaseClass):
# Not held in EMBL files
self.assertEqual(r1.medline_id, r2.medline_id, msg=msg)
self.assertEqual(r1.pubmed_id, r2.pubmed_id, msg=msg)
elif key == "date" and (
isinstance(old.annotations[key], datetime.datetime)
or isinstance(old.annotations[key], datetime.date)
):
oa = old.annotations[key]
months = [
"JAN",
"FEB",
"MAR",
"APR",
"MAY",
"JUN",
"JUL",
"AUG",
"SEP",
"OCT",
"NOV",
"DEC",
]
oa = f"{oa.day:02d}-{months[oa.month - 1]}-{oa.year}"
self.assertEqual(oa, new.annotations[key], msg=msg)
else:
self.assertEqual(
repr(old.annotations[key]), repr(new.annotations[key]), msg=msg
@ -1064,6 +1086,56 @@ class FeatureWriting(SeqIOFeatureTestBaseClass):
self.write_read_checks()
def test_datetime(self):
default_locale = locale.setlocale(locale.LC_ALL)
for locale_name in [
"C",
"de_DE.utf8",
"fr_FR.utf8",
"pt_BR.utf8",
]:
try:
locale.setlocale(locale.LC_ALL, locale_name)
# Why october ? Because it is abbreviated to OKT in DE, OCT. in FR, OUT in BR
self.record.annotations["date"] = datetime.datetime.strptime(
"2025-10-01", "%Y-%m-%d"
)
self.write_read_check("gb")
except locale.Error:
pass
locale.setlocale(locale.LC_ALL, default_locale)
def test_date(self):
for m in [1, 10, 12]:
self.record.annotations["date"] = datetime.datetime.strptime(
f"2025-{m}-01", "%Y-%m-%d"
).date()
self.write_read_check("gb")
def test_invalid_date_format(self):
# Using a date in the wrong format
self.record.annotations["date"] = "04-04-1970"
stream = StringIO()
with warnings.catch_warnings(record=True) as w:
SeqIO.write([self.record], stream, "gb")
self.assertEqual(len(w), 1, "a warning should be raised")
self.assertIn("Invalid date format", str(w[0].message))
stream.seek(0)
self.assertIn("1980", stream.getvalue())
self.assertNotIn(self.record.annotations["date"], stream.getvalue())
def test_invalid_date_locale(self):
# Using a date not in english which is not accepted by the writer
self.record.annotations["date"] = "04-OKT-1970"
stream = StringIO()
with warnings.catch_warnings(record=True) as w:
SeqIO.write([self.record], stream, "gb")
self.assertEqual(len(w), 1, "a warning should be raised")
self.assertIn("Invalid date", str(w[0].message))
stream.seek(0)
self.assertIn("1980", stream.getvalue())
self.assertNotIn(self.record.annotations["date"], stream.getvalue())
class NC_000932(SeqIOFeatureTestBaseClass):
# This includes an evil dual strand gene