Skip to content

Commit

Permalink
pythongh-124531: Fix strftime() with embedded null characters
Browse files Browse the repository at this point in the history
* time.strftime() (previously raised ValueError)
* the strftime() method and formatting of the datetime classes
  datetime, date and time (previously truncated at the null character)
  • Loading branch information
serhiy-storchaka committed Oct 5, 2024
1 parent 5e9e506 commit 166b6fe
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 52 deletions.
20 changes: 20 additions & 0 deletions Lib/test/datetimetester.py
Original file line number Diff line number Diff line change
Expand Up @@ -2955,6 +2955,16 @@ def test_more_strftime(self):
except UnicodeEncodeError:
pass

def test_strftime_embedded_nul(self):
# gh-124531: The null character should not terminate the format string.
# Regression test: NUL characters in the format must be copied to the
# result rather than being treated as the end of the format.
t = self.theclass(2004, 12, 31, 6, 22, 33, 47)
# A format made only of NULs (a single one, then 1000 of them) must
# come back unchanged.
self.assertEqual(t.strftime('\0'), '\0')
self.assertEqual(t.strftime('\0'*1000), '\0'*1000)
# Format each directive on its own first so the expected strings below
# are built from real output instead of hard-coded values.
s1 = t.strftime('%c')
s2 = t.strftime('%x')
# NULs before and between directives, and (second assertion) a trailing
# NUL, must all be preserved in place.
self.assertEqual(t.strftime('\0%c\0%x'), f'\0{s1}\0{s2}')
self.assertEqual(t.strftime('\0%c\0%x\0'), f'\0{s1}\0{s2}\0')

def test_extract(self):
dt = self.theclass(2002, 3, 4, 18, 45, 3, 1234)
self.assertEqual(dt.date(), date(2002, 3, 4))
Expand Down Expand Up @@ -3736,6 +3746,16 @@ def test_strftime(self):
# gh-85432: The parameter was named "fmt" in the pure-Python impl.
t.strftime(format="%f")

def test_strftime_embedded_nul(self):
# gh-124531: The null character should not terminate the format string.
# Regression test: NUL characters in the format must be copied to the
# result rather than being treated as the end of the format.
# NOTE(review): theclass is presumably the time class under test here
# (four positional arguments) -- confirm against the enclosing TestCase.
t = self.theclass(1, 2, 3, 4)
# A format made only of NULs (a single one, then 1000 of them) must
# come back unchanged.
self.assertEqual(t.strftime('\0'), '\0')
self.assertEqual(t.strftime('\0'*1000), '\0'*1000)
# Format each directive on its own first so the expected strings below
# are built from real output instead of hard-coded values.
s1 = t.strftime('%Z')
s2 = t.strftime('%X')
# NULs before and between directives, and (second assertion) a trailing
# NUL, must all be preserved in place.
self.assertEqual(t.strftime('\0%Z\0%X'), f'\0{s1}\0{s2}')
self.assertEqual(t.strftime('\0%Z\0%X\0'), f'\0{s1}\0{s2}\0')

def test_format(self):
t = self.theclass(1, 2, 3, 4)
self.assertEqual(t.__format__(''), str(t))
Expand Down
12 changes: 10 additions & 2 deletions Lib/test/test_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,8 +182,16 @@ def test_strftime(self):
self.fail('conversion specifier: %r failed.' % format)

self.assertRaises(TypeError, time.strftime, b'%S', tt)
# embedded null character
self.assertRaises(ValueError, time.strftime, '%S\0', tt)

def test_strftime_embedded_nul(self):
# gh-124531: The null character should not terminate the format string.
# Regression test for time.strftime(): a format containing NUL characters
# must be formatted in full, with the NULs kept in the result.
# NOTE(review): self.t is set outside this method (presumably a timestamp
# from setUp) -- confirm against the enclosing TestCase.
tt = time.gmtime(self.t)
# A format made only of NULs (a single one, then 1000 of them) must
# come back unchanged.
self.assertEqual(time.strftime('\0', tt), '\0')
self.assertEqual(time.strftime('\0'*1000, tt), '\0'*1000)
# Format each directive on its own first so the expected strings below
# are built from real output instead of hard-coded values.
s1 = time.strftime('%c', tt)
s2 = time.strftime('%x', tt)
# NULs before and between directives, and (second assertion) a trailing
# NUL, must all be preserved in place.
self.assertEqual(time.strftime('\0%c\0%x', tt), f'\0{s1}\0{s2}')
self.assertEqual(time.strftime('\0%c\0%x\0', tt), f'\0{s1}\0{s2}\0')

def _bounds_checking(self, func):
# Make sure that strftime() checks the bounds of the various parts
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
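Fix handling of format strings containing embedded null characters in
:func:`time.strftime`, in the :meth:`~datetime.datetime.strftime` method of
the :mod:`datetime` classes :class:`~datetime.datetime`,
:class:`~datetime.date` and :class:`~datetime.time`, and in formatting of
these classes. Previously :func:`time.strftime` raised :exc:`ValueError` and
the :mod:`datetime` classes truncated the format at the null character.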
22 changes: 7 additions & 15 deletions Modules/_datetimemodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -1837,6 +1837,7 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple,
PyObject *freplacement = NULL; /* py string, replacement for %f */

const char *pin; /* pointer to next char in input format */
const char *pend; /* pointer past the end of input format */
Py_ssize_t flen; /* length of input format */
char ch; /* next char in input format */

Expand Down Expand Up @@ -1886,22 +1887,15 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple,
pnew = PyBytes_AsString(newfmt);
usednew = 0;

while ((ch = *pin++) != '\0') {
if (ch != '%') {
ptoappend = pin - 1;
ntoappend = 1;
}
else if ((ch = *pin++) == '\0') {
/* Null byte follows %, copy only '%'.
*
* Back the pin up one char so that we catch the null check
* the next time through the loop.*/
pin--;
pend = pin + flen;
while (pin != pend) {
ch = *pin++;
if (ch != '%' || pin == pend) {
ptoappend = pin - 1;
ntoappend = 1;
}
/* A % has been seen and ch is the character after it. */
else if (ch == 'z') {
else if ((ch = *pin++) == 'z') {
/* %z -> +HHMM */
if (zreplacement == NULL) {
zreplacement = make_somezreplacement(object, "", tzinfoarg);
Expand Down Expand Up @@ -2035,12 +2029,10 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple,
assert(usednew <= totalnew);
} /* end while() */

if (_PyBytes_Resize(&newfmt, usednew) < 0)
goto Done;
{
PyObject *format;

format = PyUnicode_FromString(PyBytes_AS_STRING(newfmt));
format = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(newfmt), usednew);
if (format != NULL) {
result = PyObject_CallFunctionObjArgs(strftime,
format, timetuple, NULL);
Expand Down
79 changes: 44 additions & 35 deletions Modules/timemodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -787,16 +787,14 @@ time_strftime(PyObject *module, PyObject *args)
PyObject *format;
#endif
PyObject *format_arg;
Py_ssize_t fmtsize;
size_t fmtlen, buflen;
time_char *outbuf = NULL;
size_t i;
size_t outsize, outpos;
PyObject *ret = NULL;

memset((void *) &buf, '\0', sizeof(buf));

/* Will always expect a unicode string to be passed as format.
Given that there's no str type anymore in py3k this seems safe.
*/
if (!PyArg_ParseTuple(args, "U|O:strftime", &format_arg, &tup))
return NULL;

Expand Down Expand Up @@ -835,7 +833,7 @@ time_strftime(PyObject *module, PyObject *args)
buf.tm_isdst = 1;

#ifdef HAVE_WCSFTIME
format = PyUnicode_AsWideCharString(format_arg, NULL);
format = PyUnicode_AsWideCharString(format_arg, &fmtsize);
if (format == NULL)
return NULL;
fmt = format;
Expand All @@ -845,35 +843,36 @@ time_strftime(PyObject *module, PyObject *args)
if (format == NULL)
return NULL;
fmt = PyBytes_AS_STRING(format);
fmtsize = PyBytes_GET_SIZE(format);
#endif

#if defined(MS_WINDOWS) && !defined(HAVE_WCSFTIME)
/* check that the format string contains only valid directives */
for (outbuf = strchr(fmt, '%');
outbuf != NULL;
outbuf = strchr(outbuf+2, '%'))
for (const time_char *f = memchr(fmt, '%', fmtsize);
f != NULL;
f = memchr(f + 2, '%', fmtsize - (f + 2 - fmt)))
{
if (outbuf[1] == '#')
++outbuf; /* not documented by python, */
if (outbuf[1] == '\0')
if (f[1] == '#')
++f; /* not documented by python, */
if (f + 1 >= fmt + fmtsize)
break;
if ((outbuf[1] == 'y') && buf.tm_year < 0) {
if ((f[1] == 'y') && buf.tm_year < 0) {
PyErr_SetString(PyExc_ValueError,
"format %y requires year >= 1900 on Windows");
Py_DECREF(format);
return NULL;
}
}
#elif (defined(_AIX) || (defined(__sun) && defined(__SVR4))) && defined(HAVE_WCSFTIME)
for (outbuf = wcschr(fmt, '%');
outbuf != NULL;
outbuf = wcschr(outbuf+2, '%'))
for (const time_char *f = wmemchr(fmt, '%', fmtsize);
f != NULL;
f = wmemchr(f + 2, '%', fmtsize - (f + 2 - fmt)))
{
if (outbuf[1] == L'\0')
if (f + 1 >= fmt + fmtsize)
break;
/* Issue #19634: On AIX, wcsftime("y", (1899, 1, 1, 0, 0, 0, 0, 0, 0))
returns "0/" instead of "99" */
if (outbuf[1] == L'y' && buf.tm_year < 0) {
if (f[1] == L'y' && buf.tm_year < 0) {
PyErr_SetString(PyExc_ValueError,
"format %y requires year >= 1900 on AIX");
PyMem_Free(format);
Expand All @@ -882,13 +881,14 @@ time_strftime(PyObject *module, PyObject *args)
}
#endif

fmtlen = time_strlen(fmt);

/* I hate these functions that presume you know how big the output
* will be ahead of time...
*/
for (i = 1024; ; i += i) {
outbuf = (time_char *)PyMem_Malloc(i*sizeof(time_char));
outsize = fmtsize + 128;
outpos = 0;
fmtlen = time_strlen(fmt);
while (1) {
outbuf = (time_char *)PyMem_Realloc(outbuf, outsize*sizeof(time_char));
if (outbuf == NULL) {
PyErr_NoMemory();
break;
Expand All @@ -897,32 +897,41 @@ time_strftime(PyObject *module, PyObject *args)
errno = 0;
#endif
_Py_BEGIN_SUPPRESS_IPH
buflen = format_time(outbuf, i, fmt, &buf);
buflen = format_time(outbuf + outpos, outsize - outpos, fmt, &buf);
_Py_END_SUPPRESS_IPH
#if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__)
/* VisualStudio .NET 2005 does this properly */
if (buflen == 0 && errno == EINVAL) {
PyErr_SetString(PyExc_ValueError, "Invalid format string");
PyMem_Free(outbuf);
break;
}
#endif
if (buflen > 0 || i >= 256 * fmtlen) {
/* If the buffer is 256 times as long as the format,
it's probably not failing for lack of room!
More likely, the format yields an empty result,
e.g. an empty format, or %Z when the timezone
is unknown. */
if (buflen == 0 && outsize - outpos < 256 * fmtlen) {
outsize += outsize;
continue;
}
/* If the buffer is 256 times as long as the format,
it's probably not failing for lack of room!
More likely, the format yields an empty result,
e.g. an empty format, or %Z when the timezone
is unknown. */
outpos += buflen + 1;
if (fmtlen < (size_t)fmtsize) {
/* It was not terminating NUL, but an embedded NUL.
Skip the NUL and continue. */
fmt += fmtlen + 1;
fmtsize -= fmtlen + 1;
fmtlen = time_strlen(fmt);
continue;
}
#ifdef HAVE_WCSFTIME
ret = PyUnicode_FromWideChar(outbuf, buflen);
ret = PyUnicode_FromWideChar(outbuf, outpos - 1);
#else
ret = PyUnicode_DecodeLocaleAndSize(outbuf, buflen, "surrogateescape");
ret = PyUnicode_DecodeLocaleAndSize(outbuf, outpos - 1, "surrogateescape");
#endif
PyMem_Free(outbuf);
break;
}
PyMem_Free(outbuf);
break;
}
PyMem_Free(outbuf);
#ifdef HAVE_WCSFTIME
PyMem_Free(format);
#else
Expand Down

0 comments on commit 166b6fe

Please sign in to comment.