Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

csplit: do not rely on undefined behavior in printf formats

* doc/coreutils.texi (csplit invocation): Say that %d and %i are
aliases for %u.
* src/csplit.c (FLAG_THOUSANDS, FLAG_ALTERNATIVE): New constants.
(get_format_flags): Now take char const * and int * and return
size_t.  It now stores info about the flags instead of merely
scanning them.  Also, it handles '0' correctly.  Drop support for
the undocumented '+' and ' ' flags since the value is unsigned.
Add support for the (undocumented) "'" flag.  All uses changed.
(get_format_width, get_format_prec): Remove.
(check_format_conv_type): Renamed from get_format_conv_type, with
a different signature.  It now converts the format to one that is
compatible with unsigned int, and checks flags.  All uses changed.
(max_out): Have snprintf compute the number of bytes needed rather
than attempting to do it ourselves (which doesn't work portably
with outlandish formats such as %4294967296d).
(check_format_conv_type, main): Check for overflow in size
calculations.  Don't assume size_t fits in unsigned int.
* tests/misc/csplit: Check for proper handling of flags, with
%0#6.3x.  Coreutils 8.6 mishandles this somewhat-weird example.
  • Loading branch information...
commit 6568b173db1c98f39a53eadd9b09e0a0e5c11920 1 parent 0339eb4
Paul Eggert eggert authored Jim Meyering committed
Showing with 81 additions and 78 deletions.
  1. +2 −1  doc/coreutils.texi
  2. +64 −77 src/csplit.c
  3. +15 −0 tests/misc/csplit
3  doc/coreutils.texi
View
@@ -3082,7 +3082,8 @@ specified, the suffix string must include exactly one
@code{printf(3)}-style conversion specification, possibly including
format specification flags, a field width, a precision specifications,
or all of these kinds of modifiers. The format letter must convert a
-binary integer argument to readable form; thus, only @samp{d}, @samp{i},
+binary unsigned integer argument to readable form. The format letters
+@samp{d} and @samp{i} are aliases for @samp{u}, and the
@samp{u}, @samp{o}, @samp{x}, and @samp{X} conversions are allowed. The
entire @var{suffix} is given (with the current output file number) to
@code{sprintf(3)} to form the file name suffixes for each of the
141 src/csplit.c
View
@@ -1181,81 +1181,64 @@ parse_patterns (int argc, int start, char **argv)
}
}
-static unsigned int
-get_format_flags (char **format_ptr)
+
+
+/* Names for the printf format flags ' and #. These can be ORed together. */
+enum { FLAG_THOUSANDS = 1, FLAG_ALTERNATIVE = 2 };
+
+/* Scan the printf format flags in FORMAT, storing info about the
+ flags into *FLAGS_PTR. Return the number of flags found. */
+static size_t
+get_format_flags (char const *format, int *flags_ptr)
{
- unsigned int count = 0;
+ int flags = 0;
- for (; **format_ptr; (*format_ptr)++)
+ for (size_t count = 0; ; count++)
{
- switch (**format_ptr)
+ switch (format[count])
{
case '-':
+ case '0':
break;
- case '+':
- case ' ':
- count |= 1;
+ case '\'':
+ flags |= FLAG_THOUSANDS;
break;
case '#':
- count |= 2; /* Allow for 0x prefix preceding an `x' conversion. */
+ flags |= FLAG_ALTERNATIVE;
break;
default:
+ *flags_ptr = flags;
return count;
}
}
- return count;
-}
-
-static size_t
-get_format_width (char **format_ptr)
-{
- unsigned long int val = 0;
-
- if (ISDIGIT (**format_ptr)
- && (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
- || SIZE_MAX < val))
- error (EXIT_FAILURE, 0, _("invalid format width"));
-
- /* Allow for enough octal digits to represent the value of UINT_MAX,
- even if the field width is less than that. */
- return MAX (val, (sizeof (unsigned int) * CHAR_BIT + 2) / 3);
-}
-
-static size_t
-get_format_prec (char **format_ptr)
-{
- if (**format_ptr != '.')
- return 0;
- (*format_ptr)++;
-
- if (! ISDIGIT (**format_ptr))
- return 0;
- else
- {
- unsigned long int val;
- if (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
- || SIZE_MAX < val)
- error (EXIT_FAILURE, 0, _("invalid format precision"));
- return val;
- }
}
+/* Check that the printf format conversion specifier *FORMAT is valid
+ and compatible with FLAGS. Change it to 'u' if it is 'd' or 'i',
+ since the format will be used with an unsigned value. */
static void
-get_format_conv_type (char **format_ptr)
+check_format_conv_type (char *format, int flags)
{
- unsigned char ch = *(*format_ptr)++;
+ unsigned char ch = *format;
+ int compatible_flags = FLAG_THOUSANDS;
switch (ch)
{
case 'd':
case 'i':
- case 'o':
+ *format = 'u';
+ break;
+
case 'u':
+ break;
+
+ case 'o':
case 'x':
case 'X':
+ compatible_flags = FLAG_ALTERNATIVE;
break;
case 0:
@@ -1270,45 +1253,46 @@ get_format_conv_type (char **format_ptr)
error (EXIT_FAILURE, 0,
_("invalid conversion specifier in suffix: \\%.3o"), ch);
}
+
+ if (flags & ~ compatible_flags)
+ error (EXIT_FAILURE, 0,
+ _("invalid flags in conversion specification: %%%c%c"),
+ (flags & ~ compatible_flags & FLAG_ALTERNATIVE ? '#' : '\''), ch);
}
+/* Return the maximum number of bytes that can be generated by
+ applying FORMAT to an unsigned int value. If the format is
+ invalid, diagnose the problem and exit. */
static size_t
max_out (char *format)
{
- size_t out_count = 0;
bool percent = false;
- while (*format)
- {
- if (*format++ != '%')
- out_count++;
- else if (*format == '%')
- {
- format++;
- out_count++;
- }
- else
- {
- if (percent)
- error (EXIT_FAILURE, 0,
- _("too many %% conversion specifications in suffix"));
- percent = true;
- out_count += get_format_flags (&format);
- {
- size_t width = get_format_width (&format);
- size_t prec = get_format_prec (&format);
-
- out_count += MAX (width, prec);
- }
- get_format_conv_type (&format);
- }
- }
+ for (char *f = format; *f; f++)
+ if (*f == '%' && *++f != '%')
+ {
+ if (percent)
+ error (EXIT_FAILURE, 0,
+ _("too many %% conversion specifications in suffix"));
+ percent = true;
+ unsigned int flags;
+ f += get_format_flags (f, &flags);
+ while (ISDIGIT (*f))
+ f++;
+ if (*f == '.')
+ while (ISDIGIT (*++f))
+ continue;
+ check_format_conv_type (f, flags);
+ }
if (! percent)
error (EXIT_FAILURE, 0,
_("missing %% conversion specification in suffix"));
- return out_count;
+ int maxlen = snprintf (NULL, 0, format, UINT_MAX);
+ if (! (0 <= maxlen && maxlen <= SIZE_MAX))
+ xalloc_die ();
+ return maxlen;
}
int
@@ -1349,7 +1333,7 @@ main (int argc, char **argv)
case 'n':
if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
- || val > INT_MAX)
+ || MIN (INT_MAX, SIZE_MAX) < val)
error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
digits = val;
break;
@@ -1380,11 +1364,14 @@ main (int argc, char **argv)
usage (EXIT_FAILURE);
}
- unsigned int max_digit_string_len
+ size_t prefix_len = strlen (prefix);
+ size_t max_digit_string_len
= (suffix
? max_out (suffix)
: MAX (INT_STRLEN_BOUND (unsigned int), digits));
- filename_space = xmalloc (strlen (prefix) + max_digit_string_len + 1);
+ if (SIZE_MAX - 1 - prefix_len < max_digit_string_len)
+ xalloc_die ();
+ filename_space = xmalloc (prefix_len + max_digit_string_len + 1);
set_input_file (argv[optind++]);
15 tests/misc/csplit
View
@@ -67,6 +67,21 @@ EOF
compare err experr || fail=1
rm -f in out exp err experr
+# `echo | csplit -b '%0#6.3x' - 1' incorrectly warned about the format
+# up through coreutils 8.6.
+echo > in
+csplit -b '%0#6.3x' in 1 > out 2> err || fail=1
+cat <<EOF > exp
+0
+1
+EOF
+compare out exp || fail=1
+touch experr
+compare err experr || fail=1
+compare 'xx 000' experr || fail=1
+compare 'xx 0x001' in || fail=1
+rm -f in out exp err experr xx*
+
# make sure `csplit FILE 0' fails.
echo > in
csplit in 0 > out 2> err && fail=1
Please sign in to comment.
Something went wrong with that request. Please try again.