Commit db2fed3b authored by Paul Eggert's avatar Paul Eggert

Several fixes for formatting bignums

* src/bignum.c: Include stdlib.h, for abs.
(bignum_bufsize, bignum_to_c_string): New functions.
* src/bignum.c (bignum_to_string):
* src/print.c (print_vectorlike): Use them.
* src/editfns.c (styled_format): Instead of having a separate
buffer for sprintf (which does not work for bignums), just append
to the main buffer.  When formatting bignums, add support for the
standard integer flags -, #, 0, + and space.  Fix some comments.
Capitalize properly when formatting bignums with %X.  Use
functions like c_isdigit rather than reinventing the wheel.
Simplify computation of excess precision.
* src/print.c: Do not include bignum.h; no longer needed.
(print_vectorlike): Avoid recalculating string length.
* test/src/editfns-tests.el (format-bignum):
Test some of the above fixes.
parent a451c6ec
......@@ -23,6 +23,8 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
#include "lisp.h"
#include <stdlib.h>
/* Return the value of the Lisp bignum N, as a double. */
double
bignum_to_double (Lisp_Object n)
......@@ -223,18 +225,39 @@ bignum_to_uintmax (Lisp_Object x)
return v;
}
/* Convert NUM to a base-BASE Lisp string. */
/* Yield an upper bound on the buffer size needed to contain a C
string representing the bignum NUM in base BASE. This includes any
preceding '-' and the terminating null. */
ptrdiff_t
bignum_bufsize (Lisp_Object num, int base)
{
return mpz_sizeinbase (XBIGNUM (num)->value, base) + 2;
}
/* Store into BUF (of size SIZE) the value of NUM as a base-BASE string.
If BASE is negative, use upper-case digits in base -BASE.
Return the string's length.
SIZE must equal bignum_bufsize (NUM, abs (BASE)). */
ptrdiff_t
bignum_to_c_string (char *buf, ptrdiff_t size, Lisp_Object num, int base)
{
eassert (bignum_bufsize (num, abs (base)) == size);
mpz_get_str (buf, base, XBIGNUM (num)->value);
ptrdiff_t n = size - 2;
return !buf[n - 1] ? n - 1 : n + !!buf[n];
}
/* Convert NUM to a base-BASE Lisp string.
If BASE is negative, use upper-case digits in base -BASE. */
Lisp_Object
bignum_to_string (Lisp_Object num, int base)
{
ptrdiff_t n = mpz_sizeinbase (XBIGNUM (num)->value, base) - 1;
ptrdiff_t size = bignum_bufsize (num, abs (base));
USE_SAFE_ALLOCA;
char *str = SAFE_ALLOCA (n + 3);
mpz_get_str (str, base, XBIGNUM (num)->value);
while (str[n])
n++;
Lisp_Object result = make_unibyte_string (str, n);
char *str = SAFE_ALLOCA (size);
ptrdiff_t len = bignum_to_c_string (str, size, num, base);
Lisp_Object result = make_unibyte_string (str, len);
SAFE_FREE ();
return result;
}
......
......@@ -4232,8 +4232,26 @@ usage: (format-message STRING &rest OBJECTS) */)
static Lisp_Object
styled_format (ptrdiff_t nargs, Lisp_Object *args, bool message)
{
enum
{
/* Maximum precision for a %f conversion such that the trailing
output digit might be nonzero. Any precision larger than this
will not yield useful information. */
USEFUL_PRECISION_MAX = ((1 - LDBL_MIN_EXP)
* (FLT_RADIX == 2 || FLT_RADIX == 10 ? 1
: FLT_RADIX == 16 ? 4
: -1)),
/* Maximum number of bytes (including terminating null) generated
by any format, if precision is no more than USEFUL_PRECISION_MAX.
On all practical hosts, %Lf is the worst case. */
SPRINTF_BUFSIZE = (sizeof "-." + (LDBL_MAX_10_EXP + 1)
+ USEFUL_PRECISION_MAX)
};
verify (USEFUL_PRECISION_MAX > 0);
ptrdiff_t n; /* The number of the next arg to substitute. */
char initial_buffer[4000];
char initial_buffer[1000 + SPRINTF_BUFSIZE];
char *buf = initial_buffer;
ptrdiff_t bufsize = sizeof initial_buffer;
ptrdiff_t max_bufsize = STRING_BYTES_BOUND + 1;
......@@ -4338,8 +4356,14 @@ styled_format (ptrdiff_t nargs, Lisp_Object *args, bool message)
char const *convsrc = format;
unsigned char format_char = *format++;
/* Bytes needed to represent the output of this conversion. */
/* Number of bytes to be preallocated for the next directive's
output. At the end of each iteration this is at least
CONVBYTES_ROOM, and is greater if the current directive
output was so large that it will be retried after buffer
reallocation. */
ptrdiff_t convbytes = 1;
enum { CONVBYTES_ROOM = SPRINTF_BUFSIZE - 1 };
eassert (p <= buf + bufsize - SPRINTF_BUFSIZE);
if (format_char == '%')
{
......@@ -4473,23 +4497,6 @@ styled_format (ptrdiff_t nargs, Lisp_Object *args, bool message)
conversion = 's';
zero_flag = false;
}
else if ((conversion == 'd' || conversion == 'i'
|| conversion == 'o' || conversion == 'x'
|| conversion == 'X')
&& BIGNUMP (arg))
{
int base = 10;
if (conversion == 'o')
base = 8;
else if (conversion == 'x')
base = 16;
else if (conversion == 'X')
base = -16;
arg = bignum_to_string (arg, base);
conversion = 's';
}
if (SYMBOLP (arg))
{
......@@ -4592,7 +4599,7 @@ styled_format (ptrdiff_t nargs, Lisp_Object *args, bool message)
spec->intervals = arg_intervals = true;
new_result = true;
continue;
convbytes = CONVBYTES_ROOM;
}
}
else if (! (conversion == 'c' || conversion == 'd'
......@@ -4606,28 +4613,8 @@ styled_format (ptrdiff_t nargs, Lisp_Object *args, bool message)
error ("Format specifier doesn't match argument type");
else
{
enum
{
/* Maximum precision for a %f conversion such that the
trailing output digit might be nonzero. Any precision
larger than this will not yield useful information. */
USEFUL_PRECISION_MAX =
((1 - LDBL_MIN_EXP)
* (FLT_RADIX == 2 || FLT_RADIX == 10 ? 1
: FLT_RADIX == 16 ? 4
: -1)),
/* Maximum number of bytes generated by any format, if
precision is no more than USEFUL_PRECISION_MAX.
On all practical hosts, %f is the worst case. */
SPRINTF_BUFSIZE =
sizeof "-." + (LDBL_MAX_10_EXP + 1) + USEFUL_PRECISION_MAX,
/* Length of pM (that is, of pMd without the
trailing "d"). */
pMlen = sizeof pMd - 2
};
verify (USEFUL_PRECISION_MAX > 0);
/* Length of pM (that is, of pMd without the trailing "d"). */
enum { pMlen = sizeof pMd - 2 };
/* Avoid undefined behavior in underlying sprintf. */
if (conversion == 'd' || conversion == 'i')
......@@ -4660,18 +4647,24 @@ styled_format (ptrdiff_t nargs, Lisp_Object *args, bool message)
if (precision_given)
prec = min (precision, USEFUL_PRECISION_MAX);
/* Use sprintf to format this number into sprintf_buf. Omit
/* Characters to be inserted after spaces and before
leading zeros. This can occur with bignums, since
string_to_bignum does only leading '-'. */
char prefix[sizeof "-0x" - 1];
int prefixlen = 0;
/* Use sprintf or bignum_to_string to format this number. Omit
padding and excess precision, though, because sprintf limits
output length to INT_MAX.
output length to INT_MAX and bignum_to_string doesn't
do padding or precision.
There are four types of conversion: double, unsigned
Use five sprintf conversions: double, long double, unsigned
char (passed as int), wide signed int, and wide
unsigned int. Treat them separately because the
sprintf ABI is sensitive to which type is passed. Be
careful about integer overflow, NaNs, infinities, and
conversions; for example, the min and max macros are
not suitable here. */
char sprintf_buf[SPRINTF_BUFSIZE];
ptrdiff_t sprintf_bytes;
if (float_conversion)
{
......@@ -4729,26 +4722,43 @@ styled_format (ptrdiff_t nargs, Lisp_Object *args, bool message)
f[-1] = 'L';
*f++ = conversion;
*f = '\0';
sprintf_bytes = sprintf (sprintf_buf, convspec, prec,
ldarg);
sprintf_bytes = sprintf (p, convspec, prec, ldarg);
}
else
sprintf_bytes = sprintf (sprintf_buf, convspec, prec,
darg);
sprintf_bytes = sprintf (p, convspec, prec, darg);
}
else if (conversion == 'c')
{
/* Don't use sprintf here, as it might mishandle prec. */
sprintf_buf[0] = XFIXNUM (arg);
p[0] = XFIXNUM (arg);
p[1] = '\0';
sprintf_bytes = prec != 0;
sprintf_buf[sprintf_bytes] = '\0';
}
else if (BIGNUMP (arg))
{
int base = ((conversion == 'd' || conversion == 'i') ? 10
: conversion == 'o' ? 8 : 16);
sprintf_bytes = bignum_bufsize (arg, base);
if (sprintf_bytes <= buf + bufsize - p)
{
int signedbase = conversion == 'X' ? -base : base;
sprintf_bytes = bignum_to_c_string (p, sprintf_bytes,
arg, signedbase);
bool negative = p[0] == '-';
prec = min (precision, sprintf_bytes - prefixlen);
prefix[prefixlen] = plus_flag ? '+' : ' ';
prefixlen += (plus_flag | space_flag) & !negative;
prefix[prefixlen] = '0';
prefix[prefixlen + 1] = conversion;
prefixlen += sharp_flag && base == 16 ? 2 : 0;
}
}
else if (conversion == 'd' || conversion == 'i')
{
if (FIXNUMP (arg))
{
printmax_t x = XFIXNUM (arg);
sprintf_bytes = sprintf (sprintf_buf, convspec, prec, x);
sprintf_bytes = sprintf (p, convspec, prec, x);
}
else
{
......@@ -4760,9 +4770,8 @@ styled_format (ptrdiff_t nargs, Lisp_Object *args, bool message)
x = trunc (x);
x = x ? x : 0;
sprintf_bytes = sprintf (sprintf_buf, convspec, 0, x);
char c0 = sprintf_buf[0];
bool signedp = ! ('0' <= c0 && c0 <= '9');
sprintf_bytes = sprintf (p, convspec, 0, x);
bool signedp = ! c_isdigit (p[0]);
prec = min (precision, sprintf_bytes - signedp);
}
}
......@@ -4793,10 +4802,9 @@ styled_format (ptrdiff_t nargs, Lisp_Object *args, bool message)
x = d;
negative = false;
}
sprintf_buf[0] = negative ? '-' : plus_flag ? '+' : ' ';
p[0] = negative ? '-' : plus_flag ? '+' : ' ';
bool signedp = negative | plus_flag | space_flag;
sprintf_bytes = sprintf (sprintf_buf + signedp,
convspec, prec, x);
sprintf_bytes = sprintf (p + signedp, convspec, prec, x);
sprintf_bytes += signedp;
}
......@@ -4804,112 +4812,126 @@ styled_format (ptrdiff_t nargs, Lisp_Object *args, bool message)
padding and excess precision. Deal with excess precision
first. This happens when the format specifies ridiculously
large precision, or when %d or %i formats a float that would
ordinarily need fewer digits than a specified precision. */
ordinarily need fewer digits than a specified precision,
or when a bignum is formatted using an integer format
with enough precision. */
ptrdiff_t excess_precision
= precision_given ? precision - prec : 0;
ptrdiff_t leading_zeros = 0, trailing_zeros = 0;
if (excess_precision)
ptrdiff_t trailing_zeros = 0;
if (excess_precision != 0 && float_conversion)
{
if (float_conversion)
{
if ((conversion == 'g' && ! sharp_flag)
|| ! ('0' <= sprintf_buf[sprintf_bytes - 1]
&& sprintf_buf[sprintf_bytes - 1] <= '9'))
excess_precision = 0;
else
{
if (conversion == 'g')
{
char *dot = strchr (sprintf_buf, '.');
if (!dot)
excess_precision = 0;
}
}
trailing_zeros = excess_precision;
}
else
leading_zeros = excess_precision;
if (! c_isdigit (p[sprintf_bytes - 1])
|| (conversion == 'g'
&& ! (sharp_flag && strchr (p, '.'))))
excess_precision = 0;
trailing_zeros = excess_precision;
}
ptrdiff_t leading_zeros = excess_precision - trailing_zeros;
/* Compute the total bytes needed for this item, including
excess precision and padding. */
ptrdiff_t numwidth;
if (INT_ADD_WRAPV (sprintf_bytes, excess_precision, &numwidth))
if (INT_ADD_WRAPV (prefixlen + sprintf_bytes, excess_precision,
&numwidth))
numwidth = PTRDIFF_MAX;
ptrdiff_t padding
= numwidth < field_width ? field_width - numwidth : 0;
if (max_bufsize - sprintf_bytes <= excess_precision
if (max_bufsize - (prefixlen + sprintf_bytes) <= excess_precision
|| max_bufsize - padding <= numwidth)
string_overflow ();
convbytes = numwidth + padding;
if (convbytes <= buf + bufsize - p)
{
/* Copy the formatted item from sprintf_buf into buf,
inserting padding and excess-precision zeros. */
char *src = sprintf_buf;
char src0 = src[0];
int exponent_bytes = 0;
bool signedp = src0 == '-' || src0 == '+' || src0 == ' ';
int prefix_bytes = (signedp
+ ((src[signedp] == '0'
&& (src[signedp + 1] == 'x'
|| src[signedp + 1] == 'X'))
? 2 : 0));
if (zero_flag)
bool signedp = p[0] == '-' || p[0] == '+' || p[0] == ' ';
int beglen = (signedp
+ ((p[signedp] == '0'
&& (p[signedp + 1] == 'x'
|| p[signedp + 1] == 'X'))
? 2 : 0));
eassert (prefixlen == 0 || beglen == 0
|| (beglen == 1 && p[0] == '-'
&& ! (prefix[0] == '-' || prefix[0] == '+'
|| prefix[0] == ' ')));
if (zero_flag && 0 <= char_hexdigit (p[beglen]))
{
unsigned char after_prefix = src[prefix_bytes];
if (0 <= char_hexdigit (after_prefix))
{
leading_zeros += padding;
padding = 0;
}
leading_zeros += padding;
padding = 0;
}
if (leading_zeros == 0 && sharp_flag && conversion == 'o'
&& p[beglen] != '0')
{
leading_zeros++;
padding -= padding != 0;
}
if (excess_precision
int endlen = 0;
if (trailing_zeros
&& (conversion == 'e' || conversion == 'g'))
{
char *e = strchr (src, 'e');
char *e = strchr (p, 'e');
if (e)
exponent_bytes = src + sprintf_bytes - e;
endlen = p + sprintf_bytes - e;
}
spec->start = nchars;
if (! minus_flag)
{
memset (p, ' ', padding);
p += padding;
nchars += padding;
}
ptrdiff_t midlen = sprintf_bytes - beglen - endlen;
ptrdiff_t leading_padding = minus_flag ? 0 : padding;
ptrdiff_t trailing_padding = padding - leading_padding;
memcpy (p, src, prefix_bytes);
p += prefix_bytes;
src += prefix_bytes;
memset (p, '0', leading_zeros);
p += leading_zeros;
int significand_bytes
= sprintf_bytes - prefix_bytes - exponent_bytes;
memcpy (p, src, significand_bytes);
p += significand_bytes;
src += significand_bytes;
memset (p, '0', trailing_zeros);
p += trailing_zeros;
memcpy (p, src, exponent_bytes);
p += exponent_bytes;
nchars += leading_zeros + sprintf_bytes + trailing_zeros;
/* Insert padding and excess-precision zeros. The output
contains the following components, in left-to-right order:
if (minus_flag)
LEADING_PADDING spaces.
BEGLEN bytes taken from the start of sprintf output.
PREFIXLEN bytes taken from the start of the prefix array.
LEADING_ZEROS zeros.
MIDLEN bytes taken from the middle of sprintf output.
TRAILING_ZEROS zeros.
ENDLEN bytes taken from the end of sprintf output.
TRAILING_PADDING spaces.
The sprintf output is taken from the buffer starting at
P and continuing for SPRINTF_BYTES bytes. */
ptrdiff_t incr
= (padding + leading_zeros + prefixlen
+ sprintf_bytes + trailing_zeros);
/* Optimize for the typical case with padding or zeros. */
if (incr != sprintf_bytes)
{
memset (p, ' ', padding);
p += padding;
nchars += padding;
/* Move data to make room to insert spaces and '0's.
As this may entail overlapping moves, process
the output right-to-left and use memmove.
With any luck this code is rarely executed. */
char *src = p + sprintf_bytes;
char *dst = p + incr;
dst -= trailing_padding;
memset (dst, ' ', trailing_padding);
src -= endlen;
dst -= endlen;
memmove (dst, src, endlen);
dst -= trailing_zeros;
memset (dst, '0', trailing_zeros);
src -= midlen;
dst -= midlen;
memmove (dst, src, midlen);
dst -= leading_zeros;
memset (dst, '0', leading_zeros);
dst -= prefixlen;
memcpy (dst, prefix, prefixlen);
src -= beglen;
dst -= beglen;
memmove (dst, src, beglen);
dst -= leading_padding;
memset (dst, ' ', leading_padding);
}
spec->end = nchars;
p += incr;
spec->start = nchars;
spec->end = nchars += incr;
new_result = true;
continue;
convbytes = CONVBYTES_ROOM;
}
}
}
......@@ -4962,42 +4984,51 @@ styled_format (ptrdiff_t nargs, Lisp_Object *args, bool message)
}
copy_char:
if (convbytes <= buf + bufsize - p)
{
memcpy (p, convsrc, convbytes);
p += convbytes;
nchars++;
continue;
}
memcpy (p, convsrc, convbytes);
p += convbytes;
nchars++;
convbytes = CONVBYTES_ROOM;
}
/* There wasn't enough room to store this conversion or single
character. CONVBYTES says how much room is needed. Allocate
enough room (and then some) and do it again. */
ptrdiff_t used = p - buf;
if (max_bufsize - used < convbytes)
ptrdiff_t buflen_needed;
if (INT_ADD_WRAPV (used, convbytes, &buflen_needed))
string_overflow ();
bufsize = used + convbytes;
bufsize = bufsize < max_bufsize / 2 ? bufsize * 2 : max_bufsize;
if (buf == initial_buffer)
if (bufsize <= buflen_needed)
{
buf = xmalloc (bufsize);
buf_save_value_index = SPECPDL_INDEX ();
record_unwind_protect_ptr (xfree, buf);
memcpy (buf, initial_buffer, used);
}
else
{
buf = xrealloc (buf, bufsize);
set_unwind_protect_ptr (buf_save_value_index, xfree, buf);
}
if (max_bufsize <= buflen_needed)
string_overflow ();
p = buf + used;
format = format0;
n = n0;
ispec = ispec0;
/* Either there wasn't enough room to store this conversion,
or there won't be enough room to do a sprintf the next
time through the loop. Allocate enough room (and then some). */
bufsize = (buflen_needed <= max_bufsize / 2
? buflen_needed * 2 : max_bufsize);
if (buf == initial_buffer)
{
buf = xmalloc (bufsize);
buf_save_value_index = SPECPDL_INDEX ();
record_unwind_protect_ptr (xfree, buf);
memcpy (buf, initial_buffer, used);
}
else
{
buf = xrealloc (buf, bufsize);
set_unwind_protect_ptr (buf_save_value_index, xfree, buf);
}
p = buf + used;
if (convbytes != CONVBYTES_ROOM)
{
/* There wasn't enough room for this conversion; do it over. */
eassert (CONVBYTES_ROOM < convbytes);
format = format0;
n = n0;
ispec = ispec0;
}
}
}
if (bufsize < p - buf)
......
......@@ -3278,9 +3278,12 @@ set_sub_char_table_contents (Lisp_Object table, ptrdiff_t idx, Lisp_Object val)
XSUB_CHAR_TABLE (table)->contents[idx] = val;
}
/* Defined in bignum.c. */
/* Defined in bignum.c. This part of bignum.c's API does not require
the caller to access bignum internals; see bignum.h for that. */
extern intmax_t bignum_to_intmax (Lisp_Object);
extern uintmax_t bignum_to_uintmax (Lisp_Object);
extern ptrdiff_t bignum_bufsize (Lisp_Object, int);
extern ptrdiff_t bignum_to_c_string (char *, ptrdiff_t, Lisp_Object, int);
extern Lisp_Object bignum_to_string (Lisp_Object, int);
extern Lisp_Object make_bignum_str (char const *, int);
extern Lisp_Object double_to_bignum (double);
......
......@@ -23,7 +23,6 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
#include "sysstdio.h"
#include "lisp.h"
#include "bignum.h"
#include "character.h"
#include "coding.h"
#include "buffer.h"
......@@ -1370,11 +1369,11 @@ print_vectorlike (Lisp_Object obj, Lisp_Object printcharfun, bool escapeflag,
{
case PVEC_BIGNUM:
{
ptrdiff_t size = bignum_bufsize (obj, 10);
USE_SAFE_ALLOCA;
char *str = SAFE_ALLOCA (mpz_sizeinbase (XBIGNUM (obj)->value, 10)
+ 2);
mpz_get_str (str, 10, XBIGNUM (obj)->value);
print_c_string (str, printcharfun);
char *str = SAFE_ALLOCA (size);
ptrdiff_t len = bignum_to_c_string (str, size, obj, 10);
strout (str, len, len, printcharfun);
SAFE_FREE ();
}
break;
......
......@@ -381,10 +381,23 @@
(let* ((s1 "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF")
(v1 (read (concat "#x" s1)))
(s2 "99999999999999999999999999999999")
(v2 (read s2)))
(v2 (read s2))
(v3 #x-3ffffffffffffffe000000000000000))
(should (> v1 most-positive-fixnum))
(should (equal (format "%X" v1) s1))
(should (> v2 most-positive-fixnum))
(should (equal (format "%d" v2) s2))))
(should (equal (format "%d" v2) s2))
(should (equal (format "%d" v3) "-5316911983139663489309385231907684352"))
(should (equal (format "%+d" v3) "-5316911983139663489309385231907684352"))
(should (equal (format "%+d" (- v3))
"+5316911983139663489309385231907684352"))
(should (equal (format "% d" (- v3))
" 5316911983139663489309385231907684352"))
(should (equal (format "%o" v3)
"-37777777777777777777600000000000000000000"))
(should (equal (format "%#50.40x" v3)
" -0x000000003ffffffffffffffe000000000000000"))
(should (equal (format "%-#50.40x" v3)
"-0x000000003ffffffffffffffe000000000000000 "))))
;;; editfns-tests.el ends here
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment