Commit 452f4150 authored by Paul Eggert's avatar Paul Eggert

Treat large integers as floats in the Lisp reader and in string-to-number.

parent 6703b2e4
2011-04-21 Paul Eggert <eggert@cs.ucla.edu>
Make the Lisp reader and string-to-float more consistent.
* data.c (atof): Remove decl; no longer used or needed.
(digit_to_number): Move to lread.c.
(Fstring_to_number): Use new string_to_number function, to be
consistent with how the Lisp reader treats infinities and NaNs.
Do not assume that floating-point numbers represent EMACS_INT
without losing information; this is not true on most 64-bit hosts.
Avoid double-rounding errors, by insisting on integers when
parsing non-base-10 numbers, as the documentation specifies.
* lisp.h (string_to_number): New decl, replacing ...
(isfloat_string): Remove.
* lread.c (read1): Do not accept +. and -. as integers; this
appears to have been a coding error. Similarly, do not accept
strings like +-1e0 as floating point numbers. Do not report
overflow for integer overflows unless the base is not 10 which
means we have no simple and reliable way to continue.
Break out the floating-point parsing into a new
function string_to_number, so that Fstring_to_number parses
floating point numbers consistently with the Lisp reader.
(digit_to_number): Moved here from data.c. Make it static inline.
(E_CHAR, EXP_INT): Remove, replacing with ...
(E_EXP): New macro, to solve the "1.0e+" problem mentioned below.
(string_to_number): New function, replacing isfloat_string.
This function checks for valid syntax and produces the resulting
Lisp float number too. Rework it so that string-to-number
no longer mishandles examples like "1.0e+".
2011-04-20 Paul Eggert <eggert@cs.ucla.edu>
* textprop.c (set_text_properties_1): Rewrite for clarity,
......@@ -15,29 +44,6 @@
* alloc.c (overrun_check_malloc, overrun_check_realloc): Now static.
(overrun_check_free): Likewise.
Make the Lisp reader and string-to-float more consistent.
* data.c (atof): Remove decl; no longer used or needed.
(Fstring_to_number): Use new string_to_float function, to be
consistent with how the Lisp reader treats infinities and NaNs.
Do not assume that floating-point numbers represent EMACS_INT
without losing information; this is not true on most 64-bit hosts.
Avoid double-rounding errors, by insisting on integers when
parsing non-base-10 numbers, as the documentation specifies.
Report integer overflow instead of silently converting to
integers.
* lisp.h (string_to_float): New decl, replacing ...
(isfloat_string): Remove.
* lread.c (read1): Do not accept +. and -. as integers; this
appears to have been a coding error. Similarly, do not accept
strings like +-1e0 as floating point numbers. Do not report
overflow for some integer overflows and not others; instead,
report them all. Break out the floating-point parsing into a new
function string_to_float, so that Fstring_to_number parses
floating point numbers consistently with the Lisp reader.
(string_to_float): New function, replacing isfloat_string.
This function checks for valid syntax and produces the resulting
Lisp float number too.
* alloc.c (SDATA_SIZE) [!GC_CHECK_STRING_BYTES]: Avoid runtime check
in the common case where SDATA_DATA_OFFSET is a multiple of Emacs
word size.
......
......@@ -2374,26 +2374,6 @@ NUMBER may be an integer or a floating point number. */)
return build_string (buffer);
}
INLINE static int
digit_to_number (int character, int base)
{
int digit;
if (character >= '0' && character <= '9')
digit = character - '0';
else if (character >= 'a' && character <= 'z')
digit = character - 'a' + 10;
else if (character >= 'A' && character <= 'Z')
digit = character - 'A' + 10;
else
return -1;
if (digit >= base)
return -1;
else
return digit;
}
DEFUN ("string-to-number", Fstring_to_number, Sstring_to_number, 1, 2, 0,
doc: /* Parse STRING as a decimal number and return the number.
This parses both integers and floating point numbers.
......@@ -2406,7 +2386,7 @@ If the base used is not 10, STRING is always parsed as integer. */)
{
register char *p;
register int b;
EMACS_INT n;
Lisp_Object val;
CHECK_STRING (string);
......@@ -2420,25 +2400,13 @@ If the base used is not 10, STRING is always parsed as integer. */)
xsignal1 (Qargs_out_of_range, base);
}
/* Skip any whitespace at the front of the number. Typically strtol does
this anyway, so we might as well be consistent. */
p = SSDATA (string);
while (*p == ' ' || *p == '\t')
p++;
if (b == 10)
{
Lisp_Object val = string_to_float (p, 1);
if (FLOATP (val))
return val;
}
n = strtol (p, NULL, b);
if (FIXNUM_OVERFLOW_P (n))
xsignal (Qoverflow_error, list1 (string));
return make_number (n);
val = string_to_number (p, b, 1);
return NILP (val) ? make_number (0) : val;
}
enum arithop
{
......
......@@ -2782,7 +2782,7 @@ extern Lisp_Object oblookup (Lisp_Object, const char *, EMACS_INT, EMACS_INT);
} while (0)
extern int openp (Lisp_Object, Lisp_Object, Lisp_Object,
Lisp_Object *, Lisp_Object);
Lisp_Object string_to_float (char const *, int);
Lisp_Object string_to_number (char const *, int, int);
extern void map_obarray (Lisp_Object, void (*) (Lisp_Object, Lisp_Object),
Lisp_Object);
extern void dir_warning (const char *, Lisp_Object);
......
......@@ -3005,32 +3005,8 @@ read1 (register Lisp_Object readcharfun, int *pch, int first_in_list)
if (!quoted && !uninterned_symbol)
{
register char *p1;
Lisp_Object result;
p1 = read_buffer;
if (*p1 == '+' || *p1 == '-') p1++;
/* Is it an integer? */
if ('0' <= *p1 && *p1 <= '9')
{
do
p1++;
while ('0' <= *p1 && *p1 <= '9');
/* Integers can have trailing decimal points. */
p1 += (*p1 == '.');
if (p1 == p)
{
/* It is an integer. */
EMACS_INT n = strtol (read_buffer, NULL, 10);
if (FIXNUM_OVERFLOW_P (n))
xsignal (Qoverflow_error,
list1 (build_string (read_buffer)));
return make_number (n);
}
}
result = string_to_float (read_buffer, 0);
if (FLOATP (result))
Lisp_Object result = string_to_number (read_buffer, 10, 0);
if (! NILP (result))
return result;
}
{
......@@ -3189,23 +3165,44 @@ substitute_in_interval (INTERVAL interval, Lisp_Object arg)
}
static inline int
digit_to_number (int character, int base)
{
int digit;
if ('0' <= character && character <= '9')
digit = character - '0';
else if ('a' <= character && character <= 'z')
digit = character - 'a' + 10;
else if ('A' <= character && character <= 'Z')
digit = character - 'A' + 10;
else
return -1;
return digit < base ? digit : -1;
}
#define LEAD_INT 1
#define DOT_CHAR 2
#define TRAIL_INT 4
#define E_CHAR 8
#define EXP_INT 16
#define E_EXP 16
/* Convert CP to a floating point number. Return a non-float value if CP does
not have valid floating point syntax. If IGNORE_TRAILING is nonzero,
consider just the longest prefix of CP that has valid floating point
syntax. */
/* Convert STRING to a number, assuming base BASE. Return a fixnum if CP has
integer syntax and fits in a fixnum, else return the nearest float if CP has
either floating point or integer syntax and BASE is 10, else return nil. If
IGNORE_TRAILING is nonzero, consider just the longest prefix of CP that has
valid floating point syntax. Signal an overflow if BASE is not 10 and the
number has integer syntax but does not fit. */
Lisp_Object
string_to_float (char const *cp, int ignore_trailing)
string_to_number (char const *string, int base, int ignore_trailing)
{
int state;
const char *start = cp;
char const *cp = string;
int leading_digit;
int float_syntax = 0;
double value = 0;
/* Compute NaN and infinities using a variable, to cope with compilers that
think they are smarter than we are. */
......@@ -3216,88 +3213,137 @@ string_to_float (char const *cp, int ignore_trailing)
atof ("-0.0") drops the sign. */
int negative = *cp == '-';
double value = 0;
int signedp = negative || *cp == '+';
cp += signedp;
state = 0;
if (negative || *cp == '+')
cp++;
if (*cp >= '0' && *cp <= '9')
leading_digit = digit_to_number (*cp, base);
if (0 <= leading_digit)
{
state |= LEAD_INT;
while (*cp >= '0' && *cp <= '9')
cp++;
do
++cp;
while (0 <= digit_to_number (*cp, base));
}
if (*cp == '.')
{
state |= DOT_CHAR;
cp++;
}
if (*cp >= '0' && *cp <= '9')
{
state |= TRAIL_INT;
while (*cp >= '0' && *cp <= '9')
cp++;
}
if (*cp == 'e' || *cp == 'E')
{
state |= E_CHAR;
cp++;
if (*cp == '+' || *cp == '-')
cp++;
}
if (*cp >= '0' && *cp <= '9')
if (base == 10)
{
state |= EXP_INT;
while (*cp >= '0' && *cp <= '9')
cp++;
}
else if (cp == start)
;
else if (cp[-1] == '+' && cp[0] == 'I' && cp[1] == 'N' && cp[2] == 'F')
{
state |= EXP_INT;
cp += 3;
value = 1.0 / zero;
if ('0' <= *cp && *cp <= '9')
{
state |= TRAIL_INT;
do
cp++;
while ('0' <= *cp && *cp <= '9');
}
if (*cp == 'e' || *cp == 'E')
{
char const *ecp = cp;
cp++;
if (*cp == '+' || *cp == '-')
cp++;
if ('0' <= *cp && *cp <= '9')
{
state |= E_EXP;
do
cp++;
while ('0' <= *cp && *cp <= '9');
}
else if (cp[-1] == '+'
&& cp[0] == 'I' && cp[1] == 'N' && cp[2] == 'F')
{
state |= E_EXP;
cp += 3;
value = 1.0 / zero;
}
else if (cp[-1] == '+'
&& cp[0] == 'N' && cp[1] == 'a' && cp[2] == 'N')
{
state |= E_EXP;
cp += 3;
value = zero / zero;
/* If that made a "negative" NaN, negate it. */
{
int i;
union { double d; char c[sizeof (double)]; }
u_data, u_minus_zero;
u_data.d = value;
u_minus_zero.d = -0.0;
for (i = 0; i < sizeof (double); i++)
if (u_data.c[i] & u_minus_zero.c[i])
{
value = -value;
break;
}
}
/* Now VALUE is a positive NaN. */
}
else
cp = ecp;
}
float_syntax = ((state & (DOT_CHAR|TRAIL_INT)) == (DOT_CHAR|TRAIL_INT)
|| state == (LEAD_INT|E_EXP));
}
else if (cp[-1] == '+' && cp[0] == 'N' && cp[1] == 'a' && cp[2] == 'N')
{
state |= EXP_INT;
cp += 3;
value = zero / zero;
/* If that made a "negative" NaN, negate it. */
{
int i;
union { double d; char c[sizeof (double)]; } u_data, u_minus_zero;
/* Return nil if the number uses invalid syntax. If IGNORE_TRAILING, accept
any prefix that matches. Otherwise, the entire string must match. */
if (! (ignore_trailing
? ((state & LEAD_INT) != 0 || float_syntax)
: (!*cp && ((state & ~DOT_CHAR) == LEAD_INT || float_syntax))))
return Qnil;
u_data.d = value;
u_minus_zero.d = - 0.0;
for (i = 0; i < sizeof (double); i++)
if (u_data.c[i] & u_minus_zero.c[i])
/* If the number does not use float syntax, and fits into a fixnum, return
the fixnum. */
if (0 <= leading_digit && ! float_syntax)
{
/* Convert string to EMACS_INT. Do not use strtol, to avoid assuming
that EMACS_INT is no wider than 'long', and because when BASE is 16
strtol might accept numbers like "0x1" that are not allowed here. */
EMACS_INT n = leading_digit;
EMACS_INT abs_bound =
(negative ? -MOST_NEGATIVE_FIXNUM : MOST_POSITIVE_FIXNUM);
EMACS_INT abs_bound_over_base = abs_bound / base;
for (cp = string + signedp + 1; ; cp++)
{
int d = digit_to_number (*cp, base);
if (d < 0)
{
value = - value;
if (n <= abs_bound)
return make_number (negative ? -n : n);
break;
}
}
/* Now VALUE is a positive NaN. */
}
if (abs_bound_over_base < n)
break;
n = base * n + d;
}
if (! (state == (LEAD_INT|DOT_CHAR|TRAIL_INT)
|| state == (DOT_CHAR|TRAIL_INT)
|| state == (LEAD_INT|E_CHAR|EXP_INT)
|| state == (LEAD_INT|DOT_CHAR|TRAIL_INT|E_CHAR|EXP_INT)
|| state == (DOT_CHAR|TRAIL_INT|E_CHAR|EXP_INT)))
return make_number (0); /* Any non-float value will do. */
/* Unfortunately there's no simple and reliable way to convert
non-base-10 to floating point. */
if (base != 10)
xsignal (Qoverflow_error, list1 (build_string (string)));
}
/* Either the number uses float syntax, or it does not fit into a fixnum.
Convert it from string to floating point, unless the value is already
known because it is an infinity or a NAN. */
if (! value)
value = atof (start + negative);
value = atof (string + signedp);
if (negative)
value = - value;
value = -value;
return make_float (value);
}
static Lisp_Object
read_vector (Lisp_Object readcharfun, int bytecodeflag)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment