diff --git a/doc/lispref/internals.texi b/doc/lispref/internals.texi index e870d6e06e8008fdcf3d42f0700825f1536272d3..f1062a2f4d07388197cff38eff100ff9cc235f90 100644 --- a/doc/lispref/internals.texi +++ b/doc/lispref/internals.texi @@ -1475,6 +1475,42 @@ the widest integral data type supported by the C compiler, typically @code{overflow-error}. @end deftypefn +@deftypefn Function bool extract_big_integer (emacs_env *@var{env}, emacs_value @var{arg}, int *@var{sign}, ptrdiff_t *@var{count}, emacs_limb_t *@var{magnitude}) +This function, which is available since Emacs 27, extracts the +integral value of @var{arg}. The value of @var{arg} must be an +integer (fixnum or bignum). If @var{sign} is not @code{NULL}, it +stores the sign of @var{arg} (-1, 0, or +1) into @code{*sign}. The +magnitude is stored into @var{magnitude} as follows. If @var{count} +and @var{magnitude} are both non-@code{NULL}, then @var{magnitude} must +point to an array of at least @code{*count} @code{emacs_limb_t} +elements. If @var{magnitude} is large enough to hold the magnitude of +@var{arg}, then this function writes the magnitude into the +@var{magnitude} array in little-endian form, stores the number of +array elements written into @code{*count}, and returns @code{true}. +If @var{magnitude} is not large enough, it stores the required array +size into @code{*count}, signals an error, and returns @code{false}. +If @var{count} is not @code{NULL} and @var{magnitude} is @code{NULL}, +then the function stores the required array size into @code{*count} +and returns @code{true}. + +Emacs guarantees that the maximum required value of @code{*count} +never exceeds @code{min (PTRDIFF_MAX, SIZE_MAX) / sizeof +(emacs_limb_t)}. This implies that you can use e.g. @code{malloc +((size_t) (*count * sizeof (emacs_limb_t)))} to allocate the +@code{magnitude} array without integer overflow. 
+@end deftypefn + +@deftp {Type alias} emacs_limb_t +This type is an alias to an otherwise unspecified unsigned integral +type. It is used as element type for the magnitude arrays for the big +integer conversion functions. +@end deftp + +@defvr Macro EMACS_LIMB_MAX +This macro expands to an integer literal specifying the maximum +possible value for an @code{emacs_limb_t} object. +@end defvr + @deftypefn Function double extract_float (emacs_env *@var{env}, emacs_value @var{arg}) This function returns the value of a Lisp float specified by @var{arg}, as a C @code{double} value. @@ -1572,6 +1608,128 @@ limits set by @code{most-negative-fixnum} and @code{most-positive-fixnum} (@pxref{Integer Basics}). @end deftypefn +@deftypefn Function emacs_value make_big_integer (emacs_env *@var{env}, int sign, ptrdiff_t count, const emacs_limb_t *magnitude) +This function, which is available since Emacs 27, takes an +arbitrary-sized integer argument and returns a corresponding +@code{emacs_value} object. The @var{sign} argument gives the sign of +the return value. If @var{sign} is nonzero, then @var{magnitude} must +point to an array of at least @var{count} elements specifying the +little-endian magnitude of the return value. +@end deftypefn + +The following example uses the GNU Multiprecision Library (GMP) to +calculate the next probable prime after a given integer. +@xref{Top,,,gmp} for a general overview of GMP, and @pxref{Integer +Import and Export,,,gmp} for how to convert the @code{magnitude} array +to and from GMP @code{mpz_t} values. + +@example +#include +#include +#include +#include +#include + +#include + +#include + +static void +memory_full (emacs_env *env) +@{ + const char *message = "Memory exhausted"; + emacs_value data = env->make_string (env, message, strlen (message)); + env->non_local_exit_signal (env, env->intern (env, "error"), + env->funcall (env, env->intern (env, "list"), 1, + &data)); +@} + +enum +@{ + max_count = ((SIZE_MAX < PTRDIFF_MAX ? 
SIZE_MAX : PTRDIFF_MAX) + / sizeof (emacs_limb_t)) +@}; + +static bool +extract_big_integer (emacs_env *env, emacs_value arg, mpz_t result) +@{ + int sign; + ptrdiff_t count; + bool success = env->extract_big_integer (env, arg, &sign, &count, NULL); + if (!success) + return false; + if (sign == 0) + @{ + mpz_set_ui (result, 0); + return true; + @} + enum @{ order = -1, size = sizeof (emacs_limb_t), endian = 0, nails = 0 @}; + assert (0 < count && count <= max_count); + emacs_limb_t *magnitude = malloc ((size_t) (count * size)); + if (magnitude == NULL) + @{ + memory_full (env); + return false; + @} + success = env->extract_big_integer (env, arg, NULL, &count, magnitude); + assert (success); + mpz_import (result, count, order, size, endian, nails, magnitude); + free (magnitude); + if (sign < 0) + mpz_neg (result, result); + return true; +@} + +static emacs_value +make_big_integer (emacs_env *env, const mpz_t value) +@{ + if (mpz_sgn (value) == 0) + return env->make_integer (env, 0); + enum + @{ + order = -1, + size = sizeof (emacs_limb_t), + endian = 0, + nails = 0, + numb = 8 * size - nails + @}; + size_t count = (mpz_sizeinbase (value, 2) + numb - 1) / numb; + if (max_count < count) + @{ + memory_full (env); + return NULL; + @} + emacs_limb_t *magnitude = malloc (count * size); + if (magnitude == NULL) + @{ + memory_full (env); + return NULL; + @} + size_t written; + mpz_export (magnitude, &written, order, size, endian, nails, value); + assert (written == count); + assert (count <= PTRDIFF_MAX); + emacs_value result = env->make_big_integer (env, mpz_sgn (value), + (ptrdiff_t) count, magnitude); + free (magnitude); + return result; +@} + +static emacs_value +next_prime (emacs_env *env, ptrdiff_t nargs, emacs_value *args, + void *data) +@{ + assert (nargs == 1); + mpz_t p; + mpz_init (p); + extract_big_integer (env, args[0], p); + mpz_nextprime (p, p); + emacs_value result = make_big_integer (env, p); + mpz_clear (p); + return result; +@} +@end example + 
@deftypefn Function emacs_value make_float (emacs_env *@var{env}, double @var{d}) This function takes a @code{double} argument @var{d} and returns the corresponding Emacs floating-point value. @@ -1601,66 +1759,6 @@ function raises the @code{overflow-error} error condition if string. @end deftypefn -If you define the preprocessor macro @code{EMACS_MODULE_GMP} before -including the header @file{emacs-module.h}, you can also convert -between Emacs integers and GMP @code{mpz_t} values. @xref{GMP -Basics,,,gmp}. If @code{EMACS_MODULE_GMP} is defined, -@file{emacs-module.h} wraps @code{mpz_t} in the following structure: - -@deftp struct emacs_mpz value -struct emacs_mpz @{ mpz_t value; @}; -@end deftp - -@noindent -Then you can use the following additional functions: - -@deftypefn Function bool extract_big_integer (emacs_env *@var{env}, emacs_value @var{arg}, struct emacs_mpz *@var{result}) -This function, which is available since Emacs 27, extracts the -integral value of @var{arg} into @var{result}. @var{result} must not -be @code{NULL}. @code{@var{result}->value} must be an initialized -@code{mpz_t} object. @xref{Initializing Integers,,,gmp}. If -@var{arg} is an integer, Emacs will store its value into -@code{@var{result}->value}. After you have finished using -@code{@var{result}->value}, you should free it using @code{mpz_clear} -or similar. -@end deftypefn - -@deftypefn Function emacs_value make_big_integer (emacs_env *@var{env}, const struct emacs_mpz *@var{value}) -This function, which is available since Emacs 27, takes an -arbitrary-sized integer argument and returns a corresponding -@code{emacs_value} object. @var{value} must not be @code{NULL}. -@code{@var{value}->value} must be an initialized @code{mpz_t} object. -@xref{Initializing Integers,,,gmp}. Emacs will return a corresponding -integral object. After you have finished using -@code{@var{value}->value}, you should free it using @code{mpz_clear} -or similar. 
-@end deftypefn - -The following example uses GMP to calculate the next probable prime -after a given integer: - -@example -#include -#include - -#define EMACS_MODULE_GMP -#include - -static emacs_value -next_prime (emacs_env *env, ptrdiff_t nargs, emacs_value *args, - void *data) -@{ - assert (nargs == 1); - emacs_mpz p; - mpz_init (p.value); - env->extract_big_integer (env, args[0], &p); - mpz_nextprime (p.value, p.value); - emacs_value result = env->make_big_integer (env, &p); - mpz_clear (p.value); - return result; -@} -@end example - The @acronym{API} does not provide functions to manipulate Lisp data structures, for example, create lists with @code{cons} and @code{list} (@pxref{Building Lists}), extract list members with @code{car} and diff --git a/src/emacs-module.c b/src/emacs-module.c index 4b991a1c744b3ee505476a15670b9570586947e4..e5c88fd814ad96be2d98d433f2a490f72aea29ce 100644 --- a/src/emacs-module.c +++ b/src/emacs-module.c @@ -70,12 +70,6 @@ To add a new module function, proceed as follows: #include -#ifndef HAVE_GMP -#include "mini-gmp.h" -#define EMACS_MODULE_HAVE_MPZ_T -#endif - -#define EMACS_MODULE_GMP #include "emacs-module.h" #include @@ -772,21 +766,143 @@ module_make_time (emacs_env *env, struct timespec time) return lisp_to_value (env, timespec_to_lisp (time)); } -static void -module_extract_big_integer (emacs_env *env, emacs_value value, - struct emacs_mpz *result) +/* +Big integer support. + +There are two possible ways to support big integers in the module API +that have been discussed: + +1. Exposing GMP numbers (mpz_t) directly in the API. + +2. Isolating the API from GMP by converting to/from a custom + sign-magnitude representation. + +Approach (1) has the advantage of being faster (no import/export +required) and requiring less code in Emacs and in modules that would +use GMP anyway. However, (1) also couples big integer support +directly to the current implementation in Emacs (GMP). 
Also (1) +requires each module author to ensure that their module is linked to +the same GMP library as Emacs itself; in particular, module authors +can't link GMP statically. (1) also requires conditional compilation +and workarounds to ensure the module interface still works if GMP +isn't available while including emacs-module.h. It also means that +modules written in languages such as Go and Java that support big +integers without GMP now have to carry an otherwise unnecessary GMP +dependency. Approach (2), on the other hand, neatly decouples the +module interface from the GMP-based implementation. It's not +significantly more complex than (1) either: the additional code is +mostly straightforward. Over all, the benefits of (2) over (1) are +large enough to prefer it here. + +We use a simple sign-magnitude representation for the big integers. +For the magnitude we pick an array of an unsigned integer type similar +to mp_limb_t instead of e.g. unsigned char. This matches in most +cases the representation of a GMP limb. In such cases GMP picks an +optimized algorithm for mpz_import and mpz_export that boils down to a +single memcpy to convert the magnitude. This way we largely avoid the +import/export overhead on most platforms. +*/ + +enum { - MODULE_FUNCTION_BEGIN (); - Lisp_Object o = value_to_lisp (value); + /* Documented maximum count of magnitude elements. */ + module_bignum_count_max = min (SIZE_MAX, PTRDIFF_MAX) / sizeof (emacs_limb_t) +}; + +static bool +module_extract_big_integer (emacs_env *env, emacs_value arg, int *sign, + ptrdiff_t *count, emacs_limb_t *magnitude) +{ + MODULE_FUNCTION_BEGIN (false); + Lisp_Object o = value_to_lisp (arg); CHECK_INTEGER (o); - mpz_set_integer (result->value, o); + int dummy; + if (sign == NULL) + sign = &dummy; + /* See + https://gmplib.org/manual/Integer-Import-and-Export.html#index-Export. 
*/ + enum + { + order = -1, + size = sizeof *magnitude, + bits = size * CHAR_BIT, + endian = 0, + nails = 0, + numb = 8 * size - nails + }; + if (FIXNUMP (o)) + { + EMACS_INT x = XFIXNUM (o); + *sign = (0 < x) - (x < 0); + if (x == 0 || count == NULL) + return true; + /* As a simplification we don't check how many array elements + are exactly required, but use a reasonable static upper + bound. For most architectures exactly one element should + suffice. */ + EMACS_UINT u; + enum { required = (sizeof u + size - 1) / size }; + verify (0 < required && required <= module_bignum_count_max); + if (magnitude == NULL) + { + *count = required; + return true; + } + if (*count < required) + { + ptrdiff_t actual = *count; + *count = required; + args_out_of_range_3 (INT_TO_INTEGER (actual), + INT_TO_INTEGER (required), + INT_TO_INTEGER (module_bignum_count_max)); + } + /* Set u = abs(x). See https://stackoverflow.com/a/17313717. */ + if (0 < x) + u = (EMACS_UINT) x; + else + u = -(EMACS_UINT) x; + verify (required * bits < PTRDIFF_MAX); + for (ptrdiff_t i = 0; i < required; ++i) + magnitude[i] = (emacs_limb_t) (u >> (i * bits)); + return true; + } + const mpz_t *x = xbignum_val (o); + *sign = mpz_sgn (*x); + if (count == NULL) + return true; + size_t required_size = (mpz_sizeinbase (*x, 2) + numb - 1) / numb; + eassert (required_size <= PTRDIFF_MAX); + ptrdiff_t required = (ptrdiff_t) required_size; + eassert (required <= module_bignum_count_max); + if (magnitude == NULL) + { + *count = required; + return true; + } + if (*count < required) + { + ptrdiff_t actual = *count; + *count = required; + args_out_of_range_3 (INT_TO_INTEGER (actual), INT_TO_INTEGER (required), + INT_TO_INTEGER (module_bignum_count_max)); + } + size_t written; + mpz_export (magnitude, &written, order, size, endian, nails, *x); + eassert (written == required_size); + return true; } static emacs_value -module_make_big_integer (emacs_env *env, const struct emacs_mpz *value) +module_make_big_integer 
(emacs_env *env, int sign, + ptrdiff_t count, const emacs_limb_t *magnitude) { MODULE_FUNCTION_BEGIN (NULL); - mpz_set (mpz[0], value->value); + if (sign == 0) + return lisp_to_value (env, make_fixed_natnum (0)); + enum { order = -1, size = sizeof *magnitude, endian = 0, nails = 0 }; + mpz_import (mpz[0], count, order, size, endian, nails, magnitude); + if (sign < 0) + mpz_neg (mpz[0], mpz[0]); return lisp_to_value (env, make_integer_mpz ()); } diff --git a/src/emacs-module.h.in b/src/emacs-module.h.in index 9955e30eb7a5680f06ac7c8e3bff4b7e3f1b55d5..800c0188ff5b9bb8d827978851d76f75012831a3 100644 --- a/src/emacs-module.h.in +++ b/src/emacs-module.h.in @@ -20,6 +20,7 @@ along with GNU Emacs. If not, see . */ #ifndef EMACS_MODULE_H #define EMACS_MODULE_H +#include #include #include #include @@ -28,10 +29,6 @@ along with GNU Emacs. If not, see . */ #include #endif -#if defined EMACS_MODULE_GMP && !defined EMACS_MODULE_HAVE_MPZ_T -#include -#endif - #define EMACS_MAJOR_VERSION @emacs_major_version@ #if defined __cplusplus && __cplusplus >= 201103L @@ -100,10 +97,21 @@ enum emacs_process_input_result emacs_process_input_quit = 1 }; -#ifdef EMACS_MODULE_GMP -struct emacs_mpz { mpz_t value; }; +/* +Implementation note: We define emacs_limb_t so that it is likely to +match the GMP mp_limb_t type. If the types match, GMP can use an +optimization for mpz_import and mpz_export that boils down to a +memcpy. According to https://gmplib.org/manual/ABI-and-ISA.html GMP +will prefer a 64-bit limb and will default to unsigned long if that is +wide enough. Note that this is an internal micro-optimization. Users +shouldn't rely on the exact size of emacs_limb_t. 
+*/ +#if ULONG_MAX == 0xFFFFFFFF +typedef unsigned long long emacs_limb_t; +# define EMACS_LIMB_MAX ULLONG_MAX #else -struct emacs_mpz; /* no definition */ +typedef unsigned long emacs_limb_t; +# define EMACS_LIMB_MAX ULONG_MAX #endif struct emacs_env_25 diff --git a/src/module-env-27.h b/src/module-env-27.h index 00de30090074a82f59b1f7eab56452f05bf7f929..da8ac0e7479c280d143d7fefcd8f7e5f7d96856a 100644 --- a/src/module-env-27.h +++ b/src/module-env-27.h @@ -9,10 +9,10 @@ emacs_value (*make_time) (emacs_env *env, struct timespec time) EMACS_ATTRIBUTE_NONNULL (1); - void (*extract_big_integer) (emacs_env *env, emacs_value value, - struct emacs_mpz *result) - EMACS_ATTRIBUTE_NONNULL (1, 3); + bool (*extract_big_integer) (emacs_env *env, emacs_value arg, int *sign, + ptrdiff_t *count, emacs_limb_t *magnitude) + EMACS_ATTRIBUTE_NONNULL (1); - emacs_value (*make_big_integer) (emacs_env *env, - const struct emacs_mpz *value) - EMACS_ATTRIBUTE_NONNULL (1, 2); + emacs_value (*make_big_integer) (emacs_env *env, int sign, ptrdiff_t count, + const emacs_limb_t *magnitude) + EMACS_ATTRIBUTE_NONNULL (1); diff --git a/test/data/emacs-module/mod-test.c b/test/data/emacs-module/mod-test.c index 2891b73c1a01044ebc55dc234dc9bff609b11ac6..b579c8a62784afbeae48fde6b9a41a224cd931e8 100644 --- a/test/data/emacs-module/mod-test.c +++ b/test/data/emacs-module/mod-test.c @@ -33,10 +33,8 @@ along with GNU Emacs. If not, see . */ #include #else #include "mini-gmp.h" -#define EMACS_MODULE_HAVE_MPZ_T #endif -#define EMACS_MODULE_GMP #include #include "timespec.h" @@ -66,6 +64,8 @@ int plugin_is_GPL_compatible; # error "INTPTR_MAX too large" #endif +/* Smoke test to verify that EMACS_LIMB_MAX is defined. */ +_Static_assert (0 < EMACS_LIMB_MAX, "EMACS_LIMB_MAX missing or incorrect"); /* Always return symbol 't'. 
*/ static emacs_value @@ -372,23 +372,106 @@ Fmod_test_add_nanosecond (emacs_env *env, ptrdiff_t nargs, emacs_value *args, return env->make_time (env, time); } +static void +memory_full (emacs_env *env) +{ + const char *message = "Memory exhausted"; + emacs_value data = env->make_string (env, message, strlen (message)); + env->non_local_exit_signal (env, env->intern (env, "error"), + env->funcall (env, env->intern (env, "list"), 1, + &data)); +} + +enum +{ + max_count = ((SIZE_MAX < PTRDIFF_MAX ? SIZE_MAX : PTRDIFF_MAX) + / sizeof (emacs_limb_t)) +}; + +static bool +extract_big_integer (emacs_env *env, emacs_value arg, mpz_t result) +{ + int sign; + ptrdiff_t count; + bool success = env->extract_big_integer (env, arg, &sign, &count, NULL); + if (!success) + return false; + if (sign == 0) + { + mpz_set_ui (result, 0); + return true; + } + enum { order = -1, size = sizeof (emacs_limb_t), endian = 0, nails = 0 }; + assert (0 < count && count <= max_count); + emacs_limb_t *magnitude = malloc (count * size); + if (magnitude == NULL) + { + memory_full (env); + return false; + } + success = env->extract_big_integer (env, arg, NULL, &count, magnitude); + assert (success); + mpz_import (result, count, order, size, endian, nails, magnitude); + free (magnitude); + if (sign < 0) + mpz_neg (result, result); + return true; +} + +static emacs_value +make_big_integer (emacs_env *env, const mpz_t value) +{ + if (mpz_sgn (value) == 0) + return env->make_integer (env, 0); + /* See + https://gmplib.org/manual/Integer-Import-and-Export.html#index-Export. 
*/ + enum + { + order = -1, + size = sizeof (emacs_limb_t), + endian = 0, + nails = 0, + numb = 8 * size - nails + }; + size_t count = (mpz_sizeinbase (value, 2) + numb - 1) / numb; + if (max_count < count) + { + memory_full (env); + return NULL; + } + emacs_limb_t *magnitude = malloc (count * size); + if (magnitude == NULL) + { + memory_full (env); + return NULL; + } + size_t written; + mpz_export (magnitude, &written, order, size, endian, nails, value); + assert (written == count); + assert (count <= PTRDIFF_MAX); + emacs_value result = env->make_big_integer (env, mpz_sgn (value), + (ptrdiff_t) count, magnitude); + free (magnitude); + return result; +} + static emacs_value Fmod_test_nanoseconds (emacs_env *env, ptrdiff_t nargs, emacs_value *args, void *data) { assert (nargs == 1); struct timespec time = env->extract_time (env, args[0]); - struct emacs_mpz nanoseconds; + mpz_t nanoseconds; assert (LONG_MIN <= time.tv_sec && time.tv_sec <= LONG_MAX); - mpz_init_set_si (nanoseconds.value, time.tv_sec); + mpz_init_set_si (nanoseconds, time.tv_sec); #ifdef __MINGW32__ _Static_assert (1000000000 <= ULONG_MAX, "unsupported architecture"); #else static_assert (1000000000 <= ULONG_MAX, "unsupported architecture"); #endif - mpz_mul_ui (nanoseconds.value, nanoseconds.value, 1000000000); + mpz_mul_ui (nanoseconds, nanoseconds, 1000000000); assert (0 <= time.tv_nsec && time.tv_nsec <= ULONG_MAX); - mpz_add_ui (nanoseconds.value, nanoseconds.value, time.tv_nsec); - emacs_value result = env->make_big_integer (env, &nanoseconds); - mpz_clear (nanoseconds.value); + mpz_add_ui (nanoseconds, nanoseconds, time.tv_nsec); + emacs_value result = make_big_integer (env, nanoseconds); + mpz_clear (nanoseconds); return result; } @@ -398,12 +481,12 @@ Fmod_test_double (emacs_env *env, ptrdiff_t nargs, emacs_value *args, { assert (nargs == 1); emacs_value arg = args[0]; - struct emacs_mpz value; - mpz_init (value.value); - env->extract_big_integer (env, arg, &value); - mpz_mul_ui 
(value.value, value.value, 2); - emacs_value result = env->make_big_integer (env, &value); - mpz_clear (value.value); + mpz_t value; + mpz_init (value); + extract_big_integer (env, arg, value); + mpz_mul_ui (value, value, 2); + emacs_value result = make_big_integer (env, value); + mpz_clear (value); return result; }