Commit 75ee2036 authored by Philipp Stephani's avatar Philipp Stephani

Refactoring: move UTF-8 decoding functions into coding.h.

json_make_string and json_build_string are generally useful and not
JSON-specific.  Move them to coding.[ch].

* src/coding.h (build_utf8_string): Move from json.c.

* src/coding.c (make_utf8_string): Move from json.c.

* src/json.c (json_make_string, json_build_string): Move to
coding.[ch].  Split out JSON-specific comment.
(json_parse_error, Fjson_serialize, json_to_lisp): Fix callers.

* src/emacs-module.c (module_make_function, module_make_string): Use
new functions.
(module_decode, module_decode_copy): Remove.
parent dbe81e16
Pipeline #1485 passed with stage
in 50 minutes and 24 seconds
......@@ -6353,6 +6353,25 @@ utf8_string_p (Lisp_Object string)
return check_utf_8 (&coding) != -1;
}
/* Return a multibyte Lisp string built from the SIZE bytes starting
   at DATA, which are expected to hold UTF-8 text.  When the bytes can
   be used as they are, the string is created from them directly;
   otherwise they are run through the utf-8-unix decoder.  */
Lisp_Object
make_utf8_string (const char *data, ptrdiff_t size)
{
  const unsigned char *src = (const unsigned char *) data;
  ptrdiff_t nchars, nbytes;
  parse_str_as_multibyte (src, size, &nchars, &nbytes);

  /* Fast path: if DATA is a valid UTF-8 string, it can become a Lisp
     string without any decoding step.  */
  if (nchars == size || nbytes == size)
    return make_specified_string (data, nchars, size, true);

  /* Slow path: decode the whole range as one final block and hand
     back the object the decoder produced.  */
  struct coding_system coding;
  setup_coding_system (Qutf_8_unix, &coding);
  coding.mode |= CODING_MODE_LAST_BLOCK;
  coding.source = src;
  decode_coding_object (&coding, Qnil, 0, 0, size, size, Qt);
  return coding.dst_object;
}
/* Detect how end-of-line of a text of length SRC_BYTES pointed by
SOURCE is encoded. If CATEGORY is one of
......
......@@ -695,6 +695,7 @@ extern Lisp_Object raw_text_coding_system (Lisp_Object);
extern bool raw_text_coding_system_p (struct coding_system *);
extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object);
extern Lisp_Object complement_process_encoding_system (Lisp_Object);
extern Lisp_Object make_utf8_string (const char *, ptrdiff_t);
extern void decode_coding_gap (struct coding_system *,
ptrdiff_t, ptrdiff_t);
......@@ -762,6 +763,17 @@ surrogates_to_codepoint (int low, int high)
return 0x10000 + (low - 0xDC00) + ((high - 0xD800) * 0x400);
}
/* Convenience wrapper around make_utf8_string for C strings: DATA
   must be NUL-terminated, and its length is taken with strlen.  The
   result is a multibyte Lisp string; if DATA is not valid UTF-8, the
   contents of the returned string are unspecified.  */
INLINE Lisp_Object
build_utf8_string (const char *data)
{
  ptrdiff_t length = strlen (data);
  return make_utf8_string (data, length);
}
extern Lisp_Object preferred_coding_system (void);
/* Coding system to be used to encode text for terminal display when
......
......@@ -223,8 +223,6 @@ static void module_reset_handlerlist (struct handler **);
static bool value_storage_contains_p (const struct emacs_value_storage *,
emacs_value, ptrdiff_t *);
static Lisp_Object module_encode (Lisp_Object);
static Lisp_Object module_decode (Lisp_Object);
static Lisp_Object module_decode_copy (Lisp_Object);
static bool module_assertions = false;
......@@ -532,10 +530,7 @@ module_make_function (emacs_env *env, ptrdiff_t min_arity, ptrdiff_t max_arity,
function->data = data;
if (documentation)
{
AUTO_STRING (unibyte_doc, documentation);
function->documentation = module_decode_copy (unibyte_doc);
}
function->documentation = build_utf8_string (documentation);
Lisp_Object result;
XSET_MODULE_FUNCTION (result, function);
......@@ -668,8 +663,8 @@ module_make_string (emacs_env *env, const char *str, ptrdiff_t length)
MODULE_FUNCTION_BEGIN (NULL);
if (! (0 <= length && length <= STRING_BYTES_BOUND))
overflow_error ();
Lisp_Object lstr = make_unibyte_string (str, length);
return lisp_to_value (env, module_decode (lstr));
Lisp_Object lstr = make_utf8_string (str, length);
return lisp_to_value (env, lstr);
}
static emacs_value
......@@ -1030,18 +1025,6 @@ module_encode (Lisp_Object string)
return code_convert_string (string, Qutf_8_unix, Qt, true, true, true);
}
/* Convert STRING with the utf-8-unix coding system and return the
   result of code_convert_string.
   NOTE(review): the three boolean flags presumably select decoding
   (not encoding), permission to skip copying, and no recording of
   the coding system -- confirm against code_convert_string's
   prototype.  */
static Lisp_Object
module_decode (Lisp_Object string)
{
  Lisp_Object decoded
    = code_convert_string (string, Qutf_8_unix, Qt, false, true, true);
  return decoded;
}
/* Like module_decode, except that the fifth argument passed to
   code_convert_string is false instead of true.
   NOTE(review): judging by this function's name, that flag presumably
   forces the result to be a fresh copy -- confirm against
   code_convert_string's prototype.  */
static Lisp_Object
module_decode_copy (Lisp_Object string)
{
  Lisp_Object decoded
    = code_convert_string (string, Qutf_8_unix, Qt, false, false, true);
  return decoded;
}
/* Value conversion. */
......
......@@ -215,47 +215,11 @@ json_has_suffix (const char *string, const char *suffix)
#endif
/* Build a multibyte Lisp string out of the UTF-8 bytes in the range
   [DATA, DATA + SIZE).  Should that range not hold a valid UTF-8
   string, the result will include raw bytes.

   Every caller below either passes only valid UTF-8 strings or uses
   this function to format error messages; in the latter case
   correctness isn't critical.  */
static Lisp_Object
json_make_string (const char *data, ptrdiff_t size)
{
  const unsigned char *src = (const unsigned char *) data;
  ptrdiff_t nchars, nbytes;
  parse_str_as_multibyte (src, size, &nchars, &nbytes);

  /* A valid UTF-8 string can be turned into a Lisp string directly;
     anything else has to go through the decoder below.  */
  if (nchars == size || nbytes == size)
    return make_specified_string (data, nchars, size, true);

  struct coding_system coding;
  setup_coding_system (Qutf_8_unix, &coding);
  coding.mode |= CODING_MODE_LAST_BLOCK;
  coding.source = src;
  decode_coding_object (&coding, Qnil, 0, 0, size, size, Qt);
  return coding.dst_object;
}
/* Create a multibyte Lisp string from the NUL-terminated UTF-8
string beginning at DATA. If the string is not a valid UTF-8
string, an unspecified string is returned.  Note that all callers
below either pass only valid UTF-8 strings or use this function for
/* Note that all callers of make_utf8_string and build_utf8_string
below either pass only valid UTF-8 strings or use the functions for
formatting error messages; in the latter case correctness isn't
critical. */
/* NUL-terminated convenience form of json_make_string: measure DATA
   with strlen and convert that many bytes.  */
static Lisp_Object
json_build_string (const char *data)
{
  ptrdiff_t length = strlen (data);
  return json_make_string (data, length);
}
/* Return a unibyte string containing the sequence of UTF-8 encoding
units of the UTF-8 representation of STRING. If STRING does not
represent a sequence of Unicode scalar values, return a string with
......@@ -303,8 +267,8 @@ json_parse_error (const json_error_t *error)
symbol = Qjson_parse_error;
#endif
xsignal (symbol,
list5 (json_build_string (error->text),
json_build_string (error->source), INT_TO_INTEGER (error->line),
list5 (build_utf8_string (error->text),
build_utf8_string (error->source), INT_TO_INTEGER (error->line),
INT_TO_INTEGER (error->column), INT_TO_INTEGER (error->position)));
}
......@@ -648,7 +612,7 @@ usage: (json-serialize OBJECT &rest ARGS) */)
json_out_of_memory ();
record_unwind_protect_ptr (json_free, string);
return unbind_to (count, json_build_string (string));
return unbind_to (count, build_utf8_string (string));
}
struct json_buffer_and_size
......@@ -855,7 +819,7 @@ json_to_lisp (json_t *json, struct json_configuration *conf)
case JSON_REAL:
return make_float (json_real_value (json));
case JSON_STRING:
return json_make_string (json_string_value (json),
return make_utf8_string (json_string_value (json),
json_string_length (json));
case JSON_ARRAY:
{
......@@ -915,7 +879,7 @@ json_to_lisp (json_t *json, struct json_configuration *conf)
json_t *value;
json_object_foreach (json, key_str, value)
{
Lisp_Object key = json_build_string (key_str);
Lisp_Object key = build_utf8_string (key_str);
EMACS_UINT hash;
ptrdiff_t i = hash_lookup (h, key, &hash);
/* Keys in JSON objects are unique, so the key can't
......@@ -932,7 +896,7 @@ json_to_lisp (json_t *json, struct json_configuration *conf)
json_t *value;
json_object_foreach (json, key_str, value)
{
Lisp_Object key = Fintern (json_build_string (key_str), Qnil);
Lisp_Object key = Fintern (build_utf8_string (key_str), Qnil);
result
= Fcons (Fcons (key, json_to_lisp (value, conf)),
result);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment