Commit faf996dc authored by Eli Zaretskii

Fix decoding ASCII strings with embedded CR characters

* src/coding.c (string_ascii_p): Return a negative value if an
all-ASCII string STR includes the CR character, otherwise a
positive value.
(code_convert_string): If the string is ASCII, but includes CR
characters, use the fast path only if EOL doesn't need to be
decoded.  (Bug#40519)

* test/src/coding-tests.el (coding-nocopy-ascii): Add tests for
bug#40519.
parent 1aeb1819
......@@ -9471,15 +9471,22 @@ not fully specified.) */)
return code_convert_region (start, end, coding_system, destination, 1, 0);
}
/* Whether a string only contains chars in the 0..127 range. */
static bool
/* Non-zero if STR contains only characters in the 0..127 range.
Positive if STR contains no CR characters, so that no EOL conversion
is needed on decoding; negative if it contains at least one CR. */
static int
string_ascii_p (Lisp_Object str)
{
ptrdiff_t nbytes = SBYTES (str);
bool CR_Seen = false;
for (ptrdiff_t i = 0; i < nbytes; i++)
if (SREF (str, i) > 127)
return false;
return true;
{
if (SREF (str, i) > 127)
return 0;
if (SREF (str, i) == '\r')
CR_Seen = true;
}
return CR_Seen ? -1 : 1;
}
Lisp_Object
......@@ -9517,15 +9524,23 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system,
{
/* Fast path for ASCII-only input and an ASCII-compatible coding:
act as identity. */
int ascii_p;
Lisp_Object attrs = CODING_ID_ATTRS (coding.id);
if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
&& (STRING_MULTIBYTE (string)
? (chars == bytes) : string_ascii_p (string)))
return (nocopy
? string
: (encodep
? make_unibyte_string (SSDATA (string), bytes)
: make_multibyte_string (SSDATA (string), bytes, bytes)));
? (chars == bytes) : ((ascii_p = string_ascii_p (string)) != 0)))
{
if (ascii_p > 0
|| (ascii_p < 0
&& (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix)
|| inhibit_eol_conversion)))
return (nocopy
? string
: (encodep
? make_unibyte_string (SSDATA (string), bytes)
: make_multibyte_string (SSDATA (string),
bytes, bytes)));
}
}
else if (BUFFERP (dst_object))
{
......
......@@ -387,6 +387,23 @@
"Check that the NOCOPY parameter works for ASCII-only strings."
(let* ((uni (apply #'string (number-sequence 0 127)))
(multi (string-to-multibyte uni)))
(dolist (s (list uni multi))
(dolist (coding '(us-ascii-unix iso-latin-1-unix utf-8-unix))
(should-not (eq (decode-coding-string s coding nil) s))
(should-not (eq (encode-coding-string s coding nil) s))
(should (eq (decode-coding-string s coding t) s))
(should (eq (encode-coding-string s coding t) s)))))
(let* ((uni (apply #'string (number-sequence 15 127)))
(multi (string-to-multibyte uni)))
(dolist (s (list uni multi))
(dolist (coding '(us-ascii iso-latin-1 utf-8))
(should-not (eq (decode-coding-string s coding nil) s))
(should-not (eq (encode-coding-string s coding nil) s))
(should (eq (decode-coding-string s coding t) s))
(should (eq (encode-coding-string s coding t) s)))))
(let* ((uni (apply #'string (number-sequence 0 127)))
(multi (string-to-multibyte uni))
(inhibit-eol-conversion t))
(dolist (s (list uni multi))
(dolist (coding '(us-ascii iso-latin-1 utf-8))
(should-not (eq (decode-coding-string s coding nil) s))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment