Commit c0e16b14 authored by Kenichi Handa
Browse files

(detect_coding): Fix handling of coding->head_ascii.

Be sure to call setup_coding_system when a proper coding system is
found.
(detect_coding_system): Fix handling of coding->head_ascii.
parent 07e81066
2008-06-04 Kenichi Handa <handa@m17n.org>
* coding.c (detect_coding): Fix handling of coding->head_ascii.
Be sure to call setup_coding_system when a proper coding system is
found.
(detect_coding_system): Fix handling of coding->head_ascii.
2008-06-03 Andreas Schwab <schwab@suse.de>
* font.c (font_prop_validate_spacing): Fix last change.
......
...@@ -5782,6 +5782,7 @@ detect_coding (coding) ...@@ -5782,6 +5782,7 @@ detect_coding (coding)
coding_set_source (coding); coding_set_source (coding);
src_end = coding->source + coding->src_bytes; src_end = coding->source + coding->src_bytes;
coding->head_ascii = 0;
/* If we have not yet decided the text encoding type, detect it /* If we have not yet decided the text encoding type, detect it
now. */ now. */
...@@ -5792,15 +5793,12 @@ detect_coding (coding) ...@@ -5792,15 +5793,12 @@ detect_coding (coding)
int null_byte_found = 0, eight_bit_found = 0; int null_byte_found = 0, eight_bit_found = 0;
detect_info.checked = detect_info.found = detect_info.rejected = 0; detect_info.checked = detect_info.found = detect_info.rejected = 0;
coding->head_ascii = -1;
for (src = coding->source; src < src_end; src++) for (src = coding->source; src < src_end; src++)
{ {
c = *src; c = *src;
if (c & 0x80) if (c & 0x80)
{ {
eight_bit_found = 1; eight_bit_found = 1;
if (coding->head_ascii < 0)
coding->head_ascii = src - coding->source;
if (null_byte_found) if (null_byte_found)
break; break;
} }
...@@ -5810,16 +5808,19 @@ detect_coding (coding) ...@@ -5810,16 +5808,19 @@ detect_coding (coding)
&& ! inhibit_iso_escape_detection && ! inhibit_iso_escape_detection
&& ! detect_info.checked) && ! detect_info.checked)
{ {
if (coding->head_ascii < 0)
coding->head_ascii = src - coding->source;
if (detect_coding_iso_2022 (coding, &detect_info)) if (detect_coding_iso_2022 (coding, &detect_info))
{ {
/* We have scanned the whole data. */ /* We have scanned the whole data. */
if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE)) if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
/* We didn't find an 8-bit code. We may have {
found a null-byte, but it's very rare that /* We didn't find an 8-bit code. We may
a binary file confirm to ISO-2022. */ have found a null-byte, but it's very
src = src_end; rare that a binary file confirm to
ISO-2022. */
src = src_end;
coding->head_ascii = src - coding->source;
}
detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
break; break;
} }
} }
...@@ -5829,10 +5830,11 @@ detect_coding (coding) ...@@ -5829,10 +5830,11 @@ detect_coding (coding)
if (eight_bit_found) if (eight_bit_found)
break; break;
} }
coding->head_ascii++;
} }
else
coding->head_ascii++;
} }
if (coding->head_ascii < 0)
coding->head_ascii = src - coding->source;
if (null_byte_found || eight_bit_found if (null_byte_found || eight_bit_found
|| coding->head_ascii < coding->src_bytes || coding->head_ascii < coding->src_bytes
...@@ -5886,23 +5888,23 @@ detect_coding (coding) ...@@ -5886,23 +5888,23 @@ detect_coding (coding)
break; break;
} }
} }
if (i < coding_category_raw_text)
setup_coding_system (CODING_ID_NAME (this->id), coding);
else if (null_byte_found)
setup_coding_system (Qno_conversion, coding);
else if ((detect_info.rejected & CATEGORY_MASK_ANY)
== CATEGORY_MASK_ANY)
setup_coding_system (Qraw_text, coding);
else if (detect_info.rejected)
for (i = 0; i < coding_category_raw_text; i++)
if (! (detect_info.rejected & (1 << coding_priorities[i])))
{
this = coding_categories + coding_priorities[i];
setup_coding_system (CODING_ID_NAME (this->id), coding);
break;
}
} }
if (i < coding_category_raw_text)
setup_coding_system (CODING_ID_NAME (this->id), coding);
else if (null_byte_found)
setup_coding_system (Qno_conversion, coding);
else if ((detect_info.rejected & CATEGORY_MASK_ANY)
== CATEGORY_MASK_ANY)
setup_coding_system (Qraw_text, coding);
else if (detect_info.rejected)
for (i = 0; i < coding_category_raw_text; i++)
if (! (detect_info.rejected & (1 << coding_priorities[i])))
{
this = coding_categories + coding_priorities[i];
setup_coding_system (CODING_ID_NAME (this->id), coding);
break;
}
} }
} }
else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id))) else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
...@@ -7655,6 +7657,7 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep, ...@@ -7655,6 +7657,7 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
coding.src_multibyte = multibytep; coding.src_multibyte = multibytep;
coding.consumed = 0; coding.consumed = 0;
coding.mode |= CODING_MODE_LAST_BLOCK; coding.mode |= CODING_MODE_LAST_BLOCK;
coding.head_ascii = 0;
detect_info.checked = detect_info.found = detect_info.rejected = 0; detect_info.checked = detect_info.found = detect_info.rejected = 0;
...@@ -7666,7 +7669,6 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep, ...@@ -7666,7 +7669,6 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
struct coding_system *this; struct coding_system *this;
int c, i; int c, i;
coding.head_ascii = -1;
/* Skip all ASCII bytes except for a few ISO2022 controls. */ /* Skip all ASCII bytes except for a few ISO2022 controls. */
for (; src < src_end; src++) for (; src < src_end; src++)
{ {
...@@ -7674,27 +7676,28 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep, ...@@ -7674,27 +7676,28 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
if (c & 0x80) if (c & 0x80)
{ {
eight_bit_found = 1; eight_bit_found = 1;
if (coding.head_ascii < 0)
coding.head_ascii = src - coding.source;
if (null_byte_found) if (null_byte_found)
break; break;
} }
if (c < 0x20) else if (c < 0x20)
{ {
if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
&& ! inhibit_iso_escape_detection && ! inhibit_iso_escape_detection
&& ! detect_info.checked) && ! detect_info.checked)
{ {
if (coding.head_ascii < 0)
coding.head_ascii = src - coding.source;
if (detect_coding_iso_2022 (&coding, &detect_info)) if (detect_coding_iso_2022 (&coding, &detect_info))
{ {
/* We have scanned the whole data. */ /* We have scanned the whole data. */
if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE)) if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
/* We didn't find an 8-bit code. We may have {
found a null-byte, but it's very rare that /* We didn't find an 8-bit code. We may
a binary file confirm to ISO-2022. */ have found a null-byte, but it's very
src = src_end; rare that a binary file confirm to
ISO-2022. */
src = src_end;
coding.head_ascii = src - coding.source;
}
detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
break; break;
} }
} }
...@@ -7704,10 +7707,11 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep, ...@@ -7704,10 +7707,11 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
if (eight_bit_found) if (eight_bit_found)
break; break;
} }
coding.head_ascii++;
} }
else
coding.head_ascii++;
} }
if (coding.head_ascii < 0)
coding.head_ascii = src - coding.source;
if (null_byte_found || eight_bit_found if (null_byte_found || eight_bit_found
|| coding.head_ascii < coding.src_bytes || coding.head_ascii < coding.src_bytes
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment