Commit 6d2851de authored by Eli Zaretskii's avatar Eli Zaretskii
Browse files

Fix bug #13515 with processing DBCS file names on MS-Windows.

 src/w32.c (max_filename_mbslen): New function.
 (normalize_filename, readdir): Use it to detect locales where ANSI
 encoding of file names uses a double-byte character set (DBCS).
 If a DBCS encoding is used, advance by characters using
 CharNextExA, instead of incrementing a 'char *' pointer.  Use
 _mbslwr instead of _strlwr.
parent 4d4ddaa7
2013-01-23 Eli Zaretskii <eliz@gnu.org>
* w32.c (max_filename_mbslen): New function.
(normalize_filename, readdir): Use it to detect locales where ANSI
encoding of file names uses a double-byte character set (DBCS).
If a DBCS encoding is used, advance by characters using
CharNextExA, instead of incrementing a 'char *' pointer. Use
_mbslwr instead of _strlwr. (Bug#13515)
2013-01-22 Eli Zaretskii <eliz@gnu.org> 2013-01-22 Eli Zaretskii <eliz@gnu.org>
   
* w32heap.c (allocate_heap) [!_WIN64]: Decrease the initial * w32heap.c (allocate_heap) [!_WIN64]: Decrease the initial
......
...@@ -37,7 +37,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ ...@@ -37,7 +37,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
/* must include CRT headers *before* config.h */ /* must include CRT headers *before* config.h */
#include <config.h> #include <config.h>
#include <mbstring.h> /* for _mbspbrk */ #include <mbstring.h> /* for _mbspbrk and _mbslwr */
#undef access #undef access
#undef chdir #undef chdir
...@@ -1304,6 +1304,67 @@ srandom (int seed) ...@@ -1304,6 +1304,67 @@ srandom (int seed)
srand (seed); srand (seed);
} }
/* Current codepage for encoding file names. */
static int file_name_codepage;
/* Return the maximum length in bytes of a multibyte character
sequence encoded in the current ANSI codepage. This is required to
correctly walk the encoded file names one character at a time. */
static int
max_filename_mbslen (void)
{
/* A simple cache to avoid calling GetCPInfo every time we need to
normalize a file name. The file-name encoding is not supposed to
be changed too frequently, if ever. */
static Lisp_Object last_file_name_encoding;
static int last_max_mbslen;
Lisp_Object current_encoding;
current_encoding = Vfile_name_coding_system;
if (NILP (current_encoding))
current_encoding = Vdefault_file_name_coding_system;
if (!EQ (last_file_name_encoding, current_encoding))
{
CPINFO cp_info;
last_file_name_encoding = current_encoding;
/* Default to the current ANSI codepage. */
file_name_codepage = w32_ansi_code_page;
if (!NILP (current_encoding))
{
char *cpname = SDATA (SYMBOL_NAME (current_encoding));
char *cp = NULL, *end;
int cpnum;
if (strncmp (cpname, "cp", 2) == 0)
cp = cpname + 2;
else if (strncmp (cpname, "windows-", 8) == 0)
cp = cpname + 8;
if (cp)
{
end = cp;
cpnum = strtol (cp, &end, 10);
if (cpnum && *end == '\0' && end - cp >= 2)
file_name_codepage = cpnum;
}
}
if (!file_name_codepage)
file_name_codepage = CP_ACP; /* CP_ACP = 0, but let's not assume that */
if (!GetCPInfo (file_name_codepage, &cp_info))
{
file_name_codepage = CP_ACP;
if (!GetCPInfo (file_name_codepage, &cp_info))
emacs_abort ();
}
last_max_mbslen = cp_info.MaxCharSize;
}
return last_max_mbslen;
}
/* Normalize filename by converting all path separators to /* Normalize filename by converting all path separators to
the specified separator. Also conditionally convert upper the specified separator. Also conditionally convert upper
...@@ -1313,14 +1374,20 @@ static void ...@@ -1313,14 +1374,20 @@ static void
normalize_filename (register char *fp, char path_sep) normalize_filename (register char *fp, char path_sep)
{ {
char sep; char sep;
char *elem; char *elem, *p2;
int dbcs_p = max_filename_mbslen () > 1;
/* Always lower-case drive letters a-z, even if the filesystem /* Always lower-case drive letters a-z, even if the filesystem
preserves case in filenames. preserves case in filenames.
This is so filenames can be compared by string comparison This is so filenames can be compared by string comparison
functions that are case-sensitive. Even case-preserving filesystems functions that are case-sensitive. Even case-preserving filesystems
do not distinguish case in drive letters. */ do not distinguish case in drive letters. */
if (fp[1] == ':' && *fp >= 'A' && *fp <= 'Z') if (dbcs_p)
p2 = CharNextExA (file_name_codepage, fp, 0);
else
p2 = fp + 1;
if (*p2 == ':' && *fp >= 'A' && *fp <= 'Z')
{ {
*fp += 'a' - 'A'; *fp += 'a' - 'A';
fp += 2; fp += 2;
...@@ -1332,7 +1399,10 @@ normalize_filename (register char *fp, char path_sep) ...@@ -1332,7 +1399,10 @@ normalize_filename (register char *fp, char path_sep)
{ {
if (*fp == '/' || *fp == '\\') if (*fp == '/' || *fp == '\\')
*fp = path_sep; *fp = path_sep;
fp++; if (!dbcs_p)
fp++;
else
fp = CharNextExA (file_name_codepage, fp, 0);
} }
return; return;
} }
...@@ -1355,13 +1425,20 @@ normalize_filename (register char *fp, char path_sep) ...@@ -1355,13 +1425,20 @@ normalize_filename (register char *fp, char path_sep)
if (elem && elem != fp) if (elem && elem != fp)
{ {
*fp = 0; /* temporary end of string */ *fp = 0; /* temporary end of string */
_strlwr (elem); /* while we convert to lower case */ _mbslwr (elem); /* while we convert to lower case */
} }
*fp = sep; /* convert (or restore) path separator */ *fp = sep; /* convert (or restore) path separator */
elem = fp + 1; /* next element starts after separator */ elem = fp + 1; /* next element starts after separator */
sep = path_sep; sep = path_sep;
} }
} while (*fp++); if (*fp)
{
if (!dbcs_p)
fp++;
else
fp = CharNextExA (file_name_codepage, fp, 0);
}
} while (*fp);
} }
/* Destructively turn backslashes into slashes. */ /* Destructively turn backslashes into slashes. */
...@@ -2588,15 +2665,22 @@ readdir (DIR *dirp) ...@@ -2588,15 +2665,22 @@ readdir (DIR *dirp)
strcpy (dir_static.d_name, dir_find_data.cFileName); strcpy (dir_static.d_name, dir_find_data.cFileName);
dir_static.d_namlen = strlen (dir_static.d_name); dir_static.d_namlen = strlen (dir_static.d_name);
if (dir_is_fat) if (dir_is_fat)
_strlwr (dir_static.d_name); _mbslwr (dir_static.d_name);
else if (downcase) else if (downcase)
{ {
register char *p; register char *p;
for (p = dir_static.d_name; *p; p++) int dbcs_p = max_filename_mbslen () > 1;
if (*p >= 'a' && *p <= 'z') for (p = dir_static.d_name; *p; )
break; {
if (*p >= 'a' && *p <= 'z')
break;
if (dbcs_p)
p = CharNextExA (file_name_codepage, p, 0);
else
p++;
}
if (!*p) if (!*p)
_strlwr (dir_static.d_name); _mbslwr (dir_static.d_name);
} }
return &dir_static; return &dir_static;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment