Commit c3e9160b authored by Eli Zaretskii

Finished conversion routines; w32-unicode-filenames exposed to Lisp.

parent 03d58cca
......@@ -9473,6 +9473,49 @@ code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system,
return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
}
/* Encode or decode a file name, to or from a unibyte string suitable
for passing to C library functions. */
/* Decode FNAME, a unibyte file name, with the coding system in effect
   for file names.  Return FNAME itself when no such coding system is
   available.  */
Lisp_Object
decode_file_name (Lisp_Object fname)
{
#ifdef WINDOWSNT
  /* At this level the w32 build treats file names as UTF-8; w32.c
     performs the real conversion to UTF-16LE or to the system ANSI
     codepage, depending on the underlying OS.  */
  if (NILP (Fcoding_system_p (Qutf_8)))
    return fname;
  return code_convert_string_norecord (fname, Qutf_8, 0);
#else /* !WINDOWSNT */
  /* The explicitly set coding system wins over the default one.  */
  Lisp_Object coding = Vfile_name_coding_system;

  if (NILP (coding))
    coding = Vdefault_file_name_coding_system;
  if (NILP (coding))
    return fname;
  return code_convert_string_norecord (fname, coding, 0);
#endif
}
/* Encode FNAME with the coding system in effect for file names,
   producing a unibyte string suitable for passing to C library
   functions.  Return FNAME itself when no such coding system is
   available.  */
Lisp_Object
encode_file_name (Lisp_Object fname)
{
#ifdef WINDOWSNT
  /* At this level the w32 build treats file names as UTF-8; w32.c
     performs the real conversion to UTF-16LE or to the system ANSI
     codepage, depending on the underlying OS.  */
  if (NILP (Fcoding_system_p (Qutf_8)))
    return fname;
  return code_convert_string_norecord (fname, Qutf_8, 1);
#else /* !WINDOWSNT */
  /* The explicitly set coding system wins over the default one.  */
  Lisp_Object coding = Vfile_name_coding_system;

  if (NILP (coding))
    coding = Vdefault_file_name_coding_system;
  if (NILP (coding))
    return fname;
  return code_convert_string_norecord (fname, coding, 1);
#endif
}
DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
2, 4, 0,
......
......@@ -672,23 +672,11 @@ struct coding_system
/* Encode the file name NAME using the specified coding system
   for file names, if any.  The work is done by encode_file_name, so
   NAME is evaluated exactly once.  */
#define ENCODE_FILE(NAME) encode_file_name (NAME)
/* Decode the file name NAME using the specified coding system
   for file names, if any.  The work is done by decode_file_name, so
   NAME is evaluated exactly once.  */
#define DECODE_FILE(NAME) decode_file_name (NAME)
/* Encode the string STR using the specified coding system
for system functions, if any. */
......@@ -716,6 +704,8 @@ extern Lisp_Object code_convert_string (Lisp_Object, Lisp_Object,
Lisp_Object, bool, bool, bool);
/* Convert a string (first argument) with a coding system (second
   argument); the bool argument selects encoding when true and
   decoding when false.  */
extern Lisp_Object code_convert_string_norecord (Lisp_Object, Lisp_Object,
bool);
/* Encode a file name into a unibyte string suitable for passing to C
   library functions.  */
extern Lisp_Object encode_file_name (Lisp_Object);
/* Decode a unibyte file name; the inverse of encode_file_name.  */
extern Lisp_Object decode_file_name (Lisp_Object);
extern Lisp_Object raw_text_coding_system (Lisp_Object);
extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object);
extern Lisp_Object complement_process_encoding_system (Lisp_Object);
......
......@@ -300,8 +300,6 @@ static BOOL g_b_init_is_valid_security_descriptor;
static BOOL g_b_init_set_file_security;
static BOOL g_b_init_get_adapters_info;
int w32_unicode_filenames;
/*
BEGIN: Wrapper functions around OpenProcessToken
and other functions in advapi32.dll that are only
......@@ -1186,12 +1184,74 @@ w32_valid_pointer_p (void *p, int size)
/* Converting file names from UTF-8 to either UTF-16 or the system
ANSI codepage. */
/* Converting file names from UTF-8 to either UTF-16 or the ANSI
codepage defined by file-name-coding-system. */
/* Current codepage for encoding file names. */
static int file_name_codepage;
/* Produce a Windows ANSI codepage suitable for encoding file names.
   Return the codepage number; if CP_INFO is non-NULL, also store the
   information about that codepage in *CP_INFO.

   The codepage is taken from the name of `file-name-coding-system'
   or, if that is nil, of `default-file-name-coding-system': a name
   of the form "cpNNN" or "windows-NNN" selects codepage NNN.  Any
   other value selects w32_ansi_code_page, and CP_ACP is the last
   resort.  Aborts if GetCPInfo fails even for CP_ACP.  */
static int
codepage_for_filenames (CPINFO *cp_info)
{
  /* A simple cache to avoid calling GetCPInfo every time we need to
     encode/decode a file name.  The file-name encoding is not
     supposed to be changed too frequently, if ever.  */
  static Lisp_Object last_file_name_encoding;
  static CPINFO cached_cp_info;
  /* Zero until the cache has been filled once.  Without this flag, a
     first call whose encoding happens to be EQ to the initial value
     of last_file_name_encoding would hand out an unfilled CPINFO.  */
  static int cache_valid;
  Lisp_Object current_encoding;

  current_encoding = Vfile_name_coding_system;
  if (NILP (current_encoding))
    current_encoding = Vdefault_file_name_coding_system;

  if (!cache_valid || !EQ (last_file_name_encoding, current_encoding))
    {
      /* Default to the current ANSI codepage.  */
      file_name_codepage = w32_ansi_code_page;

      /* Only a non-nil coding system has a name worth parsing.  */
      if (!NILP (current_encoding))
        {
          char *cpname = SDATA (SYMBOL_NAME (current_encoding));
          char *digits = NULL, *end;
          int cpnum;

          if (strncmp (cpname, "cp", 2) == 0)
            digits = cpname + 2;
          else if (strncmp (cpname, "windows-", 8) == 0)
            digits = cpname + 8;

          if (digits)
            {
              end = digits;
              cpnum = strtol (digits, &end, 10);
              /* Require the whole suffix to be a number of at least
                 two characters.  */
              if (cpnum && *end == '\0' && end - digits >= 2)
                file_name_codepage = cpnum;
            }
        }

      if (!file_name_codepage)
        file_name_codepage = CP_ACP; /* CP_ACP = 0, but let's not assume that */

      if (!GetCPInfo (file_name_codepage, &cached_cp_info))
        {
          file_name_codepage = CP_ACP;
          if (!GetCPInfo (file_name_codepage, &cached_cp_info))
            emacs_abort ();
        }

      /* Remember what the cached data corresponds to, so that the
         next call with the same encoding skips the work above.  */
      last_file_name_encoding = current_encoding;
      cache_valid = 1;
    }
  if (cp_info)
    *cp_info = cached_cp_info;

  return file_name_codepage;
}
static int
filename_to_utf16 (const char *fn_in, wchar_t *fn_out)
{
int result = MultiByteToWideChar (CP_UTF8, 0, fn_in, -1, fn_out, MAX_PATH);
int result = MultiByteToWideChar (CP_UTF8, MB_ERR_INVALID_CHARS, fn_in, -1,
fn_out, MAX_PATH);
if (!result)
{
......@@ -1217,7 +1277,28 @@ filename_to_utf16 (const char *fn_in, wchar_t *fn_out)
/* Convert the null-terminated UTF-16 file name FN_IN to UTF-8 and
   store the result in FN_OUT, which is passed to the conversion as a
   buffer of MAX_UTF8_PATH bytes.  Return 0 on success; on failure
   set errno and return -1.  */
static int
filename_from_utf16 (const wchar_t *fn_in, char *fn_out)
{
  int result = WideCharToMultiByte (CP_UTF8, 0, fn_in, -1,
                                    fn_out, MAX_UTF8_PATH, NULL, NULL);

  if (!result)
    {
      DWORD err = GetLastError ();

      /* Bad arguments map to EINVAL; everything else, including a
         too-small buffer or untranslatable input, to ENOENT.  */
      switch (err)
        {
        case ERROR_INVALID_FLAGS:
        case ERROR_INVALID_PARAMETER:
          errno = EINVAL;
          break;
        case ERROR_INSUFFICIENT_BUFFER:
        case ERROR_NO_UNICODE_TRANSLATION:
        default:
          errno = ENOENT;
          break;
        }
      return -1;
    }
  return 0;
}
static int
......@@ -1227,9 +1308,11 @@ filename_to_ansi (const char *fn_in, char *fn_out)
if (filename_to_utf16 (fn_in, fn_utf16) == 0)
{
int result = WideCharToMultiByte (CP_ACP, 0, fn_utf16, -1,
fn_out, MAX_UTF8_PATH, NULL, NULL);
int result;
int codepage = codepage_for_filenames (NULL);
result = WideCharToMultiByte (codepage, 0, fn_utf16, -1,
fn_out, MAX_UTF8_PATH, NULL, NULL);
if (!result)
{
DWORD err = GetLastError ();
......@@ -1250,12 +1333,36 @@ filename_to_ansi (const char *fn_in, char *fn_out)
}
return 0;
}
return -1;
}
/* Convert the null-terminated file name FN_IN, encoded in the
   codepage returned by codepage_for_filenames, to UTF-8 and store the
   result in FN_OUT.  The conversion goes through UTF-16.  Return 0 on
   success; on failure set errno and return -1.  */
static int
filename_from_ansi (const char *fn_in, char *fn_out)
{
  wchar_t fn_utf16[MAXPATHLEN];
  int codepage = codepage_for_filenames (NULL);
  /* MB_ERR_INVALID_CHARS makes the call fail on bytes that are not
     valid in CODEPAGE.  */
  int result = MultiByteToWideChar (codepage, MB_ERR_INVALID_CHARS, fn_in, -1,
                                    fn_utf16, MAX_PATH);

  if (!result)
    {
      DWORD err = GetLastError ();

      /* Bad arguments map to EINVAL; everything else, including a
         too-small buffer or untranslatable input, to ENOENT.  */
      switch (err)
        {
        case ERROR_INVALID_FLAGS:
        case ERROR_INVALID_PARAMETER:
          errno = EINVAL;
          break;
        case ERROR_INSUFFICIENT_BUFFER:
        case ERROR_NO_UNICODE_TRANSLATION:
        default:
          errno = ENOENT;
          break;
        }
      return -1;
    }
  return filename_from_utf16 (fn_utf16, fn_out);
}
......@@ -1662,66 +1769,16 @@ srandom (int seed)
srand (seed);
}
/* Current codepage for encoding file names. */
static int file_name_codepage;
/* Return the maximum length in bytes of a multibyte character
   sequence encoded in the current ANSI codepage.  This is required to
   correctly walk the encoded file names one character at a time.

   The codepage lookup and its caching are done by
   codepage_for_filenames; only the MaxCharSize member of the CPINFO
   data it supplies is used here.  */
static int
max_filename_mbslen (void)
{
  CPINFO cp_info;

  codepage_for_filenames (&cp_info);
  return cp_info.MaxCharSize;
}
/* Normalize filename by converting all path separators to
......
......@@ -200,8 +200,6 @@ extern void record_pending_deletion (char *);
extern void sys_sleep (int);
extern int sys_link (const char *, const char *);
#ifdef HAVE_GNUTLS
#include <gnutls/gnutls.h>
......
......@@ -6567,6 +6567,18 @@ X toolkit. Possible values are: gtk, motif, xaw, or xaw3d.
With MS Windows or Nextstep, the value is t. */);
Vx_toolkit_scroll_bars = Qt;
DEFVAR_BOOL ("w32-unicode-filenames",
w32_unicode_filenames,
doc: /* Non-nil means use Unicode APIs when passing file names to the OS.
A value of nil means file names passed to the OS APIs and returned
from those APIs are encoded/decoded using the ANSI codepage
specified by `file-name-coding-system'.
This variable is set to non-nil by default when Emacs runs on Windows
systems of the NT family, including W2K, XP, Vista, Windows 7 and
Windows 8. It is set to nil on Windows 9X. */);
w32_unicode_filenames = 0;
/* Tell Emacs about this window system. */
Fprovide (Qw32, Qnil);
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment