Commit 01633a17 authored by Eli Zaretskii

Support MS-Windows file names that use characters outside of ANSI codepage.

 src/w32.c (get_file_security, set_file_security)
 (create_symbolic_link): Separate pointers and boolean flags for
 ANSI and Unicode APIs.  Use the latter if w32_unicode_filenames is
 non-zero, else the former.
 (codepage_for_filenames, filename_to_utf16)
 (filename_from_utf16, filename_to_ansi, filename_from_ansi): New
 functions.
 (init_user_info): Allow $HOME and $SHELL to include non-ANSI
 characters.
 (normalize_filename): Lose the DBCS code, now works on UTF-8.
 Accept only one argument; all callers changed.
 (dostounix_filename): Remove the second argument, now works in
 UTF-8.  All callers changed.
 (parse_root): Lose DBCS code.
 (get_long_basename, w32_get_short_filename, init_environment)
 (GetCachedVolumeInformation, sys_readdir, open_unc_volume)
 (read_unc_volume, logon_network_drive, faccessat, sys_chdir)
 (sys_chmod, sys_creat, sys_fopen, sys_link, sys_mkdir, sys_open)
 (sys_rename_replace, sys_rmdir, sys_unlink, stat_worker, utime)
 (is_symlink, readlink, chase_symlinks, w32_delayed_load): Work in
 Unicode mode if w32_unicode_filenames is non-zero, in ANSI mode
 otherwise.
 (ansi_encode_filename): New function.
 (get_emacs_configuration, get_emacs_configuration_options):
 Functions deleted.
 (add_volume_info, GetCachedVolumeInformation): Run the input file
 name through unixtodos_filename, to ensure it is stored and
 referenced in canonical form.
 (get_volume_info): Lose the DBCS code, now works in UTF-8.
 (logon_network_drive, sys_link, utime): Improve error handling.
 (sys_access): New function.
 (hashval, generate_inode_val): Unused functions deleted.
 (symlink, readlink, readlinkat): Lose DBCS code, now works in UTF-8.
 (check_windows_init_file): Convert error message from UTF-8 to
 ANSI codepage, for display in the message box.
 (globals_of_w32): Set w32_unicode_filenames according to the OS
 version.
 src/w32term.c (construct_drag_n_drop): Work in Unicode mode when
 w32_unicode_filenames is non-zero, ANSI mode otherwise.
 (syms_of_w32term): Declare w32-unicode-filenames.
 src/w32proc.c (new_child, delete_child): Remove code that handled
 unused pending_deletion and input_file members of the child struct.
 (create_child, sys_spawnve): Convert all file names to ANSI
 codepage.  Use ANSI APIs explicitly; forcibly fail if any file
 name cannot be encoded in ANSI codepage.  Don't use
 unixtodos_filename, mirror slashes by hand.
 (record_infile, record_pending_deletion): Functions deleted.
 (Fw32_short_file_name): Call w32_get_short_filename instead of
 GetShortPathName.
 src/w32notify.c (add_watch): Work in Unicode mode when
 w32_unicode_filenames is non-zero, ANSI mode otherwise.
 (Fw32notify_add_watch): Rewrite to avoid using GetFullPathName;
 instead, do the same with Lisp primitives.
 src/w32fns.c (file_dialog_callback, Fx_file_dialog)
 (Fsystem_move_file_to_trash, Fw32_shell_execute)
 (Ffile_system_info, Fdefault_printer_name): Work in Unicode mode
 when w32_unicode_filenames is non-zero, ANSI mode otherwise.
 (Fw32_shell_execute): Improve error reporting.
 (Fdefault_printer_name): Ifdef away for Cygwin.
 src/w32.h (struct _child_process): Remove input_file and
 pending_deletion members that are no longer used.
 (dostounix_filename, w32_get_short_filename, filename_from_ansi)
 (filename_to_ansi, filename_from_utf16, filename_to_utf16)
 (ansi_encode_filename): New and updated prototypes.
 src/unexw32.c (open_input_file, open_output_file, unexec): Use ANSI
 APIs explicitly.
 (unexec): Don't use dostounix_filename, it expects a file name in
 UTF-8.  Instead, mirror backslashes by hand.  Convert NEW_NAME to
 ANSI encoding.
 src/fileio.c (Ffile_name_directory, file_name_as_directory)
 (directory_file_name, Fexpand_file_name)
 (Fsubstitute_in_file_name) [WINDOWSNT]: Adapt to the change in
 arguments of dostounix_filename.
 (Fexpand_file_name) [WINDOWSNT]: Convert value of $HOME to UTF-8.
 Use MAX_UTF8_PATH for size of file-name strings.
 (emacs_readlinkat): Build an explicitly unibyte string for file
 names.
 (syms_of_fileio) <file-name-coding-system>
 <default-file-name-coding-system>: Mention MS-Windows peculiarities.
 src/emacs.c (init_cmdargs) [WINDOWSNT]: Convert argv[0] to UTF-8.
 (main) [WINDOWSNT]: Convert the argv[] elements that are files or
 directories to UTF-8.
 (decode_env_path) [WINDOWSNT]: Convert file names taken from the
 environment, and each element of the input PATH, to UTF-8.
 src/dired.c (file_attributes): Use build_unibyte_string explicitly
 to make Lisp strings from user and group names.
 src/coding.h (ENCODE_FILE, DECODE_FILE): Just call encode_file and
 decode_file.
 src/coding.c (decode_file_name, encode_file_name): New functions.
 src/termcap.c (tgetent): Adapt to the change in arguments of
 dostounix_filename.
 src/sysdep.c (sys_subshell) [WINDOWSNT]: Use MAX_UTF8_PATH for file
 names.
 src/msdos.c (dostounix_filename, init_environment): Adapt to the
 change in arguments of dostounix_filename.
 src/image.c (xpm_load, tiff_load, gif_load, imagemagick_load)
 [WINDOWSNT]: Encode file names passed to the image libraries in
 ANSI codepage.
 src/gnutls.c (Fgnutls_boot): Encode all file names passed to GnuTLS.
 [WINDOWSNT]: Convert file names to the current ANSI codepage.
 src/filelock.c (lock_file) [WINDOWSNT]: Adapt to the change in
 arguments of dostounix_filename.

 nt/inc/ms-w32.h (MAX_UTF8_PATH): New macro.
 (opendir, closedir, readdir, seekdir): Redirect to replacement
 functions.
 nt/inc/dirent.h: Make d_name[] be MAXNAMELEN*4 characters long.

 lisp/term/w32-win.el (w32-handle-dropped-file):
 lisp/startup.el (normal-top-level):
 lisp/net/browse-url.el (browse-url-file-url):
 lisp/dnd.el (dnd-get-local-file-name): On MS-Windows, encode and
 decode file names using 'utf-8' rather than
 file-name-coding-system.

 doc/emacs/mule.texi (File Name Coding): Document file-name encoding
 peculiarities on MS-Windows.

 doc/lispref/nonascii.texi (Encoding and I/O): Document file-name encoding
 peculiarities on MS-Windows.

 etc/NEWS: Mention support on MS-Windows of file names outside of the
 current locale.

Fixes: debbugs:7100
parents cf86e18b 893fcd38
2013-12-12 Eli Zaretskii <eliz@gnu.org>
* mule.texi (File Name Coding): Document file-name encoding
peculiarities on MS-Windows.
2013-12-12 Glenn Morris <rgm@gnu.org> 2013-12-12 Glenn Morris <rgm@gnu.org>
* emacs.texi: Sync direntry with info/dir version. * emacs.texi: Sync direntry with info/dir version.
......
...@@ -1130,6 +1130,21 @@ In the default language environment, non-@acronym{ASCII} characters in ...@@ -1130,6 +1130,21 @@ In the default language environment, non-@acronym{ASCII} characters in
file names are not encoded specially; they appear in the file system file names are not encoded specially; they appear in the file system
using the internal Emacs representation. using the internal Emacs representation.
@cindex file-name encoding, MS-Windows
@vindex w32-unicode-filenames
When Emacs runs on MS-Windows versions that are descendants of the
NT family (Windows 2000, XP, Vista, Windows 7, and Windows 8), the
value of @code{file-name-coding-system} is largely ignored, as Emacs
by default uses APIs that allow passing Unicode file names directly.
By contrast, on Windows 9X, file names are encoded using
@code{file-name-coding-system}, which should be set to the codepage
(@pxref{Coding Systems, codepage}) pertinent for the current system
locale. The value of the variable @code{w32-unicode-filenames}
controls whether Emacs uses the Unicode APIs when it calls OS
functions that accept file names. This variable is set by the startup
code to @code{nil} on Windows 9X, and to @code{t} on newer versions of
MS-Windows.
@strong{Warning:} if you change @code{file-name-coding-system} (or the @strong{Warning:} if you change @code{file-name-coding-system} (or the
language environment) in the middle of an Emacs session, problems can language environment) in the middle of an Emacs session, problems can
result if you have already visited files whose names were encoded using result if you have already visited files whose names were encoded using
......
2013-12-12 Eli Zaretskii <eliz@gnu.org>
* nonascii.texi (Encoding and I/O): Document file-name encoding
peculiarities on MS-Windows.
2013-12-12 Glenn Morris <rgm@gnu.org> 2013-12-12 Glenn Morris <rgm@gnu.org>
* elisp.texi: Sync direntry with info/dir version. * elisp.texi: Sync direntry with info/dir version.
......
...@@ -1108,6 +1108,16 @@ visited file name, saving may use the wrong file name, or it may get ...@@ -1108,6 +1108,16 @@ visited file name, saving may use the wrong file name, or it may get
an error. If such a problem happens, use @kbd{C-x C-w} to specify a an error. If such a problem happens, use @kbd{C-x C-w} to specify a
new file name for that buffer. new file name for that buffer.
@cindex file-name encoding, MS-Windows
On Windows 2000 and later, Emacs by default uses Unicode APIs to
pass file names to the OS, so the value of
@code{file-name-coding-system} is largely ignored. Lisp applications
that need to encode or decode file names on the Lisp level should use
@code{utf-8} coding-system when @code{system-type} is
@code{windows-nt}; the conversion of UTF-8 encoded file names to the
encoding appropriate for communicating with the OS is performed
internally by Emacs.
@node Lisp and Coding Systems @node Lisp and Coding Systems
@subsection Coding Systems in Lisp @subsection Coding Systems in Lisp
......
2013-12-12 Eli Zaretskii <eliz@gnu.org>
* NEWS: Mention support on MS-Windows of file names outside of the
current locale.
2013-11-23 Xue Fuqiao <xfq.free@gmail.com> 2013-11-23 Xue Fuqiao <xfq.free@gmail.com>
* TODO: Minor update. * TODO: Minor update.
......
...@@ -1000,6 +1000,14 @@ files are in share/emacs/VERSION/etc. (Emacs knows about all these ...@@ -1000,6 +1000,14 @@ files are in share/emacs/VERSION/etc. (Emacs knows about all these
directories and will find the files in there automatically; there's no directories and will find the files in there automatically; there's no
need to set any variables due to this change.) need to set any variables due to this change.)
+++
** Emacs on Windows 2000 and later can now access files and directories
whose names cannot be encoded in the current system codepage.
The new variable `w32-unicode-filenames' controls this feature: if it
is t, Emacs uses Unicode APIs to pass file names to system calls,
which lifts the limitation of file names to the current locale.
+++ +++
** The "generate a backtrace on fatal error" feature now works on MS Windows. ** The "generate a backtrace on fatal error" feature now works on MS Windows.
The backtrace is written to the 'emacs_backtrace.txt' file in the The backtrace is written to the 'emacs_backtrace.txt' file in the
......
2013-12-12 Eli Zaretskii <eliz@gnu.org>
* term/w32-win.el (w32-handle-dropped-file):
* startup.el (normal-top-level):
* net/browse-url.el (browse-url-file-url):
* dnd.el (dnd-get-local-file-name): On MS-Windows, encode and
decode file names using 'utf-8' rather than
file-name-coding-system.
2013-12-12 Fabián Ezequiel Gallina <fgallina@gnu.org> 2013-12-12 Fabián Ezequiel Gallina <fgallina@gnu.org>
* progmodes/python.el (python-indent-context) * progmodes/python.el (python-indent-context)
......
...@@ -152,10 +152,13 @@ Return nil if URI is not a local file." ...@@ -152,10 +152,13 @@ Return nil if URI is not a local file."
(let ((f (cond ((string-match "^file:///" uri) ; XDND format. (let ((f (cond ((string-match "^file:///" uri) ; XDND format.
(substring uri (1- (match-end 0)))) (substring uri (1- (match-end 0))))
((string-match "^file:" uri) ; Old KDE, Motif, Sun ((string-match "^file:" uri) ; Old KDE, Motif, Sun
(substring uri (match-end 0)))))) (substring uri (match-end 0)))))
(and f (setq f (decode-coding-string (dnd-unescape-uri f) (coding (if (equal system-type 'windows-nt)
(or file-name-coding-system ;; W32 pretends that file names are UTF-8 encoded.
default-file-name-coding-system)))) 'utf-8
(or file-name-coding-system
default-file-name-coding-system))))
(and f (setq f (decode-coding-string (dnd-unescape-uri f) coding)))
(when (and f must-exist (not (file-readable-p f))) (when (and f must-exist (not (file-readable-p f)))
(setq f nil)) (setq f nil))
f)) f))
......
...@@ -723,9 +723,12 @@ interactively. Turn the filename into a URL with function ...@@ -723,9 +723,12 @@ interactively. Turn the filename into a URL with function
(defun browse-url-file-url (file) (defun browse-url-file-url (file)
"Return the URL corresponding to FILE. "Return the URL corresponding to FILE.
Use variable `browse-url-filename-alist' to map filenames to URLs." Use variable `browse-url-filename-alist' to map filenames to URLs."
(let ((coding (and (default-value 'enable-multibyte-characters) (let ((coding (if (equal system-type 'windows-nt)
(or file-name-coding-system ;; W32 pretends that file names are UTF-8 encoded.
default-file-name-coding-system)))) 'utf-8
(and (default-value 'enable-multibyte-characters)
(or file-name-coding-system
default-file-name-coding-system)))))
(if coding (setq file (encode-coding-string file coding)))) (if coding (setq file (encode-coding-string file coding))))
(setq file (browse-url-url-encode-chars file "[*\"()',=;?% ]")) (setq file (browse-url-url-encode-chars file "[*\"()',=;?% ]"))
(dolist (map browse-url-filename-alist) (dolist (map browse-url-filename-alist)
......
...@@ -533,43 +533,45 @@ It is the default value of the variable `top-level'." ...@@ -533,43 +533,45 @@ It is the default value of the variable `top-level'."
;; for many other file-name variables and directory lists, so it ;; for many other file-name variables and directory lists, so it
;; is important to decode it ASAP. ;; is important to decode it ASAP.
(when locale-coding-system (when locale-coding-system
(save-excursion (let ((coding (if (eq system-type 'windows-nt)
(dolist (elt (buffer-list)) ;; MS-Windows build converts all file names to
(set-buffer elt) ;; UTF-8 during startup.
(if default-directory 'utf-8
(setq default-directory locale-coding-system)))
(decode-coding-string default-directory (save-excursion
locale-coding-system t))))) (dolist (elt (buffer-list))
(set-buffer elt)
;; Decode all the important variables and directory lists, now (if default-directory
;; that we know the locale's encoding. This is because the (setq default-directory
;; values of these variables are until here unibyte undecoded (decode-coding-string default-directory coding t)))))
;; strings created by build_unibyte_string. data-directory in
;; particular is used to construct many other standard directory ;; Decode all the important variables and directory lists, now
;; names, so it must be decoded ASAP. ;; that we know the locale's encoding. This is because the
;; Note that charset-map-path cannot be decoded here, since we ;; values of these variables are until here unibyte undecoded
;; could then be trapped in infinite recursion below, when we ;; strings created by build_unibyte_string. data-directory in
;; load subdirs.el, because encoding a directory name might need ;; particular is used to construct many other standard
;; to load a charset map, which will want to encode ;; directory names, so it must be decoded ASAP. Note that
;; charset-map-path, which will want to load the same charset ;; charset-map-path cannot be decoded here, since we could
;; map... So decoding of charset-map-path is delayed until ;; then be trapped in infinite recursion below, when we load
;; further down below. ;; subdirs.el, because encoding a directory name might need to
(dolist (pathsym '(load-path exec-path)) ;; load a charset map, which will want to encode
(let ((path (symbol-value pathsym))) ;; charset-map-path, which will want to load the same charset
(if (listp path) ;; map... So decoding of charset-map-path is delayed until
(set pathsym (mapcar (lambda (dir) ;; further down below.
(decode-coding-string (dolist (pathsym '(load-path exec-path))
dir (let ((path (symbol-value pathsym)))
locale-coding-system t)) (if (listp path)
path))))) (set pathsym (mapcar (lambda (dir)
(dolist (filesym '(data-directory doc-directory exec-directory (decode-coding-string dir coding t))
installation-directory path)))))
invocation-directory invocation-name (dolist (filesym '(data-directory doc-directory exec-directory
source-directory installation-directory
shared-game-score-directory)) invocation-directory invocation-name
(let ((file (symbol-value filesym))) source-directory
(if (stringp file) shared-game-score-directory))
(set filesym (decode-coding-string file locale-coding-system t)))))) (let ((file (symbol-value filesym)))
(if (stringp file)
(set filesym (decode-coding-string file coding t)))))))
(let ((dir default-directory)) (let ((dir default-directory))
(with-current-buffer "*Messages*" (with-current-buffer "*Messages*"
...@@ -599,12 +601,13 @@ It is the default value of the variable `top-level'." ...@@ -599,12 +601,13 @@ It is the default value of the variable `top-level'."
;; need for encoding them are already loaded, we are ready to ;; need for encoding them are already loaded, we are ready to
;; decode charset-map-path. ;; decode charset-map-path.
(if (listp charset-map-path) (if (listp charset-map-path)
(setq charset-map-path (let ((coding (if (eq system-type 'windows-nt)
(mapcar (lambda (dir) 'utf-8
(decode-coding-string locale-coding-system)))
dir (setq charset-map-path
locale-coding-system t)) (mapcar (lambda (dir)
charset-map-path))) (decode-coding-string dir coding t))
charset-map-path))))
(setq default-directory (abbreviate-file-name default-directory)) (setq default-directory (abbreviate-file-name default-directory))
(let ((old-face-font-rescale-alist face-font-rescale-alist)) (let ((old-face-font-rescale-alist face-font-rescale-alist))
(unwind-protect (unwind-protect
......
...@@ -110,8 +110,13 @@ ...@@ -110,8 +110,13 @@
(let ((f (if (eq system-type 'cygwin) (let ((f (if (eq system-type 'cygwin)
(cygwin-convert-file-name-from-windows file-name t) (cygwin-convert-file-name-from-windows file-name t)
(subst-char-in-string ?\\ ?/ file-name))) (subst-char-in-string ?\\ ?/ file-name)))
(coding (or file-name-coding-system (coding (if (eq system-type 'windows-nt)
default-file-name-coding-system))) ;; Native w32 build pretends that its file names
;; are encoded in UTF-8, and converts to the
;; appropriate encoding internally.
'utf-8
(or file-name-coding-system
default-file-name-coding-system))))
(setq file-name (setq file-name
(mapconcat 'url-hexify-string (mapconcat 'url-hexify-string
......
2013-12-12 Eli Zaretskii <eliz@gnu.org>
* inc/ms-w32.h (MAX_UTF8_PATH): New macro.
(opendir, closedir, readdir, seekdir): Redirect to replacement
functions.
* inc/dirent.h: Make d_name[] be MAXNAMELEN*4 characters long.
2013-11-27 Glenn Morris <rgm@gnu.org> 2013-11-27 Glenn Morris <rgm@gnu.org>
* README.W32: * README.W32:
......
...@@ -40,7 +40,7 @@ struct dirent /* data from readdir() */ ...@@ -40,7 +40,7 @@ struct dirent /* data from readdir() */
__int64 d_time_write; __int64 d_time_write;
_fsize_t d_size; _fsize_t d_size;
#endif #endif
char d_name[MAXNAMLEN+1]; /* name of file */ char d_name[MAXNAMLEN * 4 + 1]; /* name of file */
}; };
typedef struct typedef struct
......
...@@ -152,6 +152,9 @@ extern char *getenv (); ...@@ -152,6 +152,9 @@ extern char *getenv ();
#define MAXPATHLEN _MAX_PATH #define MAXPATHLEN _MAX_PATH
#endif #endif
/* This is used to hold UTF-8 encoded file names. */
#define MAX_UTF8_PATH (MAXPATHLEN * 4)
#ifdef HAVE_NTGUI #ifdef HAVE_NTGUI
# ifndef HAVE_WINDOW_SYSTEM # ifndef HAVE_WINDOW_SYSTEM
# define HAVE_WINDOW_SYSTEM 1 # define HAVE_WINDOW_SYSTEM 1
...@@ -218,6 +221,14 @@ extern struct tm * sys_localtime (const time_t *); ...@@ -218,6 +221,14 @@ extern struct tm * sys_localtime (const time_t *);
#define strerror sys_strerror #define strerror sys_strerror
#undef unlink #undef unlink
#define unlink sys_unlink #define unlink sys_unlink
#undef opendir
#define opendir sys_opendir
#undef closedir
#define closedir sys_closedir
#undef readdir
#define readdir sys_readdir
#undef seekdir
#define seekdir sys_seekdir
/* This prototype is needed because some files include config.h /* This prototype is needed because some files include config.h
_after_ the standard headers, so sys_unlink gets no prototype from _after_ the standard headers, so sys_unlink gets no prototype from
stdio.h or io.h. */ stdio.h or io.h. */
......
2013-12-12 Eli Zaretskii <eliz@gnu.org>
Support file names on MS-Windows that use characters outside of
the current system codepage. (Bug#7100)
* w32.c (get_file_security, set_file_security)
(create_symbolic_link): Separate pointers and boolean flags for
ANSI and Unicode APIs. Use the latter if w32_unicode_filenames is
non-zero, else the former.
(codepage_for_filenames, filename_to_utf16)
(filename_from_utf16, filename_to_ansi, filename_from_ansi): New
functions.
(init_user_info): Allow $HOME and $SHELL to include non-ANSI
characters.
(normalize_filename): Lose the DBCS code, now works on UTF-8.
Accept only one argument; all callers changed.
(dostounix_filename): Remove the second argument, now works in
UTF-8. All callers changed.
(parse_root): Lose DBCS code.
(get_long_basename, w32_get_short_filename, init_environment)
(GetCachedVolumeInformation, sys_readdir, open_unc_volume)
(read_unc_volume, logon_network_drive, faccessat, sys_chdir)
(sys_chmod, sys_creat, sys_fopen, sys_link, sys_mkdir, sys_open)
(sys_rename_replace, sys_rmdir, sys_unlink, stat_worker, utime)
(is_symlink, readlink, chase_symlinks, w32_delayed_load): Work in
Unicode mode if w32_unicode_filenames is non-zero, in ANSI mode
otherwise.
(ansi_encode_filename): New function.
(get_emacs_configuration, get_emacs_configuration_options):
Functions deleted.
(add_volume_info, GetCachedVolumeInformation): Run the input file
name through unixtodos_filename, to ensure it is stored and
referenced in canonical form.
(get_volume_info): Lose the DBCS code, now works in UTF-8.
(logon_network_drive, sys_link, utime): Improve error handling.
(sys_access): New function.
(hashval, generate_inode_val): Unused functions deleted.
(symlink, readlink, readlinkat): Lose DBCS code, now works in UTF-8.
(check_windows_init_file): Convert error message from UTF-8 to
ANSI codepage, for display in the message box.
(globals_of_w32): Set w32_unicode_filenames according to the OS
version.
* w32term.c (construct_drag_n_drop): Work in Unicode mode when
w32_unicode_filenames is non-zero, ANSI mode otherwise.
(syms_of_w32term): Declare w32-unicode-filenames.
* w32proc.c (new_child, delete_child): Remove code that handled
unused pending_deletion and input_file members of the child struct.
(create_child, sys_spawnve): Convert all file names to ANSI
codepage. Use ANSI APIs explicitly; forcibly fail if any file
name cannot be encoded in ANSI codepage. Don't use
unixtodos_filename, mirror slashes by hand.
(record_infile, record_pending_deletion): Functions deleted.
(Fw32_short_file_name): Call w32_get_short_filename instead of
GetShortPathName.
* w32notify.c (add_watch): Work in Unicode mode when
w32_unicode_filenames is non-zero, ANSI mode otherwise.
(Fw32notify_add_watch): Rewrite to avoid using GetFullPathName;
instead, do the same with Lisp primitives.
* w32fns.c (file_dialog_callback, Fx_file_dialog)
(Fsystem_move_file_to_trash, Fw32_shell_execute)
(Ffile_system_info, Fdefault_printer_name): Work in Unicode mode
when w32_unicode_filenames is non-zero, ANSI mode otherwise.
(Fw32_shell_execute): Improve error reporting.
(Fdefault_printer_name): Ifdef away for Cygwin.
* w32.h (struct _child_process): Remove input_file and
pending_deletion members that are no longer used.
(dostounix_filename, w32_get_short_filename, filename_from_ansi)
(filename_to_ansi, filename_from_utf16, filename_to_utf16)
(ansi_encode_filename): New and updated prototypes.
* unexw32.c (open_input_file, open_output_file, unexec): Use ANSI
APIs explicitly.
(unexec): Don't use dostounix_filename, it expects a file name in
UTF-8. Instead, mirror backslashes by hand. Convert NEW_NAME to
ANSI encoding.
* fileio.c (Ffile_name_directory, file_name_as_directory)
(directory_file_name, Fexpand_file_name)
(Fsubstitute_in_file_name) [WINDOWSNT]: Adapt to the change in
arguments of dostounix_filename.
(Fexpand_file_name) [WINDOWSNT]: Convert value of $HOME to UTF-8.
Use MAX_UTF8_PATH for size of file-name strings.
(emacs_readlinkat): Build an explicitly unibyte string for file
names.
(syms_of_fileio) <file-name-coding-system>
<default-file-name-coding-system>: Mention MS-Windows peculiarities.
* emacs.c (init_cmdargs) [WINDOWSNT]: Convert argv[0] to UTF-8.
(main) [WINDOWSNT]: Convert the argv[] elements that are files or
directories to UTF-8.
(decode_env_path) [WINDOWSNT]: Convert file names taken from the
environment, and each element of the input PATH, to UTF-8.
* dired.c (file_attributes): Use build_unibyte_string explicitly
to make Lisp strings from user and group names.
* coding.h (ENCODE_FILE, DECODE_FILE): Just call encode_file and
decode_file.
* coding.c (decode_file_name, encode_file_name): New functions.
* termcap.c (tgetent): Adapt to the change in arguments of
dostounix_filename.
* sysdep.c (sys_subshell) [WINDOWSNT]: Use MAX_UTF8_PATH for file
names.
* msdos.c (dostounix_filename, init_environment): Adapt to the
change in arguments of dostounix_filename.
* image.c (xpm_load, tiff_load, gif_load, imagemagick_load)
[WINDOWSNT]: Encode file names passed to the image libraries in
ANSI codepage.
* gnutls.c (Fgnutls_boot): Encode all file names passed to GnuTLS.
[WINDOWSNT]: Convert file names to the current ANSI codepage.
* filelock.c (lock_file) [WINDOWSNT]: Adapt to the change in
arguments of dostounix_filename.
2013-12-12 Dmitry Antipov <dmantipov@yandex.ru> 2013-12-12 Dmitry Antipov <dmantipov@yandex.ru>
* font.h (struct font_entity) [HAVE_NS]: New field to record * font.h (struct font_entity) [HAVE_NS]: New field to record
......
...@@ -9490,6 +9490,55 @@ code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system, ...@@ -9490,6 +9490,55 @@ code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system,
return code_convert_string (string, coding_system, Qt, encodep, 0, 1); return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
} }
/* Encode or decode a file name, to or from a unibyte string suitable
for passing to C library functions. */
Lisp_Object
decode_file_name (Lisp_Object fname)
{
#ifdef WINDOWSNT
  /* On the w32 build file names are treated as UTF-8 at this level;
     w32.c then converts them to UTF-16LE or to the system ANSI
     codepage, depending on the underlying OS.  If utf-8 is not a
     known coding system, hand the name back undecoded.  */
  if (NILP (Fcoding_system_p (Qutf_8)))
    return fname;
  return code_convert_string_norecord (fname, Qutf_8, 0);
#else /* !WINDOWSNT */
  /* Prefer file-name-coding-system and fall back on its default; when
     neither is set the name is returned undecoded.  */
  Lisp_Object coding = Vfile_name_coding_system;

  if (NILP (coding))
    coding = Vdefault_file_name_coding_system;
  if (NILP (coding))
    return fname;
  return code_convert_string_norecord (fname, coding, 0);
#endif
}
Lisp_Object
encode_file_name (Lisp_Object fname)
{
  /* Unibyte names are passed through as-is.  This matters most during
     bootstrap and dumping: the file-name encoding is not yet known
     then, so non-ASCII file names are unibyte strings that encoding
     could only corrupt.  */
  if (!STRING_MULTIBYTE (fname))
    return fname;

#ifdef WINDOWSNT
  /* On the w32 build file names are treated as UTF-8 at this level;
     w32.c then converts them to UTF-16LE or to the system ANSI
     codepage, depending on the underlying OS.  If utf-8 is not a
     known coding system, hand the name back unencoded.  */
  if (NILP (Fcoding_system_p (Qutf_8)))
    return fname;
  return code_convert_string_norecord (fname, Qutf_8, 1);
#else /* !WINDOWSNT */
  {
    /* Prefer file-name-coding-system and fall back on its default;
       when neither is set the name is returned unencoded.  */
    Lisp_Object coding = Vfile_name_coding_system;

    if (NILP (coding))
      coding = Vdefault_file_name_coding_system;
    if (NILP (coding))
      return fname;
    return code_convert_string_norecord (fname, coding, 1);
  }
#endif
}
DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string, DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
2, 4, 0, 2, 4, 0,
......
...@@ -670,27 +670,13 @@ struct coding_system ...@@ -670,27 +670,13 @@ struct coding_system
(code) = (s1 << 8) | s2; \ (code) = (s1 << 8) | s2; \
} while (0) } while (0)
/* Encode the file name NAME using the specified coding system for /* Encode the file name NAME using the specified coding system
file names, if any. If NAME is a unibyte string, return NAME. */ for file names, if any. */
#define ENCODE_FILE(name) \ #define ENCODE_FILE(NAME) encode_file_name (NAME)
(! STRING_MULTIBYTE (name) \
? name \
: (! NILP (Vfile_name_coding_system) \
? code_convert_string_norecord (name, Vfile_name_coding_system, 1) \
: (! NILP (Vdefault_file_name_coding_system) \
? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 1) \
: name)))
/* Decode the file name NAME using the specified coding system /* Decode the file name NAME using the specified coding system
for file names, if any. */ for file names, if any. */
#define DECODE_FILE(name) \ #define DECODE_FILE(NAME) decode_file_name (NAME)
(! NILP (Vfile_name_coding_system) \
? code_convert_string_norecord (name, Vfile_name_coding_system, 0) \
: (! NILP (Vdefault_file_name_coding_system) \
? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 0) \
: name))
/* Encode the string STR using the specified coding system /* Encode the string STR using the specified coding system
for system functions, if any. */ for system functions, if any. */
...@@ -718,6 +704,8 @@ extern Lisp_Object code_convert_string (Lisp_Object, Lisp_Object, ...@@ -718,6 +704,8 @@ extern Lisp_Object code_convert_string (Lisp_Object, Lisp_Object,
Lisp_Object, bool, bool, bool); Lisp_Object, bool, bool, bool);
extern Lisp_Object code_convert_string_norecord (Lisp_Object, Lisp_Object, extern Lisp_Object code_convert_string_norecord (Lisp_Object, Lisp_Object,
bool); bool);
extern Lisp_Object encode_file_name (Lisp_Object);
extern Lisp_Object decode_file_name (Lisp_Object);
extern Lisp_Object raw_text_coding_system (Lisp_Object); extern Lisp_Object raw_text_coding_system (Lisp_Object);
extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object); extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object);
extern Lisp_Object complement_process_encoding_system (Lisp_Object); extern Lisp_Object complement_process_encoding_system (Lisp_Object);
......
...@@ -958,11 +958,11 @@ file_attributes (int fd, char const *name, Lisp_Object id_format) ...@@ -958,11 +958,11 @@ file_attributes (int fd, char const *name, Lisp_Object id_format)
unblock_input (); unblock_input ();
} }
if (uname) if (uname)
values[2] = DECODE_SYSTEM (build_string (uname)); values[2] = DECODE_SYSTEM (build_unibyte_string (uname));
else else
values[2] = make_fixnum_or_float (s.st_uid); values[2] = make_fixnum_or_float (s.st_uid);
if (gname) if (gname)
values[3] = DECODE_SYSTEM (build_string (gname)); values[3] = DECODE_SYSTEM (build_unibyte_string (gname));
else else
values[3] = make_fixnum_or_float (s.st_gid); values[3] = make_fixnum_or_float (s.st_gid);
......
...@@ -36,6 +36,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ ...@@ -36,6 +36,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
#ifdef WINDOWSNT #ifdef WINDOWSNT
#include <fcntl.h> #include <fcntl.h>
#include <sys/socket.h> #include <sys/socket.h>
#include <mbstring.h>
#include "w32.h"