Commit 015ea0ff authored by Eli Zaretskii

Implement locale-sensitive string collation for MS-Windows. (Bug#18051)

 src/w32proc.c (get_lcid_callback, get_lcid, w32_compare_strings):
 New functions.
 src/w32.h (w32_compare_strings): Add prototype.
 src/w32.c <g_b_init_compare_string_w>: New global flag.
 (globals_of_w32): Initialize it.
 src/sysdep.c (str_collate) [WINDOWSNT]: Implementation for MS-Windows.
 src/fns.c (Fstring_collate_lessp, Fstring_collate_equalp)
 [WINDOWSNT]: Call str_collate on MS-Windows.

 etc/NEWS: Mention that string-collate-* functions are supported on
 MS-Windows as well.
parent 8661ebaa
2014-08-25 Eli Zaretskii <eliz@gnu.org>
* NEWS: Mention that string-collate-* functions are supported on
MS-Windows as well.
2014-08-08 Jan Nieuwenhuizen <janneke@gnu.org> 2014-08-08 Jan Nieuwenhuizen <janneke@gnu.org>
* compilation.txt (file): Add Guile backtrace example. * compilation.txt (file): Add Guile backtrace example.
......
...@@ -68,9 +68,9 @@ variable `read-hide-char'. ...@@ -68,9 +68,9 @@ variable `read-hide-char'.
** The new functions `string-collate-lessp' and `string-collate-equalp' ** The new functions `string-collate-lessp' and `string-collate-equalp'
preserve the collation order as defined by the system's locale(1) preserve the collation order as defined by the system's locale(1)
environment. For the time being this is implemented for POSIX systems environment. For the time being this is implemented for modern POSIX
only, for other systems they fall back to their counterparts systems and for MS-Windows, for other systems they fall back to their
`string-lessp' and `string-equal'. counterparts `string-lessp' and `string-equal'.
* Editing Changes in Emacs 24.5 * Editing Changes in Emacs 24.5
......
2014-08-25 Eli Zaretskii <eliz@gnu.org>
Implement locale-sensitive string collation for MS-Windows.
* w32proc.c (get_lcid_callback, get_lcid, w32_compare_strings):
New functions. (Bug#18051)
* w32.h (w32_compare_strings): Add prototype.
* w32.c <g_b_init_compare_string_w>: New global flag.
(globals_of_w32): Initialize it.
* sysdep.c (str_collate) [WINDOWSNT]: Implementation for MS-Windows.
* fns.c (Fstring_collate_lessp, Fstring_collate_equalp)
[WINDOWSNT]: Call str_collate on MS-Windows.
2014-08-25 Dmitry Antipov <dmantipov@yandex.ru> 2014-08-25 Dmitry Antipov <dmantipov@yandex.ru>
One more minor cleanup of font subsystem. One more minor cleanup of font subsystem.
......
...@@ -364,7 +364,7 @@ If the environment variable \"LC_COLLATE\" is set in `process-environment', ...@@ -364,7 +364,7 @@ If the environment variable \"LC_COLLATE\" is set in `process-environment',
it overrides the setting of your current locale. */) it overrides the setting of your current locale. */)
(Lisp_Object s1, Lisp_Object s2) (Lisp_Object s1, Lisp_Object s2)
{ {
#ifdef __STDC_ISO_10646__ #if defined __STDC_ISO_10646__ || defined WINDOWSNT
/* Check parameters. */ /* Check parameters. */
if (SYMBOLP (s1)) if (SYMBOLP (s1))
s1 = SYMBOL_NAME (s1); s1 = SYMBOL_NAME (s1);
...@@ -375,9 +375,9 @@ it overrides the setting of your current locale. */) ...@@ -375,9 +375,9 @@ it overrides the setting of your current locale. */)
return (str_collate (s1, s2) < 0) ? Qt : Qnil; return (str_collate (s1, s2) < 0) ? Qt : Qnil;
#else #else /* !__STDC_ISO_10646__, !WINDOWSNT */
return Fstring_lessp (s1, s2); return Fstring_lessp (s1, s2);
#endif /* __STDC_ISO_10646__ */ #endif /* !__STDC_ISO_10646__, !WINDOWSNT */
} }
DEFUN ("string-collate-equalp", Fstring_collate_equalp, Sstring_collate_equalp, 2, 2, 0, DEFUN ("string-collate-equalp", Fstring_collate_equalp, Sstring_collate_equalp, 2, 2, 0,
...@@ -401,7 +401,7 @@ If the environment variable \"LC_COLLATE\" is set in `process-environment', ...@@ -401,7 +401,7 @@ If the environment variable \"LC_COLLATE\" is set in `process-environment',
it overrides the setting of your current locale. */) it overrides the setting of your current locale. */)
(Lisp_Object s1, Lisp_Object s2) (Lisp_Object s1, Lisp_Object s2)
{ {
#ifdef __STDC_ISO_10646__ #if defined __STDC_ISO_10646__ || defined WINDOWSNT
/* Check parameters. */ /* Check parameters. */
if (SYMBOLP (s1)) if (SYMBOLP (s1))
s1 = SYMBOL_NAME (s1); s1 = SYMBOL_NAME (s1);
...@@ -412,9 +412,9 @@ it overrides the setting of your current locale. */) ...@@ -412,9 +412,9 @@ it overrides the setting of your current locale. */)
return (str_collate (s1, s2) == 0) ? Qt : Qnil; return (str_collate (s1, s2) == 0) ? Qt : Qnil;
#else #else /* !__STDC_ISO_10646__, !WINDOWSNT */
return Fstring_equal (s1, s2); return Fstring_equal (s1, s2);
#endif /* __STDC_ISO_10646__ */ #endif /* !__STDC_ISO_10646__, !WINDOWSNT */
} }
static Lisp_Object concat (ptrdiff_t nargs, Lisp_Object *args, static Lisp_Object concat (ptrdiff_t nargs, Lisp_Object *args,
......
...@@ -3592,3 +3592,15 @@ str_collate (Lisp_Object s1, Lisp_Object s2) ...@@ -3592,3 +3592,15 @@ str_collate (Lisp_Object s1, Lisp_Object s2)
return res; return res;
} }
#endif /* __STDC_ISO_10646__ */ #endif /* __STDC_ISO_10646__ */
#ifdef WINDOWSNT
/* MS-Windows implementation of str_collate: compare the Lisp strings
   S1 and S2 with w32_compare_strings and return its result.

   If the variable LC_COLLATE in `process-environment' has a string
   value, that value is passed as the locale name; otherwise NULL is
   passed and w32_compare_strings picks the locale itself.  */
ptrdiff_t
str_collate (Lisp_Object s1, Lisp_Object s2)
{
  Lisp_Object lc_collate =
    Fgetenv_internal (build_string ("LC_COLLATE"), Vprocess_environment);
  char *loc = STRINGP (lc_collate) ? SSDATA (lc_collate) : NULL;

  /* w32_compare_strings is prototyped with 'const char *' arguments,
     so hand it the 'char *' view of the string data (SSDATA), as is
     already done for LOC above, rather than relying on an implicit
     conversion between pointers of different signedness.  */
  return w32_compare_strings (SSDATA (s1), SSDATA (s2), loc);
}
#endif /* WINDOWSNT */
...@@ -309,6 +309,8 @@ static BOOL g_b_init_set_named_security_info_w; ...@@ -309,6 +309,8 @@ static BOOL g_b_init_set_named_security_info_w;
static BOOL g_b_init_set_named_security_info_a; static BOOL g_b_init_set_named_security_info_a;
static BOOL g_b_init_get_adapters_info; static BOOL g_b_init_get_adapters_info;
BOOL g_b_init_compare_string_w;
/* /*
BEGIN: Wrapper functions around OpenProcessToken BEGIN: Wrapper functions around OpenProcessToken
and other functions in advapi32.dll that are only and other functions in advapi32.dll that are only
...@@ -9068,6 +9070,7 @@ globals_of_w32 (void) ...@@ -9068,6 +9070,7 @@ globals_of_w32 (void)
g_b_init_set_named_security_info_w = 0; g_b_init_set_named_security_info_w = 0;
g_b_init_set_named_security_info_a = 0; g_b_init_set_named_security_info_a = 0;
g_b_init_get_adapters_info = 0; g_b_init_get_adapters_info = 0;
g_b_init_compare_string_w = 0;
num_of_processors = 0; num_of_processors = 0;
/* The following sets a handler for shutdown notifications for /* The following sets a handler for shutdown notifications for
console apps. This actually applies to Emacs in both console and console apps. This actually applies to Emacs in both console and
......
...@@ -210,6 +210,9 @@ extern int sys_link (const char *, const char *); ...@@ -210,6 +210,9 @@ extern int sys_link (const char *, const char *);
extern int w32_memory_info (unsigned long long *, unsigned long long *, extern int w32_memory_info (unsigned long long *, unsigned long long *,
unsigned long long *, unsigned long long *); unsigned long long *, unsigned long long *);
/* Compare 2 UTF-8 strings in locale-dependent fashion. */
extern int w32_compare_strings (const char *, const char *, char *);
#ifdef HAVE_GNUTLS #ifdef HAVE_GNUTLS
#include <gnutls/gnutls.h> #include <gnutls/gnutls.h>
......
...@@ -32,6 +32,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ ...@@ -32,6 +32,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
#include <signal.h> #include <signal.h>
#include <sys/file.h> #include <sys/file.h>
#include <mbstring.h> #include <mbstring.h>
#include <locale.h>
/* must include CRT headers *before* config.h */ /* must include CRT headers *before* config.h */
#include <config.h> #include <config.h>
...@@ -3144,6 +3145,159 @@ If successful, the new layout id is returned, otherwise nil. */) ...@@ -3144,6 +3145,159 @@ If successful, the new layout id is returned, otherwise nil. */)
return Fw32_get_keyboard_layout (); return Fw32_get_keyboard_layout ();
} }
/* Two variables to interface between get_lcid and the EnumLocales
   callback function below.  */
/* Fallback value, used only when the headers included so far do not
   define LOCALE_NAME_MAX_LENGTH themselves.  */
#ifndef LOCALE_NAME_MAX_LENGTH
# define LOCALE_NAME_MAX_LENGTH 85
#endif
/* Result slot: get_lcid zeroes it before enumerating, and the callback
   stores the matching locale ID here, so zero means "no match".  */
static LCID found_lcid;
/* The locale name being looked up.  get_lcid copies its argument here
   (truncated to fit and always NUL-terminated) and the callback
   compares each enumerated locale against it.  */
static char lname[3 * LOCALE_NAME_MAX_LENGTH + 1 + 1];
/* Callback function for EnumLocales.  LOCALE_NUM_STR is the locale ID
   of one enumerated locale, as a hexadecimal string.

   Build the name "LANG_CTRY" for that locale from its abbreviated
   language and country names, and compare it, ignoring case, with the
   name stored in 'lname'.  The names match if 'lname' is exactly
   "LANG_CTRY" or "LANG_CTRY" followed by '.' (a codeset suffix).

   On a match, record the locale ID in 'found_lcid' and return FALSE to
   stop the enumeration; otherwise return TRUE to continue with the
   next locale.  */
static BOOL CALLBACK
get_lcid_callback (LPTSTR locale_num_str)
{
  char candidate[2 * LOCALE_NAME_MAX_LENGTH + 1 + 1];
  LCID lcid = strtoul (locale_num_str, NULL, 16);
  size_t len;

  /* Language part, e.g. "ENU".  */
  if (!GetLocaleInfo (lcid, LOCALE_SABBREVLANGNAME,
                      candidate, LOCALE_NAME_MAX_LENGTH))
    return TRUE;

  /* Append the separator, keeping the buffer NUL-terminated.  */
  len = strlen (candidate);
  candidate[len++] = '_';
  candidate[len] = '\0';

  /* Country part, e.g. "USA", written right after the separator.  */
  if (!GetLocaleInfo (lcid, LOCALE_SABBREVCTRYNAME,
                      candidate + len, LOCALE_NAME_MAX_LENGTH))
    return TRUE;

  len = strlen (candidate);
  if (strnicmp (candidate, lname, len) != 0)
    return TRUE;

  /* Reject names where CANDIDATE is merely a proper prefix of a
     longer language/country name.  */
  if (lname[len] != '.' && lname[len] != '\0')
    return TRUE;

  found_lcid = lcid;
  return FALSE;
}
/* Return the Locale ID (LCID) number given the locale's name, a
   string, in LOCALE_NAME.  This works by enumerating all the locales
   supported by the system, until we find one whose name matches
   LOCALE_NAME.  Return zero if no supported locale matches.

   The most recent successful lookup is remembered, so that repeated
   calls with the same name do not enumerate the locales again.  */
static LCID
get_lcid (const char *locale_name)
{
  /* A simple cache.  */
  static LCID last_lcid;
  static char last_locale[1000];
  size_t name_len;

  /* The code below is not thread-safe, as it uses static variables.
     But this function is called only from the Lisp thread.  */
  if (last_lcid > 0 && strcmp (locale_name, last_locale) == 0)
    return last_lcid;

  /* Hand the name to the callback through 'lname'.  strncpy does not
     terminate the copy when it truncates, so do that explicitly.  */
  strncpy (lname, locale_name, sizeof (lname) - 1);
  lname[sizeof (lname) - 1] = '\0';
  found_lcid = 0;
  EnumSystemLocales (get_lcid_callback, LCID_SUPPORTED);

  /* Cache the result only if the name fits in the cache buffer.  The
     callback matches against the truncated copy in 'lname', so a match
     does not imply that LOCALE_NAME is short: a valid "LANG_CTRY."
     prefix followed by a very long codeset would otherwise overflow
     'last_locale'.  Oversized names simply are not cached.  */
  name_len = strlen (locale_name);
  if (found_lcid > 0 && name_len < sizeof (last_locale))
    {
      last_lcid = found_lcid;
      memcpy (last_locale, locale_name, name_len + 1);
    }
  return found_lcid;
}
#ifndef _NSLCMPERROR
# define _NSLCMPERROR INT_MAX
#endif
int
w32_compare_strings (const char *s1, const char *s2, char *locname)
{
LCID lcid = GetThreadLocale ();
wchar_t *string1_w, *string2_w;
int val, needed;
extern BOOL g_b_init_compare_string_w;
static int (WINAPI *pCompareStringW)(LCID, DWORD, LPCWSTR, int, LPCWSTR, int);
USE_SAFE_ALLOCA;
if (!g_b_init_compare_string_w)
{
if (os_subtype == OS_9X)
{
pCompareStringW = GetProcAddress (LoadLibrary ("Unicows.dll"),
"CompareStringW");
if (!pCompareStringW)
{
errno = EINVAL;
/* This return value is compatible with wcscoll and
other MS CRT functions. */
return _NSLCMPERROR;
}
}
else
pCompareStringW = CompareStringW;
g_b_init_compare_string_w = 1;
}
needed = pMultiByteToWideChar (CP_UTF8, MB_ERR_INVALID_CHARS, s1, -1, NULL, 0);
if (needed > 0)
{
SAFE_NALLOCA (string1_w, 1, needed + 1);
pMultiByteToWideChar (CP_UTF8, MB_ERR_INVALID_CHARS, s1, -1,
string1_w, needed);
}
else
{
errno = EINVAL;
return _NSLCMPERROR;
}
needed = pMultiByteToWideChar (CP_UTF8, MB_ERR_INVALID_CHARS, s2, -1, NULL, 0);
if (needed > 0)
{
SAFE_NALLOCA (string2_w, 1, needed + 1);
pMultiByteToWideChar (CP_UTF8, MB_ERR_INVALID_CHARS, s2, -1,
string2_w, needed);
}
else
{
SAFE_FREE ();
errno = EINVAL;
return _NSLCMPERROR;
}
if (locname)
{
/* Convert locale name string to LCID. We don't want to use
LocaleNameToLCID because (a) it is only available since
Vista, and (b) it doesn't accept locale names returned by
'setlocale' and 'GetLocaleInfo'. */
LCID new_lcid = get_lcid (locname);
if (new_lcid > 0)
lcid = new_lcid;
}
/* FIXME: Need a way to control the FLAGS argument, perhaps via the
CODESET part of LOCNAME. In particular, ls-lisp will want
NORM_IGNORESYMBOLS and sometimes LINGUISTIC_IGNORECASE or
NORM_IGNORECASE. */
val = pCompareStringW (lcid, 0, string1_w, -1, string2_w, -1);
SAFE_FREE ();
if (!val)
{
errno = EINVAL;
return _NSLCMPERROR;
}
return val - 2;
}
void void
syms_of_ntproc (void) syms_of_ntproc (void)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment