Index: gettext-runtime/intl/localename.c
===================================================================
RCS file: /cvs/gettext/gettext/gettext-runtime/intl/localename.c,v
retrieving revision 1.9
diff -c -3 -r1.9 localename.c
*** gettext-runtime/intl/localename.c	10 Jan 2005 15:56:14 -0000	1.9
--- gettext-runtime/intl/localename.c	16 Mar 2005 13:25:26 -0000
***************
*** 694,699 ****
--- 694,973 ----
  # endif
  #endif
  
+ # if HAVE_CFLOCALECOPYCURRENT || HAVE_CFPREFERENCESCOPYAPPVALUE
+ /* MacOS X 10.2 or newer */
+ 
+ /* Canonicalize a MacOS X locale name to a Unix locale name.
+    NAME is a sufficiently large buffer.
+    On input, it contains the MacOS X locale name.
+    On output, it contains the Unix locale name.  */
+ void
+ _nl_locale_name_canonicalize (char *name)
+ {
+   /* This conversion is based on a posting by
+      Deborah GoldSmith <goldsmit@apple.com> on 2005-03-08,
+      http://lists.apple.com/archives/carbon-dev/2005/Mar/msg00293.html */
+ 
+   /* Convert legacy (NeXTstep inherited) English names to Unix (ISO 639 and
+      ISO 3166) names.  Prior to MacOS X 10.3, there is no API for doing this.
+      Therefore we do it ourselves, using a table based on the results of the
+      MacOS X 10.3.8 function
+      CFLocaleCreateCanonicalLocaleIdentifierFromString().  */
+   typedef struct { const char legacy[21+1]; const char unixy[5+1]; }
+ 	  legacy_entry;
+   static const legacy_entry legacy_table[] = {
+     { "Afrikaans",             "af" },
+     { "Albanian",              "sq" },
+     { "Amharic",               "am" },
+     { "Arabic",                "ar" },
+     { "Armenian",              "hy" },
+     { "Assamese",              "as" },
+     { "Aymara",                "ay" },
+     { "Azerbaijani",           "az" },
+     { "Basque",                "eu" },
+     { "Belarusian",            "be" },
+     { "Belorussian",           "be" },
+     { "Bengali",               "bn" },
+     { "Brazilian Portugese",   "pt_BR" },
+     { "Brazilian Portuguese",  "pt_BR" },
+     { "Breton",                "br" },
+     { "Bulgarian",             "bg" },
+     { "Burmese",               "my" },
+     { "Byelorussian",          "be" },
+     { "Catalan",               "ca" },
+     { "Chewa",                 "ny" },
+     { "Chichewa",              "ny" },
+     { "Chinese",               "zh" },
+     { "Chinese, Simplified",   "zh_CN" },
+     { "Chinese, Traditional",  "zh_TW" },
+     { "Chinese, Tradtional",   "zh_TW" },
+     { "Croatian",              "hr" },
+     { "Czech",                 "cs" },
+     { "Danish",                "da" },
+     { "Dutch",                 "nl" },
+     { "Dzongkha",              "dz" },
+     { "English",               "en" },
+     { "Esperanto",             "eo" },
+     { "Estonian",              "et" },
+     { "Faroese",               "fo" },
+     { "Farsi",                 "fa" },
+     { "Finnish",               "fi" },
+     { "Flemish",               "nl_BE" },
+     { "French",                "fr" },
+     { "Galician",              "gl" },
+     { "Gallegan",              "gl" },
+     { "Georgian",              "ka" },
+     { "German",                "de" },
+     { "Greek",                 "el" },
+     { "Greenlandic",           "kl" },
+     { "Guarani",               "gn" },
+     { "Gujarati",              "gu" },
+     { "Hawaiian",              "haw" }, /* Yes, "haw", not "cpe".  */
+     { "Hebrew",                "he" },
+     { "Hindi",                 "hi" },
+     { "Hungarian",             "hu" },
+     { "Icelandic",             "is" },
+     { "Indonesian",            "id" },
+     { "Inuktitut",             "iu" },
+     { "Irish",                 "ga" },
+     { "Italian",               "it" },
+     { "Japanese",              "ja" },
+     { "Javanese",              "jv" },
+     { "Kalaallisut",           "kl" },
+     { "Kannada",               "kn" },
+     { "Kashmiri",              "ks" },
+     { "Kazakh",                "kk" },
+     { "Khmer",                 "km" },
+     { "Kinyarwanda",           "rw" },
+     { "Kirghiz",               "ky" },
+     { "Korean",                "ko" },
+     { "Kurdish",               "ku" },
+     { "Latin",                 "la" },
+     { "Latvian",               "lv" },
+     { "Lithuanian",            "lt" },
+     { "Macedonian",            "mk" },
+     { "Malagasy",              "mg" },
+     { "Malay",                 "ms" },
+     { "Malayalam",             "ml" },
+     { "Maltese",               "mt" },
+     { "Manx",                  "gv" },
+     { "Marathi",               "mr" },
+     { "Moldavian",             "mo" },
+     { "Mongolian",             "mn" },
+     { "Nepali",                "ne" },
+     { "Norwegian",             "nb" }, /* Yes, "nb", not the obsolete "no".  */
+     { "Nyanja",                "ny" },
+     { "Nynorsk",               "nn" },
+     { "Oriya",                 "or" },
+     { "Oromo",                 "om" },
+     { "Panjabi",               "pa" },
+     { "Pashto",                "ps" },
+     { "Persian",               "fa" },
+     { "Polish",                "pl" },
+     { "Portuguese",            "pt" },
+     { "Portuguese, Brazilian", "pt_BR" },
+     { "Punjabi",               "pa" },
+     { "Pushto",                "ps" },
+     { "Quechua",               "qu" },
+     { "Romanian",              "ro" },
+     { "Ruanda",                "rw" },
+     { "Rundi",                 "rn" },
+     { "Russian",               "ru" },
+     { "Sami",                  "se_NO" }, /* Not just "se".  */
+     { "Sanskrit",              "sa" },
+     { "Scottish",              "gd" },
+     { "Serbian",               "sr" },
+     { "Simplified Chinese",    "zh_CN" },
+     { "Sindhi",                "sd" },
+     { "Sinhalese",             "si" },
+     { "Slovak",                "sk" },
+     { "Slovenian",             "sl" },
+     { "Somali",                "so" },
+     { "Spanish",               "es" },
+     { "Sundanese",             "su" },
+     { "Swahili",               "sw" },
+     { "Swedish",               "sv" },
+     { "Tagalog",               "tl" },
+     { "Tajik",                 "tg" },
+     { "Tajiki",                "tg" },
+     { "Tamil",                 "ta" },
+     { "Tatar",                 "tt" },
+     { "Telugu",                "te" },
+     { "Thai",                  "th" },
+     { "Tibetan",               "bo" },
+     { "Tigrinya",              "ti" },
+     { "Tongan",                "to" },
+     { "Traditional Chinese",   "zh_TW" },
+     { "Turkish",               "tr" },
+     { "Turkmen",               "tk" },
+     { "Uighur",                "ug" },
+     { "Ukrainian",             "uk" },
+     { "Urdu",                  "ur" },
+     { "Uzbek",                 "uz" },
+     { "Vietnamese",            "vi" },
+     { "Welsh",                 "cy" },
+     { "Yiddish",               "yi" }
+   };
+ 
+   /* Convert new-style locale names with language tags (ISO 639 and ISO 15924)
+      to Unix (ISO 639 and ISO 3166) names.  */
+   typedef struct { const char langtag[7+1]; const char unixy[12+1]; }
+ 	  langtag_entry;
+   static const langtag_entry langtag_table[] = {
+     /* MacOS X has "az-Arab", "az-Cyrl", "az-Latn".
+        The default script for az on Unix is Latin.  */
+     { "az-Latn", "az" },
+     /* MacOS X has "ga-dots".  Does not yet exist on Unix.  */
+     { "ga-dots", "ga" },
+     /* MacOS X has "kk-Cyrl".  Does not yet exist on Unix.  */
+     /* MacOS X has "mn-Cyrl", "mn-Mong".
+        The default script for mn on Unix is Cyrillic.  */
+     { "mn-Cyrl", "mn" },
+     /* MacOS X has "ms-Arab", "ms-Latn".
+        The default script for ms on Unix is Latin.  */
+     { "ms-Latn", "ms" },
+     /* MacOS X has "tg-Cyrl".
+        The default script for tg on Unix is Cyrillic.  */
+     { "tg-Cyrl", "tg" },
+     /* MacOS X has "tk-Cyrl".  Does not yet exist on Unix.  */
+     /* MacOS X has "tt-Cyrl".
+        The default script for tt on Unix is Cyrillic.  */
+     { "tt-Cyrl", "tt" },
+     /* MacOS X has "zh-Hans", "zh-Hant".
+        Country codes are used to distinguish these on Unix.  */
+     { "zh-Hans", "zh_CN" },
+     { "zh-Hant", "zh_TW" }
+   };
+ 
+   /* Convert script names (ISO 15924) to Unix conventions.
+      See http://www.unicode.org/iso15924/iso15924-codes.html  */
+   typedef struct { const char script[4+1]; const char unixy[9+1]; }
+ 	  script_entry;
+   static const script_entry script_table[] = {
+     { "Arab", "arabic" },
+     { "Cyrl", "cyrillic" },
+     { "Mong", "mongolian" }
+   };
+ 
+   /* Step 1: Convert using legacy_table.  */
+   if (name[0] >= 'A' && name[0] <= 'Z')
+     {
+       unsigned int i1, i2;
+       i1 = 0;
+       i2 = sizeof (legacy_table) / sizeof (legacy_entry);
+       while (i2 - i1 > 1)
+ 	{
+ 	  /* At this point we know that if name occurs in legacy_table,
+ 	     its index must be >= i1 and < i2.  */
+ 	  unsigned int i = (i1 + i2) >> 1;
+ 	  const legacy_entry *p = &legacy_table[i];
+ 	  if (strcmp (name, p->legacy) < 0)
+ 	    i2 = i;
+ 	  else
+ 	    i1 = i;
+ 	}
+       if (strcmp (name, legacy_table[i1].legacy) == 0)
+ 	{
+ 	  strcpy (name, legacy_table[i1].unixy);
+ 	  return;
+ 	}
+     }
+ 
+   /* Step 2: Convert using langtag_table and script_table.  */
+   if (strlen (name) == 7 && name[2] == '-')
+     {
+       unsigned int i1, i2;
+       i1 = 0;
+       i2 = sizeof (langtag_table) / sizeof (langtag_entry);
+       while (i2 - i1 > 1)
+ 	{
+ 	  /* At this point we know that if name occurs in langtag_table,
+ 	     its index must be >= i1 and < i2.  */
+ 	  unsigned int i = (i1 + i2) >> 1;
+ 	  const langtag_entry *p = &langtag_table[i];
+ 	  if (strcmp (name, p->langtag) < 0)
+ 	    i2 = i;
+ 	  else
+ 	    i1 = i;
+ 	}
+       if (strcmp (name, langtag_table[i1].langtag) == 0)
+ 	{
+ 	  strcpy (name, langtag_table[i1].unixy);
+ 	  return;
+ 	}
+ 
+       i1 = 0;
+       i2 = sizeof (script_table) / sizeof (script_entry);
+       while (i2 - i1 > 1)
+ 	{
+ 	  /* At this point we know that if (name + 3) occurs in script_table,
+ 	     its index must be >= i1 and < i2.  */
+ 	  unsigned int i = (i1 + i2) >> 1;
+ 	  const script_entry *p = &script_table[i];
+ 	  if (strcmp (name + 3, p->script) < 0)
+ 	    i2 = i;
+ 	  else
+ 	    i1 = i;
+ 	}
+       if (strcmp (name + 3, script_table[i1].script) == 0)
+ 	{
+ 	  name[2] = '@';
+ 	  strcpy (name + 3, script_table[i1].unixy);
+ 	  return;
+ 	}
+     }
+ 
+   /* Step 3: Convert new-style dash to Unix underscore. */
+   {
+     char *p;
+     for (p = name; *p != '\0'; p++)
+       if (*p == '-')
+ 	*p = '_';
+   }
+ }
+ 
+ #endif
+ 
  /* XPG3 defines the result of 'setlocale (category, NULL)' as:
     "Directs 'setlocale()' to query 'category' and return the current
      setting of 'local'."
***************
*** 777,783 ****
  
  	if (CFStringGetCString (name, namebuf, sizeof(namebuf),
  				kCFStringEncodingASCII))
! 	  cached_localename = strdup (namebuf);
  	CFRelease (locale);
  #  elif HAVE_CFPREFERENCESCOPYAPPVALUE /* MacOS X 10.2 or newer */
  	CFTypeRef value =
--- 1051,1060 ----
  
  	if (CFStringGetCString (name, namebuf, sizeof(namebuf),
  				kCFStringEncodingASCII))
! 	  {
! 	    _nl_locale_name_canonicalize (namebuf);
! 	    cached_localename = strdup (namebuf);
! 	  }
  	CFRelease (locale);
  #  elif HAVE_CFPREFERENCESCOPYAPPVALUE /* MacOS X 10.2 or newer */
  	CFTypeRef value =
***************
*** 787,793 ****
  	    && CFGetTypeID (value) == CFStringGetTypeID ()
  	    && CFStringGetCString ((CFStringRef)value, namebuf, sizeof(namebuf),
  				   kCFStringEncodingASCII))
! 	  cached_localename = strdup (namebuf);
  #  endif
  	if (cached_localename == NULL)
  	  cached_localename = "C";
--- 1064,1073 ----
  	    && CFGetTypeID (value) == CFStringGetTypeID ()
  	    && CFStringGetCString ((CFStringRef)value, namebuf, sizeof(namebuf),
  				   kCFStringEncodingASCII))
! 	  {
! 	    _nl_locale_name_canonicalize (namebuf);
! 	    cached_localename = strdup (namebuf);
! 	  }
  #  endif
  	if (cached_localename == NULL)
  	  cached_localename = "C";
Index: gettext-runtime/intl/langprefs.c
===================================================================
RCS file: /cvs/gettext/gettext/gettext-runtime/intl/langprefs.c,v
retrieving revision 1.3
diff -c -3 -r1.3 langprefs.c
*** gettext-runtime/intl/langprefs.c	10 Jan 2005 15:56:14 -0000	1.3
--- gettext-runtime/intl/langprefs.c	16 Mar 2005 13:25:26 -0000
***************
*** 30,35 ****
--- 30,36 ----
  # include <CFPropertyList.h>
  # include <CFArray.h>
  # include <CFString.h>
+ extern void _nl_locale_name_canonicalize (char *name);
  #endif
  
  /* Determine the user's language preferences, as a colon separated list of
***************
*** 71,76 ****
--- 72,78 ----
  					   buf, sizeof (buf),
  					   kCFStringEncodingASCII))
  		  {
+ 		    _nl_locale_name_canonicalize (buf);
  		    size += strlen (buf) + 1;
  		    /* Most GNU programs use msgids in English and don't ship
  		       an en.mo message catalog.  Therefore when we see "en"
***************
*** 101,106 ****
--- 103,109 ----
  						   buf, sizeof (buf),
  						   kCFStringEncodingASCII))
  			  {
+ 			    _nl_locale_name_canonicalize (buf);
  			    strcpy (p, buf);
  			    p += strlen (buf);
  			    *p++ = ':';