Index: gettext-runtime/intl/localename.c =================================================================== RCS file: /cvs/gettext/gettext/gettext-runtime/intl/localename.c,v retrieving revision 1.9 diff -c -3 -r1.9 localename.c *** gettext-runtime/intl/localename.c 10 Jan 2005 15:56:14 -0000 1.9 --- gettext-runtime/intl/localename.c 16 Mar 2005 13:25:26 -0000 *************** *** 694,699 **** --- 694,973 ---- # endif #endif + # if HAVE_CFLOCALECOPYCURRENT || HAVE_CFPREFERENCESCOPYAPPVALUE + /* MacOS X 10.2 or newer */ + + /* Canonicalize a MacOS X locale name to a Unix locale name. + NAME is a sufficiently large buffer. + On input, it contains the MacOS X locale name. + On output, it contains the Unix locale name. */ + void + _nl_locale_name_canonicalize (char *name) + { + /* This conversion is based on a posting by + Deborah GoldSmith on 2005-03-08, + http://lists.apple.com/archives/carbon-dev/2005/Mar/msg00293.html */ + + /* Convert legacy (NeXTstep inherited) English names to Unix (ISO 639 and + ISO 3166) names. Prior to MacOS X 10.3, there is no API for doing this. + Therefore we do it ourselves, using a table based on the results of the + MacOS X 10.3.8 function + CFLocaleCreateCanonicalLocaleIdentifierFromString(). */ + typedef struct { const char legacy[21+1]; const char unixy[5+1]; } + legacy_entry; + static const legacy_entry legacy_table[] = { + { "Afrikaans", "af" }, + { "Albanian", "sq" }, + { "Amharic", "am" }, + { "Arabic", "ar" }, + { "Armenian", "hy" }, + { "Assamese", "as" }, + { "Aymara", "ay" }, + { "Azerbaijani", "az" }, + { "Basque", "eu" }, + { "Belarusian", "be" }, + { "Belorussian", "be" }, + { "Bengali", "bn" }, + { "Brazilian Portugese", "pt_BR" }, + { "Brazilian Portuguese", "pt_BR" }, + { "Breton", "br" }, + { "Bulgarian", "bg" }, + { "Burmese", "my" }, + { "Byelorussian", "be" }, + { "Catalan", "ca" }, + { "Chewa", "ny" }, + { "Chichewa", "ny" }, + { "Chinese", "zh" }, + { "Chinese, Simplified", "zh_CN" }, + { "Chinese, Traditional", "zh_TW" }, + { "Chinese, Tradtional", "zh_TW" }, + { "Croatian", "hr" }, + { "Czech", "cs" }, + { "Danish", "da" }, + { "Dutch", "nl" }, + { "Dzongkha", "dz" }, + { "English", "en" }, + { "Esperanto", "eo" }, + { "Estonian", "et" }, + { "Faroese", "fo" }, + { "Farsi", "fa" }, + { "Finnish", "fi" }, + { "Flemish", "nl_BE" }, + { "French", "fr" }, + { "Galician", "gl" }, + { "Gallegan", "gl" }, + { "Georgian", "ka" }, + { "German", "de" }, + { "Greek", "el" }, + { "Greenlandic", "kl" }, + { "Guarani", "gn" }, + { "Gujarati", "gu" }, + { "Hawaiian", "haw" }, /* Yes, "haw", not "cpe". */ + { "Hebrew", "he" }, + { "Hindi", "hi" }, + { "Hungarian", "hu" }, + { "Icelandic", "is" }, + { "Indonesian", "id" }, + { "Inuktitut", "iu" }, + { "Irish", "ga" }, + { "Italian", "it" }, + { "Japanese", "ja" }, + { "Javanese", "jv" }, + { "Kalaallisut", "kl" }, + { "Kannada", "kn" }, + { "Kashmiri", "ks" }, + { "Kazakh", "kk" }, + { "Khmer", "km" }, + { "Kinyarwanda", "rw" }, + { "Kirghiz", "ky" }, + { "Korean", "ko" }, + { "Kurdish", "ku" }, + { "Latin", "la" }, + { "Latvian", "lv" }, + { "Lithuanian", "lt" }, + { "Macedonian", "mk" }, + { "Malagasy", "mg" }, + { "Malay", "ms" }, + { "Malayalam", "ml" }, + { "Maltese", "mt" }, + { "Manx", "gv" }, + { "Marathi", "mr" }, + { "Moldavian", "mo" }, + { "Mongolian", "mn" }, + { "Nepali", "ne" }, + { "Norwegian", "nb" }, /* Yes, "nb", not the obsolete "no". */ + { "Nyanja", "ny" }, + { "Nynorsk", "nn" }, + { "Oriya", "or" }, + { "Oromo", "om" }, + { "Panjabi", "pa" }, + { "Pashto", "ps" }, + { "Persian", "fa" }, + { "Polish", "pl" }, + { "Portuguese", "pt" }, + { "Portuguese, Brazilian", "pt_BR" }, + { "Punjabi", "pa" }, + { "Pushto", "ps" }, + { "Quechua", "qu" }, + { "Romanian", "ro" }, + { "Ruanda", "rw" }, + { "Rundi", "rn" }, + { "Russian", "ru" }, + { "Sami", "se_NO" }, /* Not just "se". */ + { "Sanskrit", "sa" }, + { "Scottish", "gd" }, + { "Serbian", "sr" }, + { "Simplified Chinese", "zh_CN" }, + { "Sindhi", "sd" }, + { "Sinhalese", "si" }, + { "Slovak", "sk" }, + { "Slovenian", "sl" }, + { "Somali", "so" }, + { "Spanish", "es" }, + { "Sundanese", "su" }, + { "Swahili", "sw" }, + { "Swedish", "sv" }, + { "Tagalog", "tl" }, + { "Tajik", "tg" }, + { "Tajiki", "tg" }, + { "Tamil", "ta" }, + { "Tatar", "tt" }, + { "Telugu", "te" }, + { "Thai", "th" }, + { "Tibetan", "bo" }, + { "Tigrinya", "ti" }, + { "Tongan", "to" }, + { "Traditional Chinese", "zh_TW" }, + { "Turkish", "tr" }, + { "Turkmen", "tk" }, + { "Uighur", "ug" }, + { "Ukrainian", "uk" }, + { "Urdu", "ur" }, + { "Uzbek", "uz" }, + { "Vietnamese", "vi" }, + { "Welsh", "cy" }, + { "Yiddish", "yi" } + }; + + /* Convert new-style locale names with language tags (ISO 639 and ISO 15924) + to Unix (ISO 639 and ISO 3166) names. */ + typedef struct { const char langtag[7+1]; const char unixy[12+1]; } + langtag_entry; + static const langtag_entry langtag_table[] = { + /* MacOS X has "az-Arab", "az-Cyrl", "az-Latn". + The default script for az on Unix is Latin. */ + { "az-Latn", "az" }, + /* MacOS X has "ga-dots". Does not yet exist on Unix. */ + { "ga-dots", "ga" }, + /* MacOS X has "kk-Cyrl". Does not yet exist on Unix. */ + /* MacOS X has "mn-Cyrl", "mn-Mong". + The default script for mn on Unix is Cyrillic. */ + { "mn-Cyrl", "mn" }, + /* MacOS X has "ms-Arab", "ms-Latn". + The default script for ms on Unix is Latin. */ + { "ms-Latn", "ms" }, + /* MacOS X has "tg-Cyrl". + The default script for tg on Unix is Cyrillic. */ + { "tg-Cyrl", "tg" }, + /* MacOS X has "tk-Cyrl". Does not yet exist on Unix. */ + /* MacOS X has "tt-Cyrl". + The default script for tt on Unix is Cyrillic. */ + { "tt-Cyrl", "tt" }, + /* MacOS X has "zh-Hans", "zh-Hant". + Country codes are used to distinguish these on Unix. */ + { "zh-Hans", "zh_CN" }, + { "zh-Hant", "zh_TW" } + }; + + /* Convert script names (ISO 15924) to Unix conventions. + See http://www.unicode.org/iso15924/iso15924-codes.html */ + typedef struct { const char script[4+1]; const char unixy[9+1]; } + script_entry; + static const script_entry script_table[] = { + { "Arab", "arabic" }, + { "Cyrl", "cyrillic" }, + { "Mong", "mongolian" } + }; + + /* Step 1: Convert using legacy_table. */ + if (name[0] >= 'A' && name[0] <= 'Z') + { + unsigned int i1, i2; + i1 = 0; + i2 = sizeof (legacy_table) / sizeof (legacy_entry); + while (i2 - i1 > 1) + { + /* At this point we know that if name occurs in legacy_table, + its index must be >= i1 and < i2. */ + unsigned int i = (i1 + i2) >> 1; + const legacy_entry *p = &legacy_table[i]; + if (strcmp (name, p->legacy) < 0) + i2 = i; + else + i1 = i; + } + if (strcmp (name, legacy_table[i1].legacy) == 0) + { + strcpy (name, legacy_table[i1].unixy); + return; + } + } + + /* Step 2: Convert using langtag_table and script_table. */ + if (strlen (name) == 7 && name[2] == '-') + { + unsigned int i1, i2; + i1 = 0; + i2 = sizeof (langtag_table) / sizeof (langtag_entry); + while (i2 - i1 > 1) + { + /* At this point we know that if name occurs in langtag_table, + its index must be >= i1 and < i2. */ + unsigned int i = (i1 + i2) >> 1; + const langtag_entry *p = &langtag_table[i]; + if (strcmp (name, p->langtag) < 0) + i2 = i; + else + i1 = i; + } + if (strcmp (name, langtag_table[i1].langtag) == 0) + { + strcpy (name, langtag_table[i1].unixy); + return; + } + + i1 = 0; + i2 = sizeof (script_table) / sizeof (script_entry); + while (i2 - i1 > 1) + { + /* At this point we know that if (name + 3) occurs in script_table, + its index must be >= i1 and < i2. */ + unsigned int i = (i1 + i2) >> 1; + const script_entry *p = &script_table[i]; + if (strcmp (name + 3, p->script) < 0) + i2 = i; + else + i1 = i; + } + if (strcmp (name + 3, script_table[i1].script) == 0) + { + name[2] = '@'; + strcpy (name + 3, script_table[i1].unixy); + return; + } + } + + /* Step 3: Convert new-style dash to Unix underscore. */ + { + char *p; + for (p = name; *p != '\0'; p++) + if (*p == '-') + *p = '_'; + } + } + + #endif + /* XPG3 defines the result of 'setlocale (category, NULL)' as: "Directs 'setlocale()' to query 'category' and return the current setting of 'local'." *************** *** 777,783 **** if (CFStringGetCString (name, namebuf, sizeof(namebuf), kCFStringEncodingASCII)) ! cached_localename = strdup (namebuf); CFRelease (locale); # elif HAVE_CFPREFERENCESCOPYAPPVALUE /* MacOS X 10.2 or newer */ CFTypeRef value = --- 1051,1060 ---- if (CFStringGetCString (name, namebuf, sizeof(namebuf), kCFStringEncodingASCII)) ! { ! _nl_locale_name_canonicalize (namebuf); ! cached_localename = strdup (namebuf); ! } CFRelease (locale); # elif HAVE_CFPREFERENCESCOPYAPPVALUE /* MacOS X 10.2 or newer */ CFTypeRef value = *************** *** 787,793 **** && CFGetTypeID (value) == CFStringGetTypeID () && CFStringGetCString ((CFStringRef)value, namebuf, sizeof(namebuf), kCFStringEncodingASCII)) ! cached_localename = strdup (namebuf); # endif if (cached_localename == NULL) cached_localename = "C"; --- 1064,1073 ---- && CFGetTypeID (value) == CFStringGetTypeID () && CFStringGetCString ((CFStringRef)value, namebuf, sizeof(namebuf), kCFStringEncodingASCII)) ! { ! _nl_locale_name_canonicalize (namebuf); ! cached_localename = strdup (namebuf); ! } # endif if (cached_localename == NULL) cached_localename = "C"; Index: gettext-runtime/intl/langprefs.c =================================================================== RCS file: /cvs/gettext/gettext/gettext-runtime/intl/langprefs.c,v retrieving revision 1.3 diff -c -3 -r1.3 langprefs.c *** gettext-runtime/intl/langprefs.c 10 Jan 2005 15:56:14 -0000 1.3 --- gettext-runtime/intl/langprefs.c 16 Mar 2005 13:25:26 -0000 *************** *** 30,35 **** --- 30,36 ---- # include # include # include + extern void _nl_locale_name_canonicalize (char *name); #endif /* Determine the user's language preferences, as a colon separated list of *************** *** 71,76 **** --- 72,78 ---- buf, sizeof (buf), kCFStringEncodingASCII)) { + _nl_locale_name_canonicalize (buf); size += strlen (buf) + 1; /* Most GNU programs use msgids in English and don't ship an en.mo message catalog. Therefore when we see "en" *************** *** 101,106 **** --- 103,109 ---- buf, sizeof (buf), kCFStringEncodingASCII)) { + _nl_locale_name_canonicalize (buf); strcpy (p, buf); p += strlen (buf); *p++ = ':';