Permalink
Browse files

Updated internal ISO 639-1 language codes with latest standards.

Includes 54 language code additions, some name modifications, and
marking a few deprecated.
  • Loading branch information...
luccioman committed Sep 2, 2017
1 parent a284280 commit 8e4f31bdc7491f32e68a7d1796d7d33b5050cfbe
Showing with 103 additions and 34 deletions.
  1. +103 −34 source/net/yacy/kelondro/util/ISO639.java
@@ -1,4 +1,4 @@
// ISO639.java
// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 19.09.2008 on http://yacy.net
//
@@ -30,146 +30,215 @@
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
/**
* Support for ISO 639 language codes.
* @see <a href="https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes">Wikipedia list of ISO 639-1 codes</a>
* @see <a href="http://www.loc.gov/standards/iso639-2/php/code_list.php">Language Code List from the ISO 639-2 Registration Authority (Library of Congress)</a>
* @see <a href="http://www-01.sil.org/iso639-3/">Home page of the ISO 639-3 Registration Authority (SIL International)</a>
* @see <a href="https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry">IANA language subtag registry</a>
* @see <a href="http://www.loc.gov/standards/iso639-2/php/code_changes.php">Code Changes history from the ISO 639-2 Registration Authority</a>
*/
public class ISO639 {
/*
 * Note: icu4j classes such as com.ibm.icu.impl.LocaleIDs could be used instead
 * of this hand-maintained table to keep ISO 639 support up to date, notably to
 * add support for the three-letter ISO 639 language codes.
 */
/**
 * ISO 639-1 language codes table.
 * <p>
 * Each entry has the form <code>[two letters code]-[ISO reference name]</code>:
 * the first two characters are the language code, the third character is the
 * <code>-</code> separator and the remainder is the English reference name.
 * </p>
 * <p>
 * Entries are sorted by code and every code is listed exactly once, so that the
 * name associated with a code does not depend on the order in which the table
 * is read. Deprecated codes are kept (and flagged by a trailing comment) so
 * that documents still tagged with them can be resolved.
 * </p>
 */
private static final String[] codes = {
    "aa-Afar",
    "ab-Abkhazian",
    "ae-Avestan",
    "af-Afrikaans",
    "ak-Akan",
    "am-Amharic",
    "an-Aragonese",
    "ar-Arabic",
    "as-Assamese",
    "av-Avaric",
    "ay-Aymara",
    "az-Azerbaijani",
    "ba-Bashkir",
    "be-Belarusian",
    "bg-Bulgarian",
    "bh-Bihari", // collective language code for bho-Bhojpuri, mag-Magahi, and mai-Maithili
    "bi-Bislama",
    "bm-Bambara",
    "bn-Bengali",
    "bo-Tibetan",
    "br-Breton",
    "bs-Bosnian",
    "ca-Catalan",
    "ce-Chechen",
    "ch-Chamorro",
    "co-Corsican",
    "cr-Cree",
    "cs-Czech",
    "cu-Church Slavic",
    "cv-Chuvash",
    "cy-Welsh",
    "da-Danish",
    "de-German",
    "dv-Dhivehi",
    "dz-Dzongkha",
    "ee-Ewe",
    "el-Modern Greek (1453-)",
    "en-English",
    "eo-Esperanto",
    "es-Spanish",
    "et-Estonian",
    "eu-Basque",
    "fa-Persian",
    "ff-Fulah",
    "fi-Finnish",
    "fj-Fijian",
    "fo-Faroese",
    "fr-French",
    "fy-Western Frisian",
    "ga-Irish",
    "gd-Scottish Gaelic",
    "gl-Galician",
    "gn-Guarani",
    "gu-Gujarati",
    "gv-Manx",
    "ha-Hausa",
    "he-Hebrew",
    "hi-Hindi",
    "ho-Hiri Motu",
    "hr-Croatian",
    "ht-Haitian",
    "hu-Hungarian",
    "hy-Armenian",
    "hz-Herero",
    "ia-Interlingua",
    "id-Indonesian",
    "ie-Interlingue",
    "ig-Igbo",
    "ii-Sichuan Yi",
    "ik-Inupiaq",
    "in-Indonesian", // deprecated on 1989-03-11 in favor of id-Indonesian
    "io-Ido",
    "is-Icelandic",
    "it-Italian",
    "iu-Inuktitut",
    "iw-Hebrew", // deprecated on 1989-03-11 in favor of he-Hebrew
    "ja-Japanese",
    "ji-Yiddish", // deprecated on 1989-03-11 in favor of yi-Yiddish
    "jv-Javanese",
    "jw-Javanese", // deprecated in favor of jv-Javanese, kept for backward compatibility
    "ka-Georgian",
    "kg-Kongo",
    "ki-Kikuyu",
    "kj-Kuanyama",
    "kk-Kazakh",
    "kl-Kalaallisut; Greenlandic",
    "km-Central Khmer",
    "kn-Kannada",
    "ko-Korean",
    "kr-Kanuri",
    "ks-Kashmiri",
    "ku-Kurdish",
    "kv-Komi",
    "kw-Cornish",
    "ky-Kirghiz",
    "la-Latin",
    "lb-Luxembourgish",
    "lg-Ganda",
    "li-Limburgan",
    "ln-Lingala",
    "lo-Lao",
    "lt-Lithuanian",
    "lu-Luba-Katanga",
    "lv-Latvian",
    "mg-Malagasy",
    "mh-Marshallese",
    "mi-Maori",
    "mk-Macedonian",
    "ml-Malayalam",
    "mn-Mongolian",
    //"mo-Moldavian", // this maps on 'mozilla' :( // deprecated on 2008-11-03 in favor of ro-Romanian to be used for the variant of the Romanian language also known as Moldavian
    "mr-Marathi",
    "ms-Malay",
    "mt-Maltese",
    "my-Burmese",
    "na-Nauru",
    "nb-Norwegian Bokmål",
    "nd-North Ndebele",
    "ne-Nepali",
    "ng-Ndonga",
    "nl-Dutch",
    "nn-Norwegian Nynorsk",
    "no-Norwegian",
    "nr-South Ndebele",
    "nv-Navajo",
    "ny-Nyanja",
    "oc-Occitan (post 1500)",
    "oj-Ojibwa",
    "om-Oromo",
    "or-Oriya",
    "os-Ossetian",
    "pa-Panjabi; Punjabi",
    "pi-Pali",
    "pl-Polish",
    "ps-Pushto; Pashto",
    "pt-Portuguese",
    "qu-Quechua",
    "rm-Romansh",
    "rn-Rundi",
    "ro-Romanian",
    "ru-Russian",
    "rw-Kinyarwanda",
    "sa-Sanskrit",
    "sc-Sardinian",
    "sd-Sindhi",
    "se-Northern Sami",
    "sg-Sango",
    "sh-Serbo-Croatian",
    "si-Sinhala; Sinhalese",
    "sk-Slovak",
    "sl-Slovenian",
    "sm-Samoan",
    "sn-Shona",
    "so-Somali",
    "sq-Albanian",
    "sr-Serbian",
    "ss-Swati",
    "st-Southern Sotho",
    "su-Sundanese",
    "sv-Swedish",
    "sw-Swahili",
    "ta-Tamil",
    "te-Telugu",
    "tg-Tajik",
    "th-Thai",
    "ti-Tigrinya",
    "tk-Turkmen",
    "tl-Tagalog",
    "tn-Tswana",
    "to-Tonga (Tonga Islands)",
    "tr-Turkish",
    "ts-Tsonga",
    "tt-Tatar",
    "tw-Twi",
    "ty-Tahitian",
    "ug-Uighur",
    "uk-Ukrainian",
    "ur-Urdu",
    "uz-Uzbek",
    "ve-Venda",
    "vi-Vietnamese",
    "vo-Volapük",
    "wa-Walloon",
    "wo-Wolof",
    "xh-Xhosa",
    "yi-Yiddish",
    "yo-Yoruba",
    "za-Zhuang",
    "zh-Chinese",
    "zu-Zulu"
};
/**
 * Lookup table associating each two-letter ISO 639-1 code with the English
 * reference name of the language. Its initial capacity is the number of
 * entries in the {@link #codes} table.
 */
private static Map<String, String> mapping = new ConcurrentHashMap<>(codes.length);
static {

0 comments on commit 8e4f31b

Please sign in to comment.