]> git.wh0rd.org - tt-rss.git/blob - plugins/af_lang_detect/languagedetect/Text/LanguageDetect/ISO639.php
Merge branch 'pullreq-fix-undefined-index-warning' of tkappe/tt-rss into master
[tt-rss.git] / plugins / af_lang_detect / languagedetect / Text / LanguageDetect / ISO639.php
1 <?php
2 /**
3 * Part of Text_LanguageDetect
4 *
5 * PHP version 5
6 *
7 * @category Text
8 * @package Text_LanguageDetect
9 * @author Christian Weiske <cweiske@php.net>
10 * @copyright 2011 Christian Weiske <cweiske@php.net>
11 * @license http://www.debian.org/misc/bsd.license BSD
12 * @link http://pear.php.net/package/Text_LanguageDetect/
13 */
14
15 /**
16 * Provides a mapping between the languages from lang.dat and the
17 * ISO 639-1 and ISO-639-2 codes.
18 *
19 * Note that this class contains only languages that exist in lang.dat.
20 *
21 * @category Text
22 * @package Text_LanguageDetect
23 * @author Christian Weiske <cweiske@php.net>
24 * @copyright 2011 Christian Weiske <cweiske@php.net>
25 * @license BSD http://www.opensource.org/licenses/bsd-license.php
26 * @link http://www.loc.gov/standards/iso639-2/php/code_list.php
27 *
28 * @SuppressWarnings(PHPMD)
29 */
class Text_LanguageDetect_ISO639
{
    /**
     * Maps all language names from the language database to the
     * ISO 639-1 2-letter language code.
     *
     * NULL indicates that there is no 2-letter code.
     *
     * @var array
     */
    public static $nameToCode2 = array(
        'albanian'   => 'sq',
        'arabic'     => 'ar',
        'azeri'      => 'az',
        'bengali'    => 'bn',
        'bulgarian'  => 'bg',
        'cebuano'    => null,
        'croatian'   => 'hr',
        'czech'      => 'cs',
        'danish'     => 'da',
        'dutch'      => 'nl',
        'english'    => 'en',
        'estonian'   => 'et',
        'farsi'      => 'fa',
        'finnish'    => 'fi',
        'french'     => 'fr',
        'german'     => 'de',
        'hausa'      => 'ha',
        'hawaiian'   => null,
        'hindi'      => 'hi',
        'hungarian'  => 'hu',
        'icelandic'  => 'is',
        'indonesian' => 'id',
        'italian'    => 'it',
        'kazakh'     => 'kk',
        'kyrgyz'     => 'ky',
        'latin'      => 'la',
        'latvian'    => 'lv',
        'lithuanian' => 'lt',
        'macedonian' => 'mk',
        'mongolian'  => 'mn',
        'nepali'     => 'ne',
        'norwegian'  => 'no',
        'pashto'     => 'ps',
        'pidgin'     => null,
        'polish'     => 'pl',
        'portuguese' => 'pt',
        'romanian'   => 'ro',
        'russian'    => 'ru',
        'serbian'    => 'sr',
        'slovak'     => 'sk',
        'slovene'    => 'sl',
        'somali'     => 'so',
        'spanish'    => 'es',
        'swahili'    => 'sw',
        'swedish'    => 'sv',
        'tagalog'    => 'tl',
        'turkish'    => 'tr',
        'ukrainian'  => 'uk',
        'urdu'       => 'ur',
        'uzbek'      => 'uz',
        'vietnamese' => 'vi',
        'welsh'      => 'cy',
    );

    /**
     * Maps all language names from the language database to the
     * ISO 639-2 3-letter language code.
     *
     * @var array
     */
    public static $nameToCode3 = array(
        'albanian'   => 'sqi',
        'arabic'     => 'ara',
        'azeri'      => 'aze',
        'bengali'    => 'ben',
        'bulgarian'  => 'bul',
        'cebuano'    => 'ceb',
        'croatian'   => 'hrv',
        'czech'      => 'ces',
        'danish'     => 'dan',
        'dutch'      => 'nld',
        'english'    => 'eng',
        'estonian'   => 'est',
        'farsi'      => 'fas',
        'finnish'    => 'fin',
        'french'     => 'fra',
        'german'     => 'deu',
        'hausa'      => 'hau',
        'hawaiian'   => 'haw',
        'hindi'      => 'hin',
        'hungarian'  => 'hun',
        'icelandic'  => 'isl',
        'indonesian' => 'ind',
        'italian'    => 'ita',
        'kazakh'     => 'kaz',
        'kyrgyz'     => 'kir',
        'latin'      => 'lat',
        'latvian'    => 'lav',
        'lithuanian' => 'lit',
        'macedonian' => 'mkd',
        'mongolian'  => 'mon',
        'nepali'     => 'nep',
        'norwegian'  => 'nor',
        'pashto'     => 'pus',
        'pidgin'     => 'crp',
        'polish'     => 'pol',
        'portuguese' => 'por',
        'romanian'   => 'ron',
        'russian'    => 'rus',
        'serbian'    => 'srp',
        'slovak'     => 'slk',
        'slovene'    => 'slv',
        'somali'     => 'som',
        'spanish'    => 'spa',
        'swahili'    => 'swa',
        'swedish'    => 'swe',
        'tagalog'    => 'tgl',
        'turkish'    => 'tur',
        'ukrainian'  => 'ukr',
        'urdu'       => 'urd',
        'uzbek'      => 'uzb',
        'vietnamese' => 'vie',
        'welsh'      => 'cym',
    );

    /**
     * Maps ISO 639-1 2-letter language codes to the language names
     * in the language database.
     *
     * Not all languages have a 2-letter code, so some are missing.
     *
     * @var array
     */
    public static $code2ToName = array(
        'ar' => 'arabic',
        'az' => 'azeri',
        'bg' => 'bulgarian',
        'bn' => 'bengali',
        'cs' => 'czech',
        'cy' => 'welsh',
        'da' => 'danish',
        'de' => 'german',
        'en' => 'english',
        'es' => 'spanish',
        'et' => 'estonian',
        'fa' => 'farsi',
        'fi' => 'finnish',
        'fr' => 'french',
        'ha' => 'hausa',
        'hi' => 'hindi',
        'hr' => 'croatian',
        'hu' => 'hungarian',
        'id' => 'indonesian',
        'is' => 'icelandic',
        'it' => 'italian',
        'kk' => 'kazakh',
        'ky' => 'kyrgyz',
        'la' => 'latin',
        'lt' => 'lithuanian',
        'lv' => 'latvian',
        'mk' => 'macedonian',
        'mn' => 'mongolian',
        'ne' => 'nepali',
        'nl' => 'dutch',
        'no' => 'norwegian',
        'pl' => 'polish',
        'ps' => 'pashto',
        'pt' => 'portuguese',
        'ro' => 'romanian',
        'ru' => 'russian',
        'sk' => 'slovak',
        'sl' => 'slovene',
        'so' => 'somali',
        'sq' => 'albanian',
        'sr' => 'serbian',
        'sv' => 'swedish',
        'sw' => 'swahili',
        'tl' => 'tagalog',
        'tr' => 'turkish',
        'uk' => 'ukrainian',
        'ur' => 'urdu',
        'uz' => 'uzbek',
        'vi' => 'vietnamese',
    );

    /**
     * Maps ISO 639-2 3-letter language codes to the language names
     * in the language database.
     *
     * Every code used as a value in $nameToCode3 appears here as a key,
     * so a name -> code -> name round trip returns the original name.
     *
     * @var array
     */
    public static $code3ToName = array(
        'ara' => 'arabic',
        'aze' => 'azeri',
        'ben' => 'bengali',
        'bul' => 'bulgarian',
        'ceb' => 'cebuano',
        'ces' => 'czech',
        'crp' => 'pidgin',
        'cym' => 'welsh',
        'dan' => 'danish',
        'deu' => 'german',
        'eng' => 'english',
        'est' => 'estonian',
        'fas' => 'farsi',
        'fin' => 'finnish',
        'fra' => 'french',
        'hau' => 'hausa',
        'haw' => 'hawaiian',
        'hin' => 'hindi',
        'hrv' => 'croatian',
        'hun' => 'hungarian',
        'ind' => 'indonesian',
        'isl' => 'icelandic',
        'ita' => 'italian',
        'kaz' => 'kazakh',
        'kir' => 'kyrgyz',
        'lat' => 'latin',
        'lav' => 'latvian',
        'lit' => 'lithuanian',
        'mkd' => 'macedonian',
        'mon' => 'mongolian',
        'nep' => 'nepali',
        'nld' => 'dutch',
        'nor' => 'norwegian',
        'pol' => 'polish',
        'por' => 'portuguese',
        'pus' => 'pashto',
        // 'ron' matches $nameToCode3; the former key 'rom' is the
        // ISO 639-2 code for Romany, not Romanian.
        'ron' => 'romanian',
        'rus' => 'russian',
        'slk' => 'slovak',
        'slv' => 'slovene',
        'som' => 'somali',
        'spa' => 'spanish',
        'sqi' => 'albanian',
        'srp' => 'serbian',
        'swa' => 'swahili',
        'swe' => 'swedish',
        'tgl' => 'tagalog',
        'tur' => 'turkish',
        'ukr' => 'ukrainian',
        'urd' => 'urdu',
        'uzb' => 'uzbek',
        'vie' => 'vietnamese',
    );

    /**
     * Returns the 2-letter ISO 639-1 code for the given language name.
     *
     * The lookup is case-insensitive. Languages that are known but have
     * no 2-letter code (stored as NULL in $nameToCode2) also yield NULL.
     *
     * @param string $lang English language name like "swedish"
     *
     * @return string|null Two-letter language code (e.g. "sv") or NULL if
     *                     not found
     */
    public static function nameToCode2($lang)
    {
        $lang = strtolower($lang);
        // isset() is false for NULL values as well as for missing keys;
        // both cases are reported as NULL.
        if (!isset(self::$nameToCode2[$lang])) {
            return null;
        }
        return self::$nameToCode2[$lang];
    }

    /**
     * Returns the 3-letter ISO 639-2 code for the given language name.
     *
     * The lookup is case-insensitive.
     *
     * @param string $lang English language name like "swedish"
     *
     * @return string|null Three-letter language code (e.g. "swe") or NULL
     *                     if not found
     */
    public static function nameToCode3($lang)
    {
        $lang = strtolower($lang);
        if (!isset(self::$nameToCode3[$lang])) {
            return null;
        }
        return self::$nameToCode3[$lang];
    }

    /**
     * Returns the language name for the given 2-letter ISO 639-1 code.
     *
     * The lookup is case-insensitive, so "SV" and "sv" are equivalent.
     *
     * @param string $code Two-letter language code (e.g. "sv")
     *
     * @return string|null English language name like "swedish" or NULL if
     *                     the code is unknown
     */
    public static function code2ToName($code)
    {
        // Normalise the code itself; the table keys are all lower case.
        $code = strtolower($code);
        if (!isset(self::$code2ToName[$code])) {
            return null;
        }
        return self::$code2ToName[$code];
    }

    /**
     * Returns the language name for the given 3-letter ISO 639-2 code.
     *
     * The lookup is case-insensitive, so "SWE" and "swe" are equivalent.
     *
     * @param string $code Three-letter language code (e.g. "swe")
     *
     * @return string|null English language name like "swedish" or NULL if
     *                     the code is unknown
     */
    public static function code3ToName($code)
    {
        // Normalise the code itself; the table keys are all lower case.
        $code = strtolower($code);
        if (!isset(self::$code3ToName[$code])) {
            return null;
        }
        return self::$code3ToName[$code];
    }
}