]> git.wh0rd.org - tt-rss.git/blob - plugins/af_lang_detect/languagedetect/Text/LanguageDetect/ISO639.php
7caa9794e33e476b47523e63ff05df64d9a3c308
[tt-rss.git] / plugins / af_lang_detect / languagedetect / Text / LanguageDetect / ISO639.php
1 <?php
2 /**
3 * Part of Text_LanguageDetect
4 *
5 * PHP version 5
6 *
7 * @category Text
8 * @package Text_LanguageDetect
9 * @author Christian Weiske <cweiske@php.net>
10 * @copyright 2011 Christian Weiske <cweiske@php.net>
11 * @license http://www.debian.org/misc/bsd.license BSD
12 * @version SVN: $Id$
13 * @link http://pear.php.net/package/Text_LanguageDetect/
14 */
15
16 /**
17 * Provides a mapping between the languages from lang.dat and the
18 * ISO 639-1 and ISO-639-2 codes.
19 *
20 * Note that this class contains only languages that exist in lang.dat.
21 *
22 * @category Text
23 * @package Text_LanguageDetect
24 * @author Christian Weiske <cweiske@php.net>
25 * @copyright 2011 Christian Weiske <cweiske@php.net>
26 * @license http://www.debian.org/misc/bsd.license BSD
27 * @link http://www.loc.gov/standards/iso639-2/php/code_list.php
28 *
29 * @SuppressWarnings(PHPMD)
30 */
class Text_LanguageDetect_ISO639
{
    /**
     * Maps all language names from the language database to the
     * ISO 639-1 2-letter language code.
     *
     * NULL indicates that there is no 2-letter code.
     *
     * @var array
     */
    public static $nameToCode2 = array(
        'albanian'   => 'sq',
        'arabic'     => 'ar',
        'azeri'      => 'az',
        'bengali'    => 'bn',
        'bulgarian'  => 'bg',
        'cebuano'    => null,
        'croatian'   => 'hr',
        'czech'      => 'cs',
        'danish'     => 'da',
        'dutch'      => 'nl',
        'english'    => 'en',
        'estonian'   => 'et',
        'farsi'      => 'fa',
        'finnish'    => 'fi',
        'french'     => 'fr',
        'german'     => 'de',
        'hausa'      => 'ha',
        'hawaiian'   => null,
        'hindi'      => 'hi',
        'hungarian'  => 'hu',
        'icelandic'  => 'is',
        'indonesian' => 'id',
        'italian'    => 'it',
        'kazakh'     => 'kk',
        'kyrgyz'     => 'ky',
        'latin'      => 'la',
        'latvian'    => 'lv',
        'lithuanian' => 'lt',
        'macedonian' => 'mk',
        'mongolian'  => 'mn',
        'nepali'     => 'ne',
        'norwegian'  => 'no',
        'pashto'     => 'ps',
        'pidgin'     => null,
        'polish'     => 'pl',
        'portuguese' => 'pt',
        'romanian'   => 'ro',
        'russian'    => 'ru',
        'serbian'    => 'sr',
        'slovak'     => 'sk',
        'slovene'    => 'sl',
        'somali'     => 'so',
        'spanish'    => 'es',
        'swahili'    => 'sw',
        'swedish'    => 'sv',
        'tagalog'    => 'tl',
        'turkish'    => 'tr',
        'ukrainian'  => 'uk',
        'urdu'       => 'ur',
        'uzbek'      => 'uz',
        'vietnamese' => 'vi',
        'welsh'      => 'cy',
    );

    /**
     * Maps all language names from the language database to the
     * ISO 639-2 3-letter language code.
     *
     * @var array
     */
    public static $nameToCode3 = array(
        'albanian'   => 'sqi',
        'arabic'     => 'ara',
        'azeri'      => 'aze',
        'bengali'    => 'ben',
        'bulgarian'  => 'bul',
        'cebuano'    => 'ceb',
        'croatian'   => 'hrv',
        'czech'      => 'ces',
        'danish'     => 'dan',
        'dutch'      => 'nld',
        'english'    => 'eng',
        'estonian'   => 'est',
        'farsi'      => 'fas',
        'finnish'    => 'fin',
        'french'     => 'fra',
        'german'     => 'deu',
        'hausa'      => 'hau',
        'hawaiian'   => 'haw',
        'hindi'      => 'hin',
        'hungarian'  => 'hun',
        'icelandic'  => 'isl',
        'indonesian' => 'ind',
        'italian'    => 'ita',
        'kazakh'     => 'kaz',
        'kyrgyz'     => 'kir',
        'latin'      => 'lat',
        'latvian'    => 'lav',
        'lithuanian' => 'lit',
        'macedonian' => 'mkd',
        'mongolian'  => 'mon',
        'nepali'     => 'nep',
        'norwegian'  => 'nor',
        'pashto'     => 'pus',
        'pidgin'     => 'crp',
        'polish'     => 'pol',
        'portuguese' => 'por',
        'romanian'   => 'ron',
        'russian'    => 'rus',
        'serbian'    => 'srp',
        'slovak'     => 'slk',
        'slovene'    => 'slv',
        'somali'     => 'som',
        'spanish'    => 'spa',
        'swahili'    => 'swa',
        'swedish'    => 'swe',
        'tagalog'    => 'tgl',
        'turkish'    => 'tur',
        'ukrainian'  => 'ukr',
        'urdu'       => 'urd',
        'uzbek'      => 'uzb',
        'vietnamese' => 'vie',
        'welsh'      => 'cym',
    );

    /**
     * Maps ISO 639-1 2-letter language codes to the language names
     * in the language database.
     *
     * Not all languages have a 2-letter code, so some are missing.
     *
     * @var array
     */
    public static $code2ToName = array(
        'ar' => 'arabic',
        'az' => 'azeri',
        'bg' => 'bulgarian',
        'bn' => 'bengali',
        'cs' => 'czech',
        'cy' => 'welsh',
        'da' => 'danish',
        'de' => 'german',
        'en' => 'english',
        'es' => 'spanish',
        'et' => 'estonian',
        'fa' => 'farsi',
        'fi' => 'finnish',
        'fr' => 'french',
        'ha' => 'hausa',
        'hi' => 'hindi',
        'hr' => 'croatian',
        'hu' => 'hungarian',
        'id' => 'indonesian',
        'is' => 'icelandic',
        'it' => 'italian',
        'kk' => 'kazakh',
        'ky' => 'kyrgyz',
        'la' => 'latin',
        'lt' => 'lithuanian',
        'lv' => 'latvian',
        'mk' => 'macedonian',
        'mn' => 'mongolian',
        'ne' => 'nepali',
        'nl' => 'dutch',
        'no' => 'norwegian',
        'pl' => 'polish',
        'ps' => 'pashto',
        'pt' => 'portuguese',
        'ro' => 'romanian',
        'ru' => 'russian',
        'sk' => 'slovak',
        'sl' => 'slovene',
        'so' => 'somali',
        'sq' => 'albanian',
        'sr' => 'serbian',
        'sv' => 'swedish',
        'sw' => 'swahili',
        'tl' => 'tagalog',
        'tr' => 'turkish',
        'uk' => 'ukrainian',
        'ur' => 'urdu',
        'uz' => 'uzbek',
        'vi' => 'vietnamese',
    );

    /**
     * Maps ISO 639-2 3-letter language codes to the language names
     * in the language database.
     *
     * Every code listed here is the exact inverse of an entry in
     * $nameToCode3, so a name -> code -> name round trip is lossless.
     *
     * @var array
     */
    public static $code3ToName = array(
        'ara' => 'arabic',
        'aze' => 'azeri',
        'ben' => 'bengali',
        'bul' => 'bulgarian',
        'ceb' => 'cebuano',
        'ces' => 'czech',
        'crp' => 'pidgin',
        'cym' => 'welsh',
        'dan' => 'danish',
        'deu' => 'german',
        'eng' => 'english',
        'est' => 'estonian',
        'fas' => 'farsi',
        'fin' => 'finnish',
        'fra' => 'french',
        'hau' => 'hausa',
        'haw' => 'hawaiian',
        'hin' => 'hindi',
        'hrv' => 'croatian',
        'hun' => 'hungarian',
        'ind' => 'indonesian',
        'isl' => 'icelandic',
        'ita' => 'italian',
        'kaz' => 'kazakh',
        'kir' => 'kyrgyz',
        'lat' => 'latin',
        'lav' => 'latvian',
        'lit' => 'lithuanian',
        'mkd' => 'macedonian',
        'mon' => 'mongolian',
        'nep' => 'nepali',
        'nld' => 'dutch',
        'nor' => 'norwegian',
        'pol' => 'polish',
        'por' => 'portuguese',
        'pus' => 'pashto',
        // "ron" is the ISO 639-2 code for Romanian and matches $nameToCode3
        // ("rom" is Romany, which is not part of the language database).
        'ron' => 'romanian',
        'rus' => 'russian',
        'slk' => 'slovak',
        'slv' => 'slovene',
        'som' => 'somali',
        'spa' => 'spanish',
        'sqi' => 'albanian',
        'srp' => 'serbian',
        'swa' => 'swahili',
        'swe' => 'swedish',
        'tgl' => 'tagalog',
        'tur' => 'turkish',
        'ukr' => 'ukrainian',
        'urd' => 'urdu',
        'uzb' => 'uzbek',
        'vie' => 'vietnamese',
    );

    /**
     * Returns the 2-letter ISO 639-1 code for the given language name.
     *
     * The lookup is case-insensitive. NULL is returned both for unknown
     * names and for known languages that have no 2-letter code
     * (e.g. "cebuano").
     *
     * @param string $lang English language name like "swedish"
     *
     * @return string|null Two-letter language code (e.g. "sv") or NULL
     *                     if not found
     */
    public static function nameToCode2($lang)
    {
        $lang = strtolower($lang);
        // isset() is false for NULL values too, which is the desired result.
        if (!isset(self::$nameToCode2[$lang])) {
            return null;
        }
        return self::$nameToCode2[$lang];
    }

    /**
     * Returns the 3-letter ISO 639-2 code for the given language name.
     *
     * The lookup is case-insensitive.
     *
     * @param string $lang English language name like "swedish"
     *
     * @return string|null Three-letter language code (e.g. "swe") or NULL
     *                     if not found
     */
    public static function nameToCode3($lang)
    {
        $lang = strtolower($lang);
        if (!isset(self::$nameToCode3[$lang])) {
            return null;
        }
        return self::$nameToCode3[$lang];
    }

    /**
     * Returns the language name for the given 2-letter ISO 639-1 code.
     *
     * The lookup is case-insensitive, so "SV" and "sv" are equivalent.
     *
     * @param string $code Two-letter language code (e.g. "sv")
     *
     * @return string|null English language name like "swedish" or NULL
     *                     if the code is unknown
     */
    public static function code2ToName($code)
    {
        // Look up with the normalised code, not the raw argument.
        $code = strtolower($code);
        if (!isset(self::$code2ToName[$code])) {
            return null;
        }
        return self::$code2ToName[$code];
    }

    /**
     * Returns the language name for the given 3-letter ISO 639-2 code.
     *
     * The lookup is case-insensitive, so "SWE" and "swe" are equivalent.
     *
     * @param string $code Three-letter language code (e.g. "swe")
     *
     * @return string|null English language name like "swedish" or NULL
     *                     if the code is unknown
     */
    public static function code3ToName($code)
    {
        // Look up with the normalised code, not the raw argument.
        $code = strtolower($code);
        if (!isset(self::$code3ToName[$code])) {
            return null;
        }
        return self::$code3ToName[$code];
    }
}