[tt-rss.git] / accept-to-gettext.php

<?php
/*
 * accept-to-gettext.inc -- convert information in 'Accept-*' headers to
 * gettext language identifiers.
 * Copyright (c) 2003, Wouter Verhelst <wouter@debian.org>
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * Usage:
 *
 *  $locale=al2gt(<array of supported languages/charsets in gettext syntax>,
 *                <MIME type of document>);
 *  setlocale(LC_ALL, $locale); // or LC_MESSAGES, or whatever...
 *
 * Example:
 *
 *  $langs=array('nl_BE.ISO-8859-15','nl_BE.UTF-8','en_US.UTF-8','en_GB.UTF-8');
 *  $locale=al2gt($langs, 'text/html');
 *  setlocale(LC_ALL, $locale);
 *
 * Note that this will send out header information (to be
 * RFC2616-compliant), so it must be called before anything is sent to
 * the user.
 * 
 * Assumptions made:
 * * Charset encodings are written the same way as the Accept-Charset
 *   HTTP header specifies them (RFC2616), except that they're parsed
 *   case-insensitive.
 * * Country codes and language codes are the same in both gettext and
 *   the Accept-Language syntax (except for the case differences, which
 *   are dealt with easily). If not, some input may be ignored.
 * * The provided gettext-strings are fully qualified; i.e., no "en_US";
 *   always "en_US.ISO-8859-15" or "en_US.UTF-8", or whichever has been
 *   used. "en.ISO-8859-15" is OK, though.
 * * The language is more important than the charset; i.e., if the
 *   following is given:
 * 
 *   Accept-Language: nl-be, nl;q=0.8, en-us;q=0.5, en;q=0.3
 *   Accept-Charset: ISO-8859-15, utf-8;q=0.5
 *
 *   And the supplied parameter contains (amongst others) nl_BE.UTF-8
 *   and nl.ISO-8859-15, then nl_BE.UTF-8 will be picked.
 * 
 * $Log: accept-to-gettext.inc,v $
 * Revision 1.1.1.1  2003/11/19 19:31:15  wouter
 * * moved to new CVS repo after death of the old
 * * Fixed code to apply a default to both Accept-Charset and
 *   Accept-Language if none of those headers are supplied; patch from
 *   Dominic Chambers <dominic@encasa.com>
 *
 * Revision 1.2  2003/08/14 10:23:59  wouter
 * Removed little error in Content-Type header syntaxis.
 *
 */

/*
 * Decide whether a candidate gettext locale beats the best one found so
 * far.
 *
 * The language score always wins over the charset score: a candidate
 * with a strictly higher language score replaces the current best
 * outright; on an equal language score the candidate only wins when its
 * charset score is strictly higher.
 *
 * Parameters:
 *   $curlscore - language score of the current best match
 *   $curcscore - charset score of the current best match
 *   $curgtlang - gettext identifier of the current best match
 *   $langval   - language score of the candidate
 *   $charval   - charset score of the candidate
 *   $gtlang    - gettext identifier of the candidate
 *
 * Returns array(language score, charset score, gettext identifier)
 * describing whichever of the two is now the best match.
 */
function find_match($curlscore,$curcscore,$curgtlang,$langval,$charval,
                    $gtlang)
{
  /* Better language: the candidate wins regardless of charset. */
  if($langval > $curlscore) {
    return array($langval, $charval, $gtlang);
  }

  /* Same language score: the charset score breaks the tie. */
  if($langval == $curlscore && $charval > $curcscore) {
    return array($curlscore, $charval, $gtlang);
  }

  /* Otherwise the current best stays as it is. */
  return array($curlscore, $curcscore, $curgtlang);
}

/*
 * Parse the value of an Accept-Language / Accept-Charset style header
 * into a map of item name => quality value.
 *
 * Items are separated by commas; an item may carry ";q=<value>". Items
 * without a q parameter get quality 1. Names are returned trimmed but
 * in their original case; empty items (e.g. from a trailing comma) are
 * skipped. If a name occurs more than once, the last occurrence wins.
 *
 * Parameters:
 *   $header - raw header value, e.g. "nl-be, nl;q=0.8"
 *
 * Returns an array mapping item name to its quality as a float.
 */
function al2gt_parse_accept($header) {
  $scores=array();
  foreach(explode(",", $header) as $item) {
    $fields=explode(";", $item);
    $name=trim($fields[0]);
    if($name === '') {
      continue;
    }
    $quality=1.0;
    for($i=1; $i<count($fields); $i++) {
      $param=explode("=", $fields[$i], 2);
      if(count($param) == 2 && strtolower(trim($param[0])) == 'q') {
        $quality=(float)trim($param[1]);
      }
    }
    $scores[$name]=$quality;
  }
  return $scores;
}

/*
 * Pick the best gettext locale for the current request from the
 * Accept-Language and Accept-Charset request headers, and send matching
 * Content-Language / Content-Type response headers.
 *
 * Because this calls header(), it has to run before any output is sent.
 *
 * Parameters:
 *   $gettextlangs - array of supported locales in gettext syntax, each
 *                   ideally with a charset ("nl_BE.UTF-8")
 *   $mime         - MIME type to announce in the Content-Type header
 *
 * Returns the chosen entry of $gettextlangs, or NULL when none of them
 * is acceptable to the client. In the NULL case only
 * "Content-Type: $mime" is sent (no Content-Language, no charset).
 */
function al2gt($gettextlangs, $mime) {
  /* default to "everything is acceptable", as RFC2616 specifies; the
   * isset() checks cover requests that lack the headers entirely */
  $acceptLang='*';
  if(isset($_SERVER["HTTP_ACCEPT_LANGUAGE"]) &&
     $_SERVER["HTTP_ACCEPT_LANGUAGE"] != '') {
    $acceptLang=$_SERVER["HTTP_ACCEPT_LANGUAGE"];
  }
  $acceptChar='*';
  if(isset($_SERVER["HTTP_ACCEPT_CHARSET"]) &&
     $_SERVER["HTTP_ACCEPT_CHARSET"] != '') {
    $acceptChar=$_SERVER["HTTP_ACCEPT_CHARSET"];
  }

  /* Both headers are case-insensitive. Language tags are folded to
   * lower case and charsets to upper case, because that is how the
   * gettext identifiers are folded before the lookups below. */
  $alscores=array_change_key_case(al2gt_parse_accept($acceptLang),
                                  CASE_LOWER);
  $acscores=array_change_key_case(al2gt_parse_accept($acceptChar),
                                  CASE_UPPER);

  /* RFC2616: ``If no "*" is present in an Accept-Charset field, then
   * all character sets not explicitly mentioned get a quality value of
   * 0, except for ISO-8859-1, which gets a quality value of 1 if not
   * explicitly mentioned.'' */
  if(!isset($acscores["ISO-8859-1"])) {
    $acscores["ISO-8859-1"]=(isset($acscores["*"])?$acscores["*"]:1.0);
  }

  /*
   * Loop through the available languages/encodings, and pick the one
   * with the highest score, excluding the ones with a charset the user
   * did not include.
   */
  $curlscore=0;
  $curcscore=0;
  $curgtlang=NULL;
  foreach($gettextlangs as $gtlang) {
    $idparts=explode(".", str_replace("_", "-", $gtlang));
    $allang=strtolower($idparts[0]);
    /* A locale without a charset can only match through "*". */
    $gtcs=(isset($idparts[1]) ? strtoupper($idparts[1]) : '');
    $noct=explode("-", $allang);
    $baselang=$noct[0];

    /* Lookup keys from most to least specific; the first pair for
     * which both headers have an entry decides this locale's score. */
    $testkeys=array(
         array($allang, $gtcs),
         array($baselang, $gtcs),
         array($allang, "*"),
         array($baselang, "*"),
         array("*", $gtcs),
         array("*", "*"));

    foreach($testkeys as $keys) {
      if(!isset($alscores[$keys[0]]) || !isset($acscores[$keys[1]])) {
        continue;
      }
      $langval=$alscores[$keys[0]];
      $charval=$acscores[$keys[1]];
      /* q=0 means "not acceptable", so such a locale must never be
       * selected, not even via the charset tie-break. */
      if($langval > 0 && $charval > 0) {
        list($curlscore, $curcscore, $curgtlang)=
          find_match($curlscore, $curcscore, $curgtlang, $langval,
                     $charval, $gtlang);
      }
      break;
    }
  }

  if($curgtlang === NULL) {
    /* Nothing acceptable: do not emit an empty Content-Language or an
     * empty charset parameter. */
    header("Content-Type: $mime");
    return NULL;
  }

  /* We must re-parse the gettext-string now, since we may have found it
   * through a "*" qualifier.*/
  $gtparts=explode(".", $curgtlang);
  $lang=str_replace("_", "-", strtolower($gtparts[0]));

  header("Content-Language: $lang");
  if(isset($gtparts[1]) && $gtparts[1] != '') {
    $charset=$gtparts[1];
    header("Content-Type: $mime; charset=$charset");
  } else {
    header("Content-Type: $mime");
  }

  return $curgtlang;
}

?>
Commit	Line	Data
659468eb AD	1	<?php
	2	/*
	3	* accept-to-gettext.inc -- convert information in 'Accept-*' headers to
	4	* gettext language identifiers.
	5	* Copyright (c) 2003, Wouter Verhelst <wouter@debian.org>
	6	*
	7	* This program is free software; you can redistribute it and/or modify
	8	* it under the terms of the GNU General Public License as published by
	9	* the Free Software Foundation; either version 2 of the License, or
	10	* (at your option) any later version.
	11	*
	12	* This program is distributed in the hope that it will be useful,
	13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	15	* GNU General Public License for more details.
	16	*
	17	* You should have received a copy of the GNU General Public License
	18	* along with this program; if not, write to the Free Software
	19	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	20	*
	21	* Usage:
	22	*
	23	* $locale=al2gt(<array of supported languages/charsets in gettext syntax>,
	24	* <MIME type of document>);
	25	* setlocale('LC_ALL', $locale); // or 'LC_MESSAGES', or whatever...
	26	*
	27	* Example:
	28	*
	29	* $langs=array('nl_BE.ISO-8859-15','nl_BE.UTF-8','en_US.UTF-8','en_GB.UTF-8');
	30	* $locale=al2gt($langs, 'text/html');
	31	* setlocale('LC_ALL', $locale);
	32	*
	33	* Note that this will send out header information (to be
	34	* RFC2616-compliant), so it must be called before anything is sent to
	35	* the user.
	36	*
	37	* Assumptions made:
	38	* * Charset encodings are written the same way as the Accept-Charset
	39	* HTTP header specifies them (RFC2616), except that they're parsed
	40	* case-insensitive.
	41	* * Country codes and language codes are the same in both gettext and
	42	* the Accept-Language syntax (except for the case differences, which
	43	* are dealt with easily). If not, some input may be ignored.
	44	* * The provided gettext-strings are fully qualified; i.e., no "en_US";
	45	* always "en_US.ISO-8859-15" or "en_US.UTF-8", or whichever has been
	46	* used. "en.ISO-8859-15" is OK, though.
	47	* * The language is more important than the charset; i.e., if the
	48	* following is given:
	49	*
	50	* Accept-Language: nl-be, nl;q=0.8, en-us;q=0.5, en;q=0.3
	51	* Accept-Charset: ISO-8859-15, utf-8;q=0.5
	52	*
	53	* And the supplied parameter contains (amongst others) nl_BE.UTF-8
	54	* and nl.ISO-8859-15, then nl_BE.UTF-8 will be picked.
	55	*
	56	* $Log: accept-to-gettext.inc,v $
	57	* Revision 1.1.1.1 2003/11/19 19:31:15 wouter
	58	* * moved to new CVS repo after death of the old
	59	* * Fixed code to apply a default to both Accept-Charset and
	60	* Accept-Language if none of those headers are supplied; patch from
	61	* Dominic Chambers <dominic@encasa.com>
	62	*
	63	* Revision 1.2 2003/08/14 10:23:59 wouter
	64	* Removed little error in Content-Type header syntaxis.
65	*
66	*/
67
68	/* not really important, this one; perhaps I could've put it inline with
69	* the rest. */
70	function find_match($curlscore,$curcscore,$curgtlang,$langval,$charval,
71	$gtlang)
72	{
73	if($curlscore < $langval) {
74	$curlscore=$langval;
75	$curcscore=$charval;
76	$curgtlang=$gtlang;
77	} else if ($curlscore == $langval) {
78	if($curcscore < $charval) {
79	$curcscore=$charval;
80	$curgtlang=$gtlang;
81	}
82	}
83	return array($curlscore, $curcscore, $curgtlang);
84	}
85
86	function al2gt($gettextlangs, $mime) {
87	/* default to "everything is acceptable", as RFC2616 specifies */
88	$acceptLang=(($_SERVER["HTTP_ACCEPT_LANGUAGE"] == '') ? '*' :
89	$_SERVER["HTTP_ACCEPT_LANGUAGE"]);
90	$acceptChar=(($_SERVER["HTTP_ACCEPT_CHARSET"] == '') ? '*' :
91	$_SERVER["HTTP_ACCEPT_CHARSET"]);
92	$alparts=@preg_split("/,/",$acceptLang);
93	$acparts=@preg_split("/,/",$acceptChar);
94
95	/* Parse the contents of the Accept-Language header.*/
96	foreach($alparts as $part) {
97	$part=trim($part);
98	if(preg_match("/;/", $part)) {
99	$lang=@preg_split("/;/",$part);
100	$score=@preg_split("/=/",$lang[1]);
101	$alscores[$lang[0]]=$score[1];
102	} else {
103	$alscores[$part]=1;
104	}
105	}
106
107	/* Do the same for the Accept-Charset header. */
108
109	/* RFC2616: ``If no "*" is present in an Accept-Charset field, then
110	* all character sets not explicitly mentioned get a quality value of
111	* 0, except for ISO-8859-1, which gets a quality value of 1 if not
112	* explicitly mentioned.''
113	*
114	* Making it 2 for the time being, so that we
115	* can distinguish between "not specified" and "specified as 1" later
116	* on. */
117	$acscores["ISO-8859-1"]=2;
118
119	foreach($acparts as $part) {
120	$part=trim($part);
121	if(preg_match("/;/", $part)) {
122	$cs=@preg_split("/;/",$part);
123	$score=@preg_split("/=/",$cs[1]);
124	$acscores[strtoupper($cs[0])]=$score[1];
125	} else {
126	$acscores[strtoupper($part)]=1;
127	}
128	}
129	if($acscores["ISO-8859-1"]==2) {
	130	$acscores["ISO-8859-1"]=(isset($acscores["*"])?$acscores["*"]:1);
131	}
132
133	/*
134	* Loop through the available languages/encodings, and pick the one
135	* with the highest score, excluding the ones with a charset the user
136	* did not include.
137	*/
138	$curlscore=0;
139	$curcscore=0;
140	$curgtlang=NULL;
141	foreach($gettextlangs as $gtlang) {
142
143	$tmp1=preg_replace("/\_/","-",$gtlang);
144	$tmp2=@preg_split("/\./",$tmp1);
145	$allang=strtolower($tmp2[0]);
146	$gtcs=strtoupper($tmp2[1]);
147	$noct=@preg_split("/-/",$allang);
148
149	$testvals=array(
150	array($alscores[$allang], $acscores[$gtcs]),
151	array($alscores[$noct[0]], $acscores[$gtcs]),
152	array($alscores[$allang], $acscores["*"]),
153	array($alscores[$noct[0]], $acscores["*"]),
154	array($alscores["*"], $acscores[$gtcs]),
	155	array($alscores["*"], $acscores["*"]));
156
157	$found=FALSE;
158	foreach($testvals as $tval) {
159	if(!$found && isset($tval[0]) && isset($tval[1])) {
160	$arr=find_match($curlscore, $curcscore, $curgtlang, $tval[0],
161	$tval[1], $gtlang);
162	$curlscore=$arr[0];
163	$curcscore=$arr[1];
164	$curgtlang=$arr[2];
165	$found=TRUE;
166	}
167	}
168	}
169
	170	/* We must re-parse the gettext-string now, since we may have found it
	171	* through a "*" qualifier.*/
172
173	$gtparts=@preg_split("/\./",$curgtlang);
174	$tmp=strtolower($gtparts[0]);
175	$lang=preg_replace("/\_/", "-", $tmp);
176	$charset=$gtparts[1];
177
178	header("Content-Language: $lang");
179	header("Content-Type: $mime; charset=$charset");
180
181	return $curgtlang;
182	}
183
184	?>