]>
Commit | Line | Data |
---|---|---|
f45a286b AD |
1 | <?php |
2 | ||
/**
 * Percent-encoding helper for URI components: escapes raw bytes and
 * normalizes existing percent escape sequences.
 *
 * @warning
 *      Both encode() and normalize() consult the same table of preserved
 *      characters. An instance built with extra preserved characters for
 *      encode() SHOULD NOT be reused for normalize(), or vice-versa.
 */
class HTMLPurifier_PercentEncoder
{

    /**
     * Lookup table keyed by byte value (0-255); a byte whose key is set
     * is emitted literally by encode() and decoded by normalize().
     */
    protected $preserve = array();

    /**
     * Builds the table of preserved bytes.
     * @param $preserve String whose bytes should be preserved in addition
     *                  to the unreserved set (digits, ASCII letters, and
     *                  "-", ".", "_", "~"), or false for no additions.
     */
    public function __construct($preserve = false) {
        // unreserved characters, listed by byte value
        $unreserved = array_merge(
            range(48, 57),           // digits
            range(65, 90),           // upper-case
            range(97, 122),          // lower-case
            array(45, 46, 95, 126)   // dash, period, underscore, tilde
        );
        foreach ($unreserved as $code) {
            $this->preserve[$code] = true;
        }

        // nothing extra was requested
        if ($preserve === false) {
            return;
        }

        // caller-supplied bytes that must also be left alone
        $length = strlen($preserve);
        for ($pos = 0; $pos < $length; $pos++) {
            $this->preserve[ord($preserve[$pos])] = true;
        }
    }

    /**
     * Replacement for urlencode: every byte that is not in the preserve
     * table is rewritten as an upper-case %XX escape; preserved bytes are
     * copied through unchanged.
     * @note
     *      Assumes that the string has already been normalized, making any
     *      and all percent escape sequences valid. A literal "%" is always
     *      copied as-is and never re-escaped, regardless of $preserve.
     * @param $string String to be encoded
     * @return Encoded string.
     */
    public function encode($string) {
        $encoded = '';
        $length = strlen($string);
        for ($pos = 0; $pos < $length; $pos++) {
            $char = $string[$pos];
            $code = ord($char);
            if ($char === '%' || isset($this->preserve[$code])) {
                // percent signs and preserved bytes pass straight through
                $encoded .= $char;
                continue;
            }
            $encoded .= '%' . sprintf('%02X', $code);
        }
        return $encoded;
    }

    /**
     * Fixes up percent-encoding: escapes of preserved bytes are decoded,
     * remaining valid escapes get upper-case hex digits, and a "%" that
     * is not followed by two hex digits is itself escaped as %25.
     * @warning This function is affected by $preserve, even though the
     *          usual desired behavior is for this not to preserve those
     *          characters. Be careful when reusing instances of
     *          PercentEncoder!
     * @param $string String to normalize
     * @return Normalized string.
     */
    public function normalize($string) {
        if ($string == '') return '';

        // everything before the first "%" is copied verbatim; each
        // remaining segment is the text that followed one "%"
        $segments = explode('%', $string);
        $normalized = array_shift($segments);

        foreach ($segments as $segment) {
            $hex = substr($segment, 0, 2);
            if (strlen($segment) < 2 || !ctype_xdigit($hex)) {
                // stray percent sign: escape it, keep the rest untouched
                $normalized .= '%25' . $segment;
                continue;
            }
            $tail = substr($segment, 2);
            $code = hexdec($hex);
            $normalized .= isset($this->preserve[$code])
                ? chr($code) . $tail
                : '%' . strtoupper($hex) . $tail;
        }
        return $normalized;
    }

}
97 | ||
98 | // vim: et sw=4 sts=4 |