]> git.wh0rd.org - tt-rss.git/blame - lib/htmlpurifier/library/HTMLPurifier/PercentEncoder.php
remove Archived articles from Uncategorized view
[tt-rss.git] / lib / htmlpurifier / library / HTMLPurifier / PercentEncoder.php
CommitLineData
f45a286b
AD
<?php

/**
 * Class that handles operations involving percent-encoding in URIs.
 *
 * @warning
 *      Be careful when reusing instances of PercentEncoder. The object
 *      you use for normalize() SHOULD NOT be used for encode(), or
 *      vice-versa.
 */
class HTMLPurifier_PercentEncoder
{

    /**
     * Lookup table of bytes that are left unescaped, keyed by byte value
     * (ordinal => true). Always contains the unreserved characters
     * (digits, ASCII letters, "-", ".", "_", "~") plus any extra
     * characters handed to the constructor.
     */
    protected $preserve = array();

    /**
     * Builds the table of bytes that must not be percent-encoded.
     *
     * @param $preserve String of extra characters that should be preserved
     *      (left unescaped) in addition to the unreserved set, or false
     *      for none.
     */
    public function __construct($preserve = false)
    {
        // unreserved letters, ought to const-ify
        for ($i = 48; $i <= 57; $i++) {
            $this->preserve[$i] = true; // digits
        }
        for ($i = 65; $i <= 90; $i++) {
            $this->preserve[$i] = true; // upper-case
        }
        for ($i = 97; $i <= 122; $i++) {
            $this->preserve[$i] = true; // lower-case
        }
        $this->preserve[45] = true;  // Dash         -
        $this->preserve[46] = true;  // Period       .
        $this->preserve[95] = true;  // Underscore   _
        $this->preserve[126] = true; // Tilde        ~

        // extra letters not to escape
        if ($preserve !== false) {
            for ($i = 0, $c = strlen($preserve); $i < $c; $i++) {
                $this->preserve[ord($preserve[$i])] = true;
            }
        }
    }

    /**
     * Our replacement for urlencode: it percent-encodes every byte except
     * the unreserved characters and any extra characters that were
     * instructed to be preserved.
     *
     * @note
     *      Assumes that the string has already been normalized, making any
     *      and all percent escape sequences valid. Percents will not be
     *      re-escaped, regardless of their status in $preserve
     * @param $string String to be encoded
     * @return Encoded string.
     */
    public function encode($string)
    {
        $ret = '';
        for ($i = 0, $c = strlen($string); $i < $c; $i++) {
            $char = $string[$i];
            // '%' always passes through untouched so that existing escape
            // sequences are not double-encoded.
            if ($char === '%' || isset($this->preserve[ord($char)])) {
                $ret .= $char;
            } else {
                $ret .= '%' . sprintf('%02X', ord($char));
            }
        }
        return $ret;
    }

    /**
     * Fix up percent-encoding by decoding unreserved characters and normalizing.
     *
     * Malformed escapes (a "%" not followed by two hex digits) get their
     * percent sign escaped as "%25"; escapes of preserved bytes are decoded;
     * all remaining escapes have their hex digits upper-cased.
     *
     * @warning This function is affected by $preserve, even though the
     *          usual desired behavior is for this not to preserve those
     *          characters. Be careful when reusing instances of PercentEncoder!
     * @note    "%25" is never decoded, even if "%" is listed in $preserve:
     *          a bare percent sign would make the result malformed and
     *          would be decoded again by a second normalization pass.
     * @param $string String to normalize
     * @return Normalized string.
     */
    public function normalize($string)
    {
        // Loose comparison on purpose: null/false are treated like ''.
        if ($string == '') {
            return '';
        }
        $parts = explode('%', $string);
        // Everything before the first '%' is copied verbatim.
        $ret = array_shift($parts);
        foreach ($parts as $part) {
            $length = strlen($part);
            if ($length < 2) {
                // Too short to hold two hex digits: stray percent sign.
                $ret .= '%25' . $part;
                continue;
            }
            $encoding = substr($part, 0, 2);
            $text     = substr($part, 2);
            if (!ctype_xdigit($encoding)) {
                // Not a hex pair: stray percent sign.
                $ret .= '%25' . $part;
                continue;
            }
            $int = hexdec($encoding);
            // 37 is '%': keep it escaped no matter what $preserve says.
            if ($int !== 37 && isset($this->preserve[$int])) {
                $ret .= chr($int) . $text;
                continue;
            }
            $encoding = strtoupper($encoding);
            $ret .= '%' . $encoding . $text;
        }
        return $ret;
    }

}

// vim: et sw=4 sts=4