]>
Commit | Line | Data |
---|---|---|
f45a286b AD |
1 | <?php |
2 | ||
3 | /*! @mainpage | |
4 | * | |
5 | * HTML Purifier is an HTML filter that will take an arbitrary snippet of | |
6 | * HTML and rigorously test, validate and filter it into a version that | |
7 | * is safe for output onto webpages. It achieves this by: | |
8 | * | |
9 | * -# Lexing (parsing into tokens) the document, | |
10 | * -# Executing various strategies on the tokens: | |
11 | * -# Removing all elements not in the whitelist, | |
12 | * -# Making the tokens well-formed, | |
13 | * -# Fixing the nesting of the nodes, and | |
14 | * -# Validating attributes of the nodes; and | |
15 | * -# Generating HTML from the purified tokens. | |
16 | * | |
17 | * However, most users will only need to interface with the HTMLPurifier | |
18 | * and HTMLPurifier_Config classes. | |
19 | */ | |
20 | ||
21 | /* | |
f4f0f80d | 22 | HTML Purifier 4.3.0 - Standards Compliant HTML Filtering |
f45a286b AD |
23 | Copyright (C) 2006-2008 Edward Z. Yang |
24 | ||
25 | This library is free software; you can redistribute it and/or | |
26 | modify it under the terms of the GNU Lesser General Public | |
27 | License as published by the Free Software Foundation; either | |
28 | version 2.1 of the License, or (at your option) any later version. | |
29 | ||
30 | This library is distributed in the hope that it will be useful, | |
31 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
32 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
33 | Lesser General Public License for more details. | |
34 | ||
35 | You should have received a copy of the GNU Lesser General Public | |
36 | License along with this library; if not, write to the Free Software | |
37 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
38 | */ | |
39 | ||
/**
 * Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
 *
 * @note There are several points in which configuration can be specified
 *       for HTML Purifier.  The precedence of these (from lowest to
 *       highest) is as follows:
 *          -# Instance: new HTMLPurifier($config)
 *          -# Invocation: purify($html, $config)
 *       These configurations are entirely independent of each other and
 *       are *not* merged (this behavior may change in the future).
 *
 * @todo We need an easier way to inject strategies using the configuration
 *       object.
 */
class HTMLPurifier
{

    /** Version of HTML Purifier */
    public $version = '4.3.0';

    /** Constant with version of HTML Purifier */
    const VERSION = '4.3.0';

    /** Global configuration object */
    public $config;

    /** Array of extra HTMLPurifier_Filter objects to run on HTML, for backwards compatibility */
    private $filters = array();

    /** Single instance of HTML Purifier */
    private static $instance;

    /**
     * $strategy is the HTMLPurifier_Strategy_Core built by the constructor;
     * $generator is the HTMLPurifier_Generator created by the most recent
     * call to purify() (unset until purify() has run once).
     */
    protected $strategy, $generator;

    /**
     * Resultant HTMLPurifier_Context of last run purification. Is an array
     * of contexts if the last called method was purifyArray().
     */
    public $context;

    /**
     * Initializes the purifier.
     * @param $config Optional HTMLPurifier_Config object for all instances of
     *                the purifier, if omitted, a default configuration is
     *                supplied (which can be overridden on a per-use basis).
     *                The parameter can also be any type that
     *                HTMLPurifier_Config::create() supports.
     */
    public function __construct($config = null) {

        $this->config = HTMLPurifier_Config::create($config);

        $this->strategy = new HTMLPurifier_Strategy_Core();

    }

    /**
     * Adds a filter to process the output. First come first serve.
     * Deprecated: every call raises an E_USER_WARNING, but the filter is
     * still recorded and later run by purify().
     * @param $filter HTMLPurifier_Filter object
     */
    public function addFilter($filter) {
        trigger_error('HTMLPurifier->addFilter() is deprecated, use configuration directives in the Filter namespace or Filter.Custom', E_USER_WARNING);
        $this->filters[] = $filter;
    }

    /**
     * Filters an HTML snippet/document to be XSS-free and standards-compliant.
     *
     * As a side effect, $this->generator and $this->context are replaced
     * with the generator and context used for this run.
     *
     * @param $html String of HTML to purify
     * @param $config HTMLPurifier_Config object for this operation, if omitted,
     *                defaults to the config object specified during this
     *                object's construction. The parameter can also be any type
     *                that HTMLPurifier_Config::create() supports.
     * @return Purified HTML
     */
    public function purify($html, $config = null) {

        // :TODO: make the config merge in, instead of replace
        if ($config) {
            $config = HTMLPurifier_Config::create($config);
        } else {
            // any falsy value (null, empty array, ...) selects the instance config
            $config = $this->config;
        }

        // implementation is partially environment dependent, partially
        // configuration dependent
        $lexer = HTMLPurifier_Lexer::create($config);

        $context = new HTMLPurifier_Context();

        // setup HTML generator
        $this->generator = new HTMLPurifier_Generator($config, $context);
        $context->register('Generator', $this->generator);

        // set up global context variables
        if ($config->get('Core.CollectErrors')) {
            // may get moved out if other facilities use it
            $language_factory = HTMLPurifier_LanguageFactory::instance();
            $language = $language_factory->create($config, $context);
            $context->register('Locale', $language);

            $error_collector = new HTMLPurifier_ErrorCollector($context);
            $context->register('ErrorCollector', $error_collector);
        }

        // setup id_accumulator context, necessary due to the fact that
        // AttrValidator can be called from many places
        $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
        $context->register('IDAccumulator', $id_accumulator);

        $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

        // setup filters: built-in ones switched on by Filter.* flags first,
        // then Filter.Custom objects, then anything given to addFilter()
        $filter_flags = $config->getBatch('Filter');
        $custom_filters = $filter_flags['Custom'];
        unset($filter_flags['Custom']);
        $filters = array();
        foreach ($filter_flags as $name => $enabled) {
            // skip disabled flags, and keys containing a dot (presumably
            // nested directives rather than filter switches -- TODO confirm)
            if (!$enabled || strpos($name, '.') !== false) continue;
            $class = "HTMLPurifier_Filter_$name";
            $filters[] = new $class();
        }
        foreach ($custom_filters as $custom) {
            // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
            $filters[] = $custom;
        }
        // array_merge renumbers, so $filters is a plain 0..n-1 list here
        $filters = array_merge($filters, $this->filters);
        // maybe prepare(), but later

        // pre-filters run in registration order
        foreach ($filters as $filter) {
            $html = $filter->preFilter($html, $config, $context);
        }

        // un-purified HTML -> un-purified tokens -> purified tokens -> purified HTML
        $tokens = $lexer->tokenizeHTML($html, $config, $context);
        $tokens = $this->strategy->execute($tokens, $config, $context);
        $html   = $this->generator->generateFromTokens($tokens);

        // post-filters run in the opposite order (last registered goes first)
        foreach (array_reverse($filters) as $filter) {
            $html = $filter->postFilter($html, $config, $context);
        }

        $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
        // bound by reference exactly as before, so code holding a reference
        // to the previous context is left untouched
        $this->context =& $context;
        return $html;
    }

    /**
     * Filters an array of HTML snippets.
     *
     * Afterwards $this->context is an array holding the context of each
     * purification, indexed by the same keys as the input.
     *
     * @param $array_of_html Array of HTML strings to purify; keys are preserved.
     * @param $config Optional HTMLPurifier_Config object for this operation.
     *                See HTMLPurifier::purify() for more details.
     * @return Array of purified HTML
     */
    public function purifyArray($array_of_html, $config = null) {
        $contexts = array();
        foreach ($array_of_html as $key => $snippet) {
            $array_of_html[$key] = $this->purify($snippet, $config);
            // purify() has just stored this run's context on the instance
            $contexts[$key] = $this->context;
        }
        $this->context = $contexts;
        return $array_of_html;
    }

    /**
     * Singleton for enforcing just one HTML Purifier in your system
     * @param $prototype Optional prototype HTMLPurifier instance to
     *                   overload singleton with, or HTMLPurifier_Config
     *                   instance to configure the generated version with.
     * @return The shared HTMLPurifier instance
     */
    public static function instance($prototype = null) {
        // reuse the existing singleton unless the caller supplies a replacement
        if (self::$instance && !$prototype) {
            return self::$instance;
        }
        if ($prototype instanceof HTMLPurifier) {
            self::$instance = $prototype;
        } elseif ($prototype) {
            self::$instance = new HTMLPurifier($prototype);
        } else {
            self::$instance = new HTMLPurifier();
        }
        return self::$instance;
    }

    /**
     * @note Backwards compatibility, see instance()
     */
    public static function getInstance($prototype = null) {
        return HTMLPurifier::instance($prototype);
    }

}
236 | ||
237 | // vim: et sw=4 sts=4 |