]>
Commit | Line | Data |
---|---|---|
010efc9b AD |
1 | <?php |
2 | ||
/**
 * Abstract class for a set of proprietary modules that clean up (tidy)
 * poorly written HTML.
 *
 * Subclasses describe their fixes by overriding makeFixes(); setup() then
 * filters those fixes by the configured level and add/remove lists, and
 * populate() wires the surviving fixes into the module.
 *
 * @todo Figure out how to protect some of these methods/properties
 */
class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
{

    /**
     * List of supported levels. Index zero is a special case "no fixes"
     * level.
     */
    public $levels = array(0 => 'none', 'light', 'medium', 'heavy');

    /**
     * Default level to place all fixes in. Disabled by default (null), in
     * which case makeFixesForLevel() leaves $fixesForLevel untouched.
     */
    public $defaultLevel = null;

    /**
     * Lists of fixes used by getFixesForLevel(). Format is:
     *      HTMLModule_Tidy->fixesForLevel[$level] = array('fix-1', 'fix-2');
     */
    public $fixesForLevel = array(
        'light'  => array(),
        'medium' => array(),
        'heavy'  => array()
    );

    /**
     * Lazy load constructs the module by determining the necessary
     * fixes to create and then delegating to the populate() function.
     *
     * A fix is kept when it is not listed in HTML.TidyRemove and it is
     * either listed in HTML.TidyAdd or enabled by HTML.TidyLevel.
     *
     * @param $config Configuration object; read through get() for the
     *        HTML.TidyLevel, HTML.TidyAdd and HTML.TidyRemove directives
     * @todo Wildcard matching and error reporting when an added or
     *       subtracted fix has no effect.
     */
    public function setup($config)
    {
        // create fixes, initialize fixesForLevel
        $fixes = $this->makeFixes();
        if ($fixes === null) {
            // An override with no return statement yields null; treat it
            // as "no fixes" instead of feeding null to foreach/array_keys.
            $fixes = array();
        }
        $this->makeFixesForLevel($fixes);

        // figure out which fixes to use
        $level = $config->get('HTML.TidyLevel');
        $fixes_lookup = $this->getFixesForLevel($level);

        // get custom fix declarations: these need namespace processing
        $add_fixes    = $config->get('HTML.TidyAdd');
        $remove_fixes = $config->get('HTML.TidyRemove');

        foreach ($fixes as $name => $fix) {
            // needs to be refactored a little to implement globbing
            if (
                isset($remove_fixes[$name]) ||
                (!isset($add_fixes[$name]) && !isset($fixes_lookup[$name]))
            ) {
                unset($fixes[$name]);
            }
        }

        // populate this module with necessary fixes
        $this->populate($fixes);
    }

    /**
     * Retrieves all fixes per a level, returning fixes for that specific
     * level as well as all levels below it.
     *
     * An unrecognized level raises an E_USER_WARNING and yields no fixes.
     *
     * @param $level String level identifier, see $levels for valid values
     * @return Lookup table of fixes (fix name => true)
     */
    public function getFixesForLevel($level)
    {
        if ($level == $this->levels[0]) {
            return array();
        }
        // Collect every level from index 1 up to and including $level.
        $activated_levels = array();
        for ($i = 1, $c = count($this->levels); $i < $c; $i++) {
            $activated_levels[] = $this->levels[$i];
            if ($this->levels[$i] == $level) {
                break;
            }
        }
        // The loop ran to completion without a break: $level matched nothing.
        if ($i == $c) {
            trigger_error(
                'Tidy level ' . htmlspecialchars($level) . ' not recognized',
                E_USER_WARNING
            );
            return array();
        }
        $ret = array();
        foreach ($activated_levels as $activated_level) {
            foreach ($this->fixesForLevel[$activated_level] as $fix) {
                $ret[$fix] = true;
            }
        }
        return $ret;
    }

    /**
     * Dynamically populates the $fixesForLevel member variable using
     * the fixes array. It may be custom overloaded, used in conjunction
     * with $defaultLevel, or not used at all.
     *
     * When $defaultLevel is set, every fix name is assigned to that level;
     * a $defaultLevel that is not a key of $fixesForLevel raises an
     * E_USER_ERROR.
     *
     * @param $fixes Associative array of fix name to fix implementation
     */
    public function makeFixesForLevel($fixes)
    {
        if (!isset($this->defaultLevel)) {
            return;
        }
        if (!isset($this->fixesForLevel[$this->defaultLevel])) {
            trigger_error(
                'Default level ' . $this->defaultLevel . ' does not exist',
                E_USER_ERROR
            );
            return;
        }
        $this->fixesForLevel[$this->defaultLevel] = array_keys($fixes);
    }

    /**
     * Populates the module with transforms and other special-case code
     * based on a list of fixes passed to it
     *
     * @param $fixes Associative array of fix name to fix implementation;
     *        each name is parsed with getFixType() to decide where the
     *        implementation is stored
     */
    public function populate($fixes)
    {
        foreach ($fixes as $name => $fix) {
            // determine what the fix is for
            list($type, $params) = $this->getFixType($name);
            switch ($type) {
                case 'attr_transform_pre':
                case 'attr_transform_post':
                    $attr = $params['attr'];
                    if (isset($params['element'])) {
                        // element-specific transform: stored on the element
                        $element = $params['element'];
                        if (empty($this->info[$element])) {
                            $e = $this->addBlankElement($element);
                        } else {
                            $e = $this->info[$element];
                        }
                    } else {
                        // no element given: stored on this module itself,
                        // under info_attr_transform_pre/post
                        $type = "info_$type";
                        $e = $this;
                    }
                    // PHP does some weird parsing when I do
                    // $e->$type[$attr], so I have to assign a ref.
                    $f =& $e->$type;
                    $f[$attr] = $fix;
                    break;
                case 'tag_transform':
                    $this->info_tag_transform[$params['element']] = $fix;
                    break;
                case 'child':
                case 'content_model_type':
                    $element = $params['element'];
                    if (empty($this->info[$element])) {
                        $e = $this->addBlankElement($element);
                    } else {
                        $e = $this->info[$element];
                    }
                    // $type doubles as the property name on the element
                    $e->$type = $fix;
                    break;
                default:
                    trigger_error("Fix type $type not supported", E_USER_ERROR);
                    break;
            }
        }
    }

    /**
     * Parses a fix name and determines what kind of fix it is, as well
     * as other information defined by the fix
     *
     * The name has the shape element@attr#property, where every part is
     * optional. For example:
     *      'font'          => array('tag_transform', array('element' => 'font'))
     *      'img@align'     => array('attr_transform_pre', element + attr)
     *      '@lang#post'    => array('attr_transform_post', array('attr' => 'lang'))
     *      'blockquote#child' => array('child', array('element' => 'blockquote'))
     *
     * @param $name String name of fix
     * @return array(string $fix_type, array $fix_parameters)
     * @note $fix_parameters is type dependent, see populate() for usage
     *       of these parameters
     */
    public function getFixType($name)
    {
        // parse it: strip the #property suffix first, then the @attr part
        $property = $attr = null;
        if (strpos($name, '#') !== false) {
            list($name, $property) = explode('#', $name);
        }
        if (strpos($name, '@') !== false) {
            list($name, $attr) = explode('@', $name);
        }

        // figure out the parameters
        $params = array();
        if ($name !== '') {
            $params['element'] = $name;
        }
        if (!is_null($attr)) {
            $params['attr'] = $attr;
        }

        // special case: attribute transform (property defaults to 'pre')
        if (!is_null($attr)) {
            if (is_null($property)) {
                $property = 'pre';
            }
            $type = 'attr_transform_' . $property;
            return array($type, $params);
        }

        // special case: tag transform
        if (is_null($property)) {
            return array('tag_transform', $params);
        }

        return array($property, $params);
    }

    /**
     * Defines all fixes the module will perform in a compact
     * associative array of fix name to fix implementation.
     *
     * The base implementation defines no fixes; subclasses override this.
     *
     * @return Associative array of fix name to fix implementation
     */
    public function makeFixes()
    {
        // Return an empty array, not null, so setup() and
        // makeFixesForLevel() can iterate the result safely.
        return array();
    }

}
206 | ||
207 | // vim: et sw=4 sts=4 |