]> git.wh0rd.org - tt-rss.git/blame - lib/htmlpurifier/library/HTMLPurifier/EntityParser.php
remove Archived articles from Uncategorized view
[tt-rss.git] / lib / htmlpurifier / library / HTMLPurifier / EntityParser.php
CommitLineData
f45a286b
AD
1<?php
2
// If we want to implement error collecting here, we'll need to use some sort
// of global data (probably trigger_error) because it's impossible to pass
// $config or $context to the callback functions.
6
/**
 * Handles referencing and dereferencing character entities.
 *
 * Entities are split into two groups: the "special" ones that stand for
 * HTML syntax characters (", &, ', <, >) and everything else. Each group
 * has its own substitution pass so that callers can decide when the
 * syntax-significant characters get decoded.
 */
class HTMLPurifier_EntityParser
{

    /**
     * Reference to entity lookup table. Lazily populated from
     * HTMLPurifier_EntityLookup::instance() the first time a named,
     * non-special entity is encountered.
     */
    protected $_entity_lookup;

    /**
     * Callback regex string for parsing entities. The trailing semicolon
     * is optional. Exactly one of the three capture groups participates
     * in any given match:
     *   1. hexadecimal digits of a "&#x...;" reference
     *   2. decimal digits (leading zeroes stripped) of a "&#...;" reference
     *   3. entity name (XML style) of a "&name;" reference
     */
    protected $_substituteEntitiesRegex =
'/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/';
//     1. hex             2. dec      3. string (XML style)


    /**
     * Decimal to parsed string conversion table for special entities.
     */
    protected $_special_dec2str =
            array(
                    34 => '"',
                    38 => '&',
                    39 => "'",
                    60 => '<',
                    62 => '>'
            );

    /**
     * Stripped entity names to decimal conversion table for special entities.
     */
    protected $_special_ent2dec =
            array(
                    'quot' => 34,
                    'amp'  => 38,
                    'lt'   => 60,
                    'gt'   => 62
            );

    /**
     * Substitutes non-special entities with their parsed equivalents.
     * Doing this every time a chunk of character data is parsed would be
     * wasteful, so it is meant to be run once, before everything else.
     *
     * @param $string String to have non-special entities parsed.
     * @return Parsed string.
     */
    public function substituteNonSpecialEntities($string) {
        // it will try to detect missing semicolons, but don't rely on it
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'nonSpecialEntityCallback'),
            $string
        );
    }

    /**
     * Callback function for substituteNonSpecialEntities() that does the work.
     *
     * Special entities (see $_special_dec2str / $_special_ent2dec) and
     * unknown named entities are returned untouched.
     *
     * @param $matches  PCRE matches array, with 0 the entire match, and
     *                  either index 1, 2 or 3 set with a hex value, dec value,
     *                  or string (respectively).
     * @return Replacement string.
     */
    protected function nonSpecialEntityCallback($matches) {
        // replaces all but big five
        $entity = $matches[0];
        // The regex guarantees at least "&" plus one more character, so
        // offset 1 always exists; numeric matches are at least 3 characters
        // long ("&#" plus a digit or "x"), so offset 2 exists as well.
        $is_num = ($entity[1] === '#');
        if ($is_num) {
            $is_hex = ($entity[2] === 'x');
            $code = $is_hex ? hexdec($matches[1]) : (int) $matches[2];

            // abort for special characters
            if (isset($this->_special_dec2str[$code])) return $entity;

            return HTMLPurifier_Encoder::unichr($code);
        } else {
            $name = $matches[3];
            if (isset($this->_special_ent2dec[$name])) return $entity;
            if (!$this->_entity_lookup) {
                $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
            }
            if (isset($this->_entity_lookup->table[$name])) {
                return $this->_entity_lookup->table[$name];
            } else {
                return $entity;
            }
        }
    }

    /**
     * Substitutes only special entities with their parsed equivalents.
     *
     * @notice We try to avoid calling this function because otherwise, it
     * would have to be called a lot (for every parsed section).
     *
     * @param $string String to have special entities parsed.
     * @return Parsed string.
     */
    public function substituteSpecialEntities($string) {
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'specialEntityCallback'),
            $string
        );
    }

    /**
     * Callback function for substituteSpecialEntities() that does the work.
     *
     * This callback has same syntax as nonSpecialEntityCallback().
     * Anything that is not a special entity is returned untouched.
     *
     * @param $matches  PCRE-style matches array, with 0 the entire match, and
     *                  either index 1, 2 or 3 set with a hex value, dec value,
     *                  or string (respectively).
     * @return Replacement string.
     */
    protected function specialEntityCallback($matches) {
        $entity = $matches[0];
        // See nonSpecialEntityCallback() for why these offsets always exist.
        $is_num = ($entity[1] === '#');
        if ($is_num) {
            $is_hex = ($entity[2] === 'x');
            $int = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
            return isset($this->_special_dec2str[$int]) ?
                $this->_special_dec2str[$int] :
                $entity;
        } else {
            $name = $matches[3];
            if (!isset($this->_special_ent2dec[$name])) return $entity;
            // $_special_ent2dec only yields the code point; it must be
            // mapped through $_special_dec2str to obtain the actual
            // character. Returning the code point directly would turn
            // "&quot;" into the text "34".
            return $this->_special_dec2str[$this->_special_ent2dec[$name]];
        }
    }

}
143
144// vim: et sw=4 sts=4