array_push($articles, format_article($link, $id, false));
} else if ($mode == "zoom") {
array_push($articles, format_article($link, $id, false, true, true));
+ } else if ($mode == "raw") {
+ if ($_REQUEST['html']) header("Content-Type: text/html");
+
+ $article = format_article($link, $id, false);
+ print $article['id'] . "\n\n";
+ print $article['content'];
+ return;
} else {
catchupArticleById($link, $id, 0);
}
$config = HTMLPurifier_Config::createDefault();
- $allowed = "p,a[href],i,em,b,strong,code,pre,blockquote,br,img[src|alt|title],ul,ol,li,h1,h2,h3,h4,s";
+ $allowed = "p,a[href],i,em,b,strong,code,pre,blockquote,br,img[src|alt|title],ul,ol,li,h1,h2,h3,h4,s,object[classid|type|id|name|width|height|codebase],param[name|value]";
+ $config->set('HTML.SafeObject', true);
$config->set('HTML', 'Allowed', $allowed);
+ $config->set('Output.FlashCompat', true);
+ $config->set('Attr.EnableID', true);
+
$purifier = new HTMLPurifier($config);
/**
/**
* @file
* Convenience file that registers autoload handler for HTML Purifier.
+ * It also does some sanity checks.
*/
if (function_exists('spl_autoload_register') && function_exists('spl_autoload_unregister')) {
}
}
+if (ini_get('zend.ze1_compatibility_mode')) {
+ trigger_error("HTML Purifier is not compatible with zend.ze1_compatibility_mode; please turn it off", E_USER_ERROR);
+}
+
// vim: et sw=4 sts=4
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
* FILE, changes will be overwritten the next time the script is run.
*
- * @version 3.3.0
+ * @version 4.3.0
*
* @warning
* You must *not* include any other HTML Purifier files before this file,
require 'HTMLPurifier/AttrDef/CSS/TextDecoration.php';
require 'HTMLPurifier/AttrDef/CSS/URI.php';
require 'HTMLPurifier/AttrDef/HTML/Bool.php';
+require 'HTMLPurifier/AttrDef/HTML/Nmtokens.php';
+require 'HTMLPurifier/AttrDef/HTML/Class.php';
require 'HTMLPurifier/AttrDef/HTML/Color.php';
require 'HTMLPurifier/AttrDef/HTML/FrameTarget.php';
require 'HTMLPurifier/AttrDef/HTML/ID.php';
require 'HTMLPurifier/AttrDef/HTML/Length.php';
require 'HTMLPurifier/AttrDef/HTML/LinkTypes.php';
require 'HTMLPurifier/AttrDef/HTML/MultiLength.php';
-require 'HTMLPurifier/AttrDef/HTML/Nmtokens.php';
require 'HTMLPurifier/AttrDef/URI/Email.php';
require 'HTMLPurifier/AttrDef/URI/Host.php';
require 'HTMLPurifier/AttrDef/URI/IPv4.php';
require 'HTMLPurifier/AttrTransform/Lang.php';
require 'HTMLPurifier/AttrTransform/Length.php';
require 'HTMLPurifier/AttrTransform/Name.php';
+require 'HTMLPurifier/AttrTransform/NameSync.php';
+require 'HTMLPurifier/AttrTransform/Nofollow.php';
require 'HTMLPurifier/AttrTransform/SafeEmbed.php';
require 'HTMLPurifier/AttrTransform/SafeObject.php';
require 'HTMLPurifier/AttrTransform/SafeParam.php';
require 'HTMLPurifier/HTMLModule/Legacy.php';
require 'HTMLPurifier/HTMLModule/List.php';
require 'HTMLPurifier/HTMLModule/Name.php';
+require 'HTMLPurifier/HTMLModule/Nofollow.php';
require 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
require 'HTMLPurifier/HTMLModule/Object.php';
require 'HTMLPurifier/HTMLModule/Presentation.php';
require 'HTMLPurifier/Injector/Linkify.php';
require 'HTMLPurifier/Injector/PurifierLinkify.php';
require 'HTMLPurifier/Injector/RemoveEmpty.php';
+require 'HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
require 'HTMLPurifier/Injector/SafeObject.php';
require 'HTMLPurifier/Lexer/DOMLex.php';
require 'HTMLPurifier/Lexer/DirectLex.php';
require 'HTMLPurifier/Token/Text.php';
require 'HTMLPurifier/URIFilter/DisableExternal.php';
require 'HTMLPurifier/URIFilter/DisableExternalResources.php';
+require 'HTMLPurifier/URIFilter/DisableResources.php';
require 'HTMLPurifier/URIFilter/HostBlacklist.php';
require 'HTMLPurifier/URIFilter/MakeAbsolute.php';
require 'HTMLPurifier/URIFilter/Munge.php';
+require 'HTMLPurifier/URIScheme/data.php';
+require 'HTMLPurifier/URIScheme/file.php';
require 'HTMLPurifier/URIScheme/ftp.php';
require 'HTMLPurifier/URIScheme/http.php';
require 'HTMLPurifier/URIScheme/https.php';
$allowed_attributes["$element.$attribute"] = true;
}
}
- $config->set('HTML', 'AllowedElements', $allowed_elements);
- $config->set('HTML', 'AllowedAttributes', $allowed_attributes);
+ $config->set('HTML.AllowedElements', $allowed_elements);
+ $config->set('HTML.AllowedAttributes', $allowed_attributes);
$allowed_schemes = array();
if ($allowed_protocols !== null) {
- $config->set('URI', 'AllowedSchemes', $allowed_protocols);
+ $config->set('URI.AllowedSchemes', $allowed_protocols);
}
$purifier = new HTMLPurifier($config);
return $purifier->purify($string);
*/
/*
- HTML Purifier 3.3.0 - Standards Compliant HTML Filtering
+ HTML Purifier 4.3.0 - Standards Compliant HTML Filtering
Copyright (C) 2006-2008 Edward Z. Yang
This library is free software; you can redistribute it and/or
{
/** Version of HTML Purifier */
- public $version = '3.3.0';
+ public $version = '4.3.0';
/** Constant with version of HTML Purifier */
- const VERSION = '3.3.0';
+ const VERSION = '4.3.0';
/** Global configuration object */
public $config;
$context->register('Generator', $this->generator);
// set up global context variables
- if ($config->get('Core', 'CollectErrors')) {
+ if ($config->get('Core.CollectErrors')) {
// may get moved out if other facilities use it
$language_factory = HTMLPurifier_LanguageFactory::instance();
$language = $language_factory->create($config, $context);
$filters = array();
foreach ($filter_flags as $filter => $flag) {
if (!$flag) continue;
+ if (strpos($filter, '.') !== false) continue;
$class = "HTMLPurifier_Filter_$filter";
$filters[] = new $class;
}
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/TextDecoration.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/URI.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Bool.php';
+require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Nmtokens.php';
+require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Class.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Color.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/FrameTarget.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/ID.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Length.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/LinkTypes.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/MultiLength.php';
-require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Nmtokens.php';
require_once $__dir . '/HTMLPurifier/AttrDef/URI/Email.php';
require_once $__dir . '/HTMLPurifier/AttrDef/URI/Host.php';
require_once $__dir . '/HTMLPurifier/AttrDef/URI/IPv4.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Lang.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Length.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Name.php';
+require_once $__dir . '/HTMLPurifier/AttrTransform/NameSync.php';
+require_once $__dir . '/HTMLPurifier/AttrTransform/Nofollow.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeEmbed.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeObject.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Legacy.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/List.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Name.php';
+require_once $__dir . '/HTMLPurifier/HTMLModule/Nofollow.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Object.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Presentation.php';
require_once $__dir . '/HTMLPurifier/Injector/Linkify.php';
require_once $__dir . '/HTMLPurifier/Injector/PurifierLinkify.php';
require_once $__dir . '/HTMLPurifier/Injector/RemoveEmpty.php';
+require_once $__dir . '/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
require_once $__dir . '/HTMLPurifier/Injector/SafeObject.php';
require_once $__dir . '/HTMLPurifier/Lexer/DOMLex.php';
require_once $__dir . '/HTMLPurifier/Lexer/DirectLex.php';
require_once $__dir . '/HTMLPurifier/Token/Text.php';
require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternal.php';
require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternalResources.php';
+require_once $__dir . '/HTMLPurifier/URIFilter/DisableResources.php';
require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php';
require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php';
require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php';
+require_once $__dir . '/HTMLPurifier/URIScheme/data.php';
+require_once $__dir . '/HTMLPurifier/URIScheme/file.php';
require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php';
require_once $__dir . '/HTMLPurifier/URIScheme/http.php';
require_once $__dir . '/HTMLPurifier/URIScheme/https.php';
return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
}
+ /**
+ * Parses a possibly escaped CSS string and returns the "pure"
+ * version of it.
+ */
+ protected function expandCSSEscape($string) {
+ // flexibly parse it
+ $ret = '';
+ for ($i = 0, $c = strlen($string); $i < $c; $i++) {
+ if ($string[$i] === '\\') {
+ $i++;
+ if ($i >= $c) {
+ $ret .= '\\';
+ break;
+ }
+ if (ctype_xdigit($string[$i])) {
+ $code = $string[$i];
+ for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
+ if (!ctype_xdigit($string[$i])) break;
+ $code .= $string[$i];
+ }
+ // We have to be extremely careful when adding
+ // new characters, to make sure we're not breaking
+ // the encoding.
+ $char = HTMLPurifier_Encoder::unichr(hexdec($code));
+ if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue;
+ $ret .= $char;
+ if ($i < $c && trim($string[$i]) !== '') $i--;
+ continue;
+ }
+ if ($string[$i] === "\n") continue;
+ }
+ $ret .= $string[$i];
+ }
+ return $ret;
+ }
+
}
// vim: et sw=4 sts=4
$keywords = array();
$keywords['h'] = false; // left, right
$keywords['v'] = false; // top, bottom
- $keywords['c'] = false; // center
+ $keywords['ch'] = false; // center (first word)
+ $keywords['cv'] = false; // center (second word)
$measures = array();
$i = 0;
$lbit = ctype_lower($bit) ? $bit : strtolower($bit);
if (isset($lookup[$lbit])) {
$status = $lookup[$lbit];
+ if ($status == 'c') {
+ if ($i == 0) {
+ $status = 'ch';
+ } else {
+ $status = 'cv';
+ }
+ }
$keywords[$status] = $lbit;
$i++;
}
if (!$i) return false; // no valid values were caught
-
$ret = array();
// first keyword
if ($keywords['h']) $ret[] = $keywords['h'];
- elseif (count($measures)) $ret[] = array_shift($measures);
- elseif ($keywords['c']) {
- $ret[] = $keywords['c'];
- $keywords['c'] = false; // prevent re-use: center = center center
+ elseif ($keywords['ch']) {
+ $ret[] = $keywords['ch'];
+ $keywords['cv'] = false; // prevent re-use: center = center center
}
+ elseif (count($measures)) $ret[] = array_shift($measures);
if ($keywords['v']) $ret[] = $keywords['v'];
+ elseif ($keywords['cv']) $ret[] = $keywords['cv'];
elseif (count($measures)) $ret[] = array_shift($measures);
- elseif ($keywords['c']) $ret[] = $keywords['c'];
if (empty($ret)) return false;
return implode(' ', $ret);
public function validate($color, $config, $context) {
static $colors = null;
- if ($colors === null) $colors = $config->get('Core', 'ColorKeywords');
+ if ($colors === null) $colors = $config->get('Core.ColorKeywords');
$color = trim($color);
if ($color === '') return false;
/**
* Validates a font family list according to CSS spec
- * @todo whitelisting allowed fonts would be nice
*/
class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
{
+ protected $mask = null;
+
+ public function __construct() {
+ $this->mask = '- ';
+ for ($c = 'a'; $c <= 'z'; $c++) $this->mask .= $c;
+ for ($c = 'A'; $c <= 'Z'; $c++) $this->mask .= $c;
+ for ($c = '0'; $c <= '9'; $c++) $this->mask .= $c; // cast-y, but should be fine
+ // special bytes used by UTF-8
+ for ($i = 0x80; $i <= 0xFF; $i++) {
+ // We don't bother excluding invalid bytes in this range,
+ // because the our restriction of well-formed UTF-8 will
+ // prevent these from ever occurring.
+ $this->mask .= chr($i);
+ }
+
+ /*
+ PHP's internal strcspn implementation is
+ O(length of string * length of mask), making it inefficient
+ for large masks. However, it's still faster than
+ preg_match 8)
+ for (p = s1;;) {
+ spanp = s2;
+ do {
+ if (*spanp == c || p == s1_end) {
+ return p - s1;
+ }
+ } while (spanp++ < (s2_end - 1));
+ c = *++p;
+ }
+ */
+ // possible optimization: invert the mask.
+ }
+
public function validate($string, $config, $context) {
static $generic_names = array(
'serif' => true,
'fantasy' => true,
'cursive' => true
);
+ $allowed_fonts = $config->get('CSS.AllowedFonts');
// assume that no font names contain commas in them
$fonts = explode(',', $string);
if ($font === '') continue;
// match a generic name
if (isset($generic_names[$font])) {
- $final .= $font . ', ';
+ if ($allowed_fonts === null || isset($allowed_fonts[$font])) {
+ $final .= $font . ', ';
+ }
continue;
}
// match a quoted name
$quote = $font[0];
if ($font[$length - 1] !== $quote) continue;
$font = substr($font, 1, $length - 2);
+ }
- $new_font = '';
- for ($i = 0, $c = strlen($font); $i < $c; $i++) {
- if ($font[$i] === '\\') {
- $i++;
- if ($i >= $c) {
- $new_font .= '\\';
- break;
- }
- if (ctype_xdigit($font[$i])) {
- $code = $font[$i];
- for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
- if (!ctype_xdigit($font[$i])) break;
- $code .= $font[$i];
- }
- // We have to be extremely careful when adding
- // new characters, to make sure we're not breaking
- // the encoding.
- $char = HTMLPurifier_Encoder::unichr(hexdec($code));
- if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue;
- $new_font .= $char;
- if ($i < $c && trim($font[$i]) !== '') $i--;
- continue;
- }
- if ($font[$i] === "\n") continue;
- }
- $new_font .= $font[$i];
- }
+ $font = $this->expandCSSEscape($font);
- $font = $new_font;
- }
// $font is a pure representation of the font name
+ if ($allowed_fonts !== null && !isset($allowed_fonts[$font])) {
+ continue;
+ }
+
if (ctype_alnum($font) && $font !== '') {
// very simple font, allow it in unharmed
$final .= $font . ', ';
continue;
}
- // complicated font, requires quoting
+ // bugger out on whitespace. form feed (0C) really
+ // shouldn't show up regardless
+ $font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font);
+
+ // Here, there are various classes of characters which need
+ // to be treated differently:
+ // - Alphanumeric characters are essentially safe. We
+ // handled these above.
+ // - Spaces require quoting, though most parsers will do
+ // the right thing if there aren't any characters that
+ // can be misinterpreted
+ // - Dashes rarely occur, but they fairly unproblematic
+ // for parsing/rendering purposes.
+ // The above characters cover the majority of Western font
+ // names.
+ // - Arbitrary Unicode characters not in ASCII. Because
+ // most parsers give little thought to Unicode, treatment
+ // of these codepoints is basically uniform, even for
+ // punctuation-like codepoints. These characters can
+ // show up in non-Western pages and are supported by most
+ // major browsers, for example: "MS 明朝" is a
+ // legitimate font-name
+ // <http://ja.wikipedia.org/wiki/MS_明朝>. See
+ // the CSS3 spec for more examples:
+ // <http://www.w3.org/TR/2011/WD-css3-fonts-20110324/localizedfamilynames.png>
+ // You can see live samples of these on the Internet:
+ // <http://www.google.co.jp/search?q=font-family+MS+明朝|ゴシック>
+ // However, most of these fonts have ASCII equivalents:
+ // for example, 'MS Mincho', and it's considered
+ // professional to use ASCII font names instead of
+ // Unicode font names. Thanks Takeshi Terada for
+ // providing this information.
+ // The following characters, to my knowledge, have not been
+ // used to name font names.
+ // - Single quote. While theoretically you might find a
+ // font name that has a single quote in its name (serving
+ // as an apostrophe, e.g. Dave's Scribble), I haven't
+ // been able to find any actual examples of this.
+ // Internet Explorer's cssText translation (which I
+ // believe is invoked by innerHTML) normalizes any
+ // quoting to single quotes, and fails to escape single
+ // quotes. (Note that this is not IE's behavior for all
+ // CSS properties, just some sort of special casing for
+ // font-family). So a single quote *cannot* be used
+ // safely in the font-family context if there will be an
+ // innerHTML/cssText translation. Note that Firefox 3.x
+ // does this too.
+ // - Double quote. In IE, these get normalized to
+ // single-quotes, no matter what the encoding. (Fun
+ // fact, in IE8, the 'content' CSS property gained
+ // support, where they special cased to preserve encoded
+ // double quotes, but still translate unadorned double
+ // quotes into single quotes.) So, because their
+ // fixpoint behavior is identical to single quotes, they
+ // cannot be allowed either. Firefox 3.x displays
+ // single-quote style behavior.
+ // - Backslashes are reduced by one (so \\ -> \) every
+ // iteration, so they cannot be used safely. This shows
+ // up in IE7, IE8 and FF3
+ // - Semicolons, commas and backticks are handled properly.
+ // - The rest of the ASCII punctuation is handled properly.
+ // We haven't checked what browsers do to unadorned
+ // versions, but this is not important as long as the
+ // browser doesn't /remove/ surrounding quotes (as IE does
+ // for HTML).
+ //
+ // With these results in hand, we conclude that there are
+ // various levels of safety:
+ // - Paranoid: alphanumeric, spaces and dashes(?)
+ // - International: Paranoid + non-ASCII Unicode
+ // - Edgy: Everything except quotes, backslashes
+ // - NoJS: Standards compliance, e.g. sod IE. Note that
+ // with some judicious character escaping (since certain
+ // types of escaping doesn't work) this is theoretically
+ // OK as long as innerHTML/cssText is not called.
+ // We believe that international is a reasonable default
+ // (that we will implement now), and once we do more
+ // extensive research, we may feel comfortable with dropping
+ // it down to edgy.
+
+ // Edgy: alphanumeric, spaces, dashes and Unicode. Use of
+ // str(c)spn assumes that the string was already well formed
+ // Unicode (which of course it is).
+ if (strspn($font, $this->mask) !== strlen($font)) {
+ continue;
+ }
+
+ // Historical:
+ // In the absence of innerHTML/cssText, these ugly
+ // transforms don't pose a security risk (as \\ and \"
+ // might--these escapes are not supported by most browsers).
+ // We could try to be clever and use single-quote wrapping
+ // when there is a double quote present, but I have choosen
+ // not to implement that. (NOTE: you can reduce the amount
+ // of escapes by one depending on what quoting style you use)
+ // $font = str_replace('\\', '\\5C ', $font);
+ // $font = str_replace('"', '\\22 ', $font);
+ // $font = str_replace("'", '\\27 ', $font);
- // armor single quotes and new lines
- $font = str_replace("\\", "\\\\", $font);
- $font = str_replace("'", "\\'", $font);
+ // font possibly with spaces, requires quoting
$final .= "'$font', ";
}
$final = rtrim($final, ', ');
$uri = substr($uri, 1, $new_length - 1);
}
- $keys = array( '(', ')', ',', ' ', '"', "'");
- $values = array('\\(', '\\)', '\\,', '\\ ', '\\"', "\\'");
- $uri = str_replace($values, $keys, $uri);
+ $uri = $this->expandCSSEscape($uri);
$result = parent::validate($uri, $config, $context);
if ($result === false) return false;
- // escape necessary characters according to CSS spec
- // except for the comma, none of these should appear in the
- // URI at all
- $result = str_replace($keys, $values, $result);
+ // extra sanity check; should have been done by URI
+ $result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result);
- return "url($result)";
+ // suspicious characters are ()'; we're going to percent encode
+ // them for safety.
+ $result = str_replace(array('(', ')', "'"), array('%28', '%29', '%27'), $result);
+
+ // there's an extra bug where ampersands lose their escaping on
+ // an innerHTML cycle, so a very unlucky query parameter could
+ // then change the meaning of the URL. Unfortunately, there's
+ // not much we can do about that...
+
+ return "url(\"$result\")";
}
--- /dev/null
+<?php
+
+/**
+ * Implements special behavior for class attribute (normally NMTOKENS)
+ */
+class HTMLPurifier_AttrDef_HTML_Class extends HTMLPurifier_AttrDef_HTML_Nmtokens
+{
+ protected function split($string, $config, $context) {
+ // really, this twiddle should be lazy loaded
+ $name = $config->getDefinition('HTML')->doctype->name;
+ if ($name == "XHTML 1.1" || $name == "XHTML 2.0") {
+ return parent::split($string, $config, $context);
+ } else {
+ return preg_split('/\s+/', $string);
+ }
+ }
+ protected function filter($tokens, $config, $context) {
+ $allowed = $config->get('Attr.AllowedClasses');
+ $forbidden = $config->get('Attr.ForbiddenClasses');
+ $ret = array();
+ foreach ($tokens as $token) {
+ if (
+ ($allowed === null || isset($allowed[$token])) &&
+ !isset($forbidden[$token]) &&
+ // We need this O(n) check because of PHP's array
+ // implementation that casts -0 to 0.
+ !in_array($token, $ret, true)
+ ) {
+ $ret[] = $token;
+ }
+ }
+ return $ret;
+ }
+}
public function validate($string, $config, $context) {
static $colors = null;
- if ($colors === null) $colors = $config->get('Core', 'ColorKeywords');
+ if ($colors === null) $colors = $config->get('Core.ColorKeywords');
$string = trim($string);
public function __construct() {}
public function validate($string, $config, $context) {
- if ($this->valid_values === false) $this->valid_values = $config->get('Attr', 'AllowedFrameTargets');
+ if ($this->valid_values === false) $this->valid_values = $config->get('Attr.AllowedFrameTargets');
return parent::validate($string, $config, $context);
}
public function validate($id, $config, $context) {
- if (!$config->get('Attr', 'EnableID')) return false;
+ if (!$config->get('Attr.EnableID')) return false;
$id = trim($id); // trim it first
if ($id === '') return false;
- $prefix = $config->get('Attr', 'IDPrefix');
+ $prefix = $config->get('Attr.IDPrefix');
if ($prefix !== '') {
- $prefix .= $config->get('Attr', 'IDPrefixLocal');
+ $prefix .= $config->get('Attr.IDPrefixLocal');
// prevent re-appending the prefix
if (strpos($id, $prefix) !== 0) $id = $prefix . $id;
- } elseif ($config->get('Attr', 'IDPrefixLocal') !== '') {
+ } elseif ($config->get('Attr.IDPrefixLocal') !== '') {
trigger_error('%Attr.IDPrefixLocal cannot be used unless '.
'%Attr.IDPrefix is set', E_USER_WARNING);
}
$result = ($trim === '');
}
- $regexp = $config->get('Attr', 'IDBlacklistRegexp');
+ $regexp = $config->get('Attr.IDBlacklistRegexp');
if ($regexp && preg_match($regexp, $id)) {
return false;
}
public function validate($string, $config, $context) {
- $allowed = $config->get('Attr', $this->name);
+ $allowed = $config->get('Attr.' . $this->name);
if (empty($allowed)) return false;
$string = $this->parseCDATA($string);
/**
* Validates contents based on NMTOKENS attribute type.
- * @note The only current use for this is the class attribute in HTML
- * @note Could have some functionality factored out into Nmtoken class
- * @warning We cannot assume this class will be used only for 'class'
- * attributes. Not sure how to hook in magic behavior, then.
*/
class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
{
// early abort: '' and '0' (strings that convert to false) are invalid
if (!$string) return false;
+ $tokens = $this->split($string, $config, $context);
+ $tokens = $this->filter($tokens, $config, $context);
+ if (empty($tokens)) return false;
+ return implode(' ', $tokens);
+
+ }
+
+ /**
+ * Splits a space separated list of tokens into its constituent parts.
+ */
+ protected function split($string, $config, $context) {
// OPTIMIZABLE!
// do the preg_match, capture all subpatterns for reformulation
// escaping because I don't know how to do that with regexps
// and plus it would complicate optimization efforts (you never
// see that anyway).
- $matches = array();
$pattern = '/(?:(?<=\s)|\A)'. // look behind for space or string start
'((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'.
'(?:(?=\s)|\z)/'; // look ahead for space or string end
preg_match_all($pattern, $string, $matches);
+ return $matches[1];
+ }
- if (empty($matches[1])) return false;
-
- // reconstruct string
- $new_string = '';
- foreach ($matches[1] as $token) {
- $new_string .= $token . ' ';
- }
- $new_string = rtrim($new_string);
-
- return $new_string;
-
+ /**
+ * Template method for removing certain tokens based on arbitrary criteria.
+ * @note If we wanted to be really functional, we'd do an array_filter
+ * with a callback. But... we're not.
+ */
+ protected function filter($tokens, $config, $context) {
+ return $tokens;
}
}
public function validate($uri, $config, $context) {
- if ($config->get('URI', 'Disable')) return false;
+ if ($config->get('URI.Disable')) return false;
$uri = $this->parseCDATA($uri);
public function validate($string, $config, $context) {
$length = strlen($string);
+ // empty hostname is OK; it's usually semantically equivalent:
+ // the default host as defined by a URI scheme is used:
+ //
+ // If the URI scheme defines a default for host, then that
+ // default applies when the host subcomponent is undefined
+ // or when the registered name is empty (zero length).
if ($string === '') return '';
if ($length > 1 && $string[0] === '[' && $string[$length-1] === ']') {
//IPv6
public function transform($attr, $config, $context) {
if (isset($attr['dir'])) return $attr;
- $attr['dir'] = $config->get('Attr', 'DefaultTextDir');
+ $attr['dir'] = $config->get('Attr.DefaultTextDir');
return $attr;
}
$src = true;
if (!isset($attr['src'])) {
- if ($config->get('Core', 'RemoveInvalidImg')) return $attr;
- $attr['src'] = $config->get('Attr', 'DefaultInvalidImage');
+ if ($config->get('Core.RemoveInvalidImg')) return $attr;
+ $attr['src'] = $config->get('Attr.DefaultInvalidImage');
$src = false;
}
if (!isset($attr['alt'])) {
if ($src) {
- $alt = $config->get('Attr', 'DefaultImageAlt');
+ $alt = $config->get('Attr.DefaultImageAlt');
if ($alt === null) {
- $attr['alt'] = basename($attr['src']);
+ // truncate if the alt is too long
+ $attr['alt'] = substr(basename($attr['src']),0,40);
} else {
$attr['alt'] = $alt;
}
} else {
- $attr['alt'] = $config->get('Attr', 'DefaultInvalidImageAlt');
+ $attr['alt'] = $config->get('Attr.DefaultInvalidImageAlt');
}
}
{
public function transform($attr, $config, $context) {
+ // Abort early if we're using relaxed definition of name
+ if ($config->get('HTML.Attr.Name.UseCDATA')) return $attr;
if (!isset($attr['name'])) return $attr;
$id = $this->confiscateAttr($attr, 'name');
if ( isset($attr['id'])) return $attr;
--- /dev/null
+<?php
+
+/**
+ * Post-transform that performs validation to the name attribute; if
+ * it is present with an equivalent id attribute, it is passed through;
+ * otherwise validation is performed.
+ */
+class HTMLPurifier_AttrTransform_NameSync extends HTMLPurifier_AttrTransform
+{
+
+ public function __construct() {
+ $this->idDef = new HTMLPurifier_AttrDef_HTML_ID();
+ }
+
+ public function transform($attr, $config, $context) {
+ if (!isset($attr['name'])) return $attr;
+ $name = $attr['name'];
+ if (isset($attr['id']) && $attr['id'] === $name) return $attr;
+ $result = $this->idDef->validate($name, $config, $context);
+ if ($result === false) unset($attr['name']);
+ else $attr['name'] = $result;
+ return $attr;
+ }
+
+}
+
+// vim: et sw=4 sts=4
--- /dev/null
+<?php
+
+// must be called POST validation
+
+/**
+ * Adds rel="nofollow" to all outbound links. This transform is
+ * only attached if Attr.Nofollow is TRUE.
+ */
+class HTMLPurifier_AttrTransform_Nofollow extends HTMLPurifier_AttrTransform
+{
+ private $parser;
+
+ public function __construct() {
+ $this->parser = new HTMLPurifier_URIParser();
+ }
+
+ public function transform($attr, $config, $context) {
+
+ if (!isset($attr['href'])) {
+ return $attr;
+ }
+
+ // XXX Kind of inefficient
+ $url = $this->parser->parse($attr['href']);
+ $scheme = $url->getSchemeObj($config, $context);
+
+ if (!is_null($url->host) && $scheme !== false && $scheme->browsable) {
+ if (isset($attr['rel'])) {
+ $attr['rel'] .= ' nofollow';
+ } else {
+ $attr['rel'] = 'nofollow';
+ }
+ }
+
+ return $attr;
+
+ }
+
+}
+
+// vim: et sw=4 sts=4
public function __construct() {
$this->uri = new HTMLPurifier_AttrDef_URI(true); // embedded
+ $this->wmode = new HTMLPurifier_AttrDef_Enum(array('window', 'opaque', 'transparent'));
}
public function transform($attr, $config, $context) {
case 'allowNetworking':
$attr['value'] = 'internal';
break;
+ case 'allowFullScreen':
+ if ($config->get('HTML.FlashAllowFullScreen')) {
+ $attr['value'] = ($attr['value'] == 'true') ? 'true' : 'false';
+ } else {
+ $attr['value'] = 'false';
+ }
+ break;
case 'wmode':
- $attr['value'] = 'window';
+ $attr['value'] = $this->wmode->validate($attr['value'], $config, $context);
break;
case 'movie':
+ case 'src':
+ $attr['name'] = "movie";
$attr['value'] = $this->uri->validate($attr['value'], $config, $context);
break;
+ case 'flashvars':
+ // we're going to allow arbitrary inputs to the SWF, on
+ // the reasoning that it could only hack the SWF, not us.
+ break;
// add other cases to support other param name/value pairs
default:
$attr['name'] = $attr['value'] = null;
$this->info['Charsets'] = new HTMLPurifier_AttrDef_Text();
$this->info['Character'] = new HTMLPurifier_AttrDef_Text();
+ // "proprietary" types
+ $this->info['Class'] = new HTMLPurifier_AttrDef_HTML_Class();
+
// number is really a positive integer (one or more digits)
// FIXME: ^^ not always, see start and value of list items
$this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true);
public static function autoload($class) {
$file = HTMLPurifier_Bootstrap::getPath($class);
if (!$file) return false;
- require HTMLPURIFIER_PREFIX . '/' . $file;
+ // Technically speaking, it should be ok and more efficient to
+ // just do 'require', but Antonio Parraga reports that with
+ // Zend extensions such as Zend debugger and APC, this invariant
+ // may be broken. Since we have efficient alternatives, pay
+ // the cost here and avoid the bug.
+ require_once HTMLPURIFIER_PREFIX . '/' . $file;
return true;
}
if ( ($funcs = spl_autoload_functions()) === false ) {
spl_autoload_register($autoload);
} elseif (function_exists('spl_autoload_unregister')) {
+ $buggy = version_compare(PHP_VERSION, '5.2.11', '<');
$compat = version_compare(PHP_VERSION, '5.1.2', '<=') &&
version_compare(PHP_VERSION, '5.1.0', '>=');
foreach ($funcs as $func) {
- if (is_array($func)) {
+ if ($buggy && is_array($func)) {
// :TRICKY: There are some compatibility issues and some
// places where we need to error out
$reflector = new ReflectionMethod($func[0], $func[1]);
new HTMLPurifier_AttrDef_CSS_Percentage(true),
new HTMLPurifier_AttrDef_Enum(array('auto'))
));
- $max = $config->get('CSS', 'MaxImgLength');
+ $max = $config->get('CSS.MaxImgLength');
$this->info['width'] =
$this->info['height'] =
// partial support
$this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(array('nowrap'));
- if ($config->get('CSS', 'Proprietary')) {
+ if ($config->get('CSS.Proprietary')) {
$this->doSetupProprietary($config);
}
- if ($config->get('CSS', 'AllowTricky')) {
+ if ($config->get('CSS.AllowTricky')) {
$this->doSetupTricky($config);
}
- $allow_important = $config->get('CSS', 'AllowImportant');
+ if ($config->get('CSS.Trusted')) {
+ $this->doSetupTrusted($config);
+ }
+
+ $allow_important = $config->get('CSS.AllowImportant');
// wrap all attr-defs with decorator that handles !important
foreach ($this->info as $k => $v) {
$this->info[$k] = new HTMLPurifier_AttrDef_CSS_ImportantDecorator($v, $allow_important);
$this->info['overflow'] = new HTMLPurifier_AttrDef_Enum(array('visible', 'hidden', 'auto', 'scroll'));
}
+ protected function doSetupTrusted($config) {
+ $this->info['position'] = new HTMLPurifier_AttrDef_Enum(array(
+ 'static', 'relative', 'absolute', 'fixed'
+ ));
+ $this->info['top'] =
+ $this->info['left'] =
+ $this->info['right'] =
+ $this->info['bottom'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
+ new HTMLPurifier_AttrDef_CSS_Length(),
+ new HTMLPurifier_AttrDef_CSS_Percentage(),
+ new HTMLPurifier_AttrDef_Enum(array('auto')),
+ ));
+ $this->info['z-index'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
+ new HTMLPurifier_AttrDef_Integer(),
+ new HTMLPurifier_AttrDef_Enum(array('auto')),
+ ));
+ }
/**
* Performs extra config-based processing. Based off of
// setup allowed elements
$support = "(for information on implementing this, see the ".
"support forums) ";
- $allowed_attributes = $config->get('CSS', 'AllowedProperties');
- if ($allowed_attributes !== null) {
+ $allowed_properties = $config->get('CSS.AllowedProperties');
+ if ($allowed_properties !== null) {
foreach ($this->info as $name => $d) {
- if(!isset($allowed_attributes[$name])) unset($this->info[$name]);
- unset($allowed_attributes[$name]);
+ if(!isset($allowed_properties[$name])) unset($this->info[$name]);
+ unset($allowed_properties[$name]);
}
// emit errors
- foreach ($allowed_attributes as $name => $d) {
+ foreach ($allowed_properties as $name => $d) {
// :TODO: Is this htmlspecialchars() call really necessary?
$name = htmlspecialchars($name);
trigger_error("Style attribute '$name' is not supported $support", E_USER_WARNING);
}
}
+ $forbidden_properties = $config->get('CSS.ForbiddenProperties');
+ if ($forbidden_properties !== null) {
+ foreach ($this->info as $name => $d) {
+ if (isset($forbidden_properties[$name])) {
+ unset($this->info[$name]);
+ }
+ }
+ }
+
}
}
$all_whitespace = true;
// some configuration
- $escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren');
+ $escape_invalid_children = $config->get('Core.EscapeInvalidChildren');
// generator
$gen = new HTMLPurifier_Generator($config, $context);
/**
* HTML Purifier's version
*/
- public $version = '3.3.0';
+ public $version = '4.3.0';
/**
* Bool indicator whether or not to automatically finalize
*/
protected $plist;
+ /**
+ * Whether or not a set is taking place due to an
+ * alias lookup.
+ */
+ private $aliasMode;
+
+ /**
+ * Set to false if you do not want line and file numbers in errors
+ * (useful when unit testing). This will also compress some errors
+ * and exceptions.
+ */
+ public $chatty = true;
+
+ /**
+ * Current lock; only gets to this namespace are allowed.
+ */
+ private $lock;
+
/**
* @param $definition HTMLPurifier_ConfigSchema that defines what directives
* are allowed.
*/
- public function __construct($definition) {
- $this->plist = new HTMLPurifier_PropertyList($definition->defaultPlist);
+ public function __construct($definition, $parent = null) {
+ $parent = $parent ? $parent : $definition->defaultPlist;
+ $this->plist = new HTMLPurifier_PropertyList($parent);
$this->def = $definition; // keep a copy around for checking
$this->parser = new HTMLPurifier_VarParser_Flexible();
}
return $ret;
}
+ /**
+ * Creates a new config object that inherits from a previous one.
+ * @param HTMLPurifier_Config $config Configuration object to inherit
+ * from.
+ * @return HTMLPurifier_Config object with $config as its parent.
+ */
+ public static function inherit(HTMLPurifier_Config $config) {
+ return new HTMLPurifier_Config($config->def, $config->plist);
+ }
+
/**
* Convenience constructor that creates a default configuration object.
* @return Default HTMLPurifier_Config object.
/**
* Retreives a value from the configuration.
- * @param $namespace String namespace
* @param $key String key
*/
- public function get($namespace, $key) {
- if (!$this->finalized) $this->autoFinalize ? $this->finalize() : $this->plist->squash(true);
- if (!isset($this->def->info[$namespace][$key])) {
+ public function get($key, $a = null) {
+ if ($a !== null) {
+ $this->triggerError("Using deprecated API: use \$config->get('$key.$a') instead", E_USER_WARNING);
+ $key = "$key.$a";
+ }
+ if (!$this->finalized) $this->autoFinalize();
+ if (!isset($this->def->info[$key])) {
// can't add % due to SimpleTest bug
- trigger_error('Cannot retrieve value of undefined directive ' . htmlspecialchars("$namespace.$key"),
+ $this->triggerError('Cannot retrieve value of undefined directive ' . htmlspecialchars($key),
E_USER_WARNING);
return;
}
- if (isset($this->def->info[$namespace][$key]->isAlias)) {
- $d = $this->def->info[$namespace][$key];
- trigger_error('Cannot get value from aliased directive, use real name ' . $d->namespace . '.' . $d->name,
+ if (isset($this->def->info[$key]->isAlias)) {
+ $d = $this->def->info[$key];
+ $this->triggerError('Cannot get value from aliased directive, use real name ' . $d->key,
E_USER_ERROR);
return;
}
- return $this->plist->get("$namespace.$key");
+ if ($this->lock) {
+ list($ns) = explode('.', $key);
+ if ($ns !== $this->lock) {
+ $this->triggerError('Cannot get value of namespace ' . $ns . ' when lock for ' . $this->lock . ' is active, this probably indicates a Definition setup method is accessing directives that are not within its namespace', E_USER_ERROR);
+ return;
+ }
+ }
+ return $this->plist->get($key);
}
/**
* @param $namespace String namespace
*/
public function getBatch($namespace) {
- if (!$this->finalized) $this->autoFinalize ? $this->finalize() : $this->plist->squash(true);
- if (!isset($this->def->info[$namespace])) {
- trigger_error('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace),
+ if (!$this->finalized) $this->autoFinalize();
+ $full = $this->getAll();
+ if (!isset($full[$namespace])) {
+ $this->triggerError('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace),
E_USER_WARNING);
return;
}
- $full = $this->getAll();
return $full[$namespace];
}
/**
* Retrieves all directives, organized by namespace
+ * @warning This is a pretty inefficient function, avoid if you can
*/
public function getAll() {
- if (!$this->finalized) $this->autoFinalize ? $this->finalize() : $this->plist->squash(true);
+ if (!$this->finalized) $this->autoFinalize();
$ret = array();
foreach ($this->plist->squash() as $name => $value) {
list($ns, $key) = explode('.', $name, 2);
/**
* Sets a value to configuration.
- * @param $namespace String namespace
* @param $key String key
* @param $value Mixed value
*/
- public function set($namespace, $key, $value, $from_alias = false) {
+ public function set($key, $value, $a = null) {
+ if (strpos($key, '.') === false) {
+ $namespace = $key;
+ $directive = $value;
+ $value = $a;
+ $key = "$key.$directive";
+ $this->triggerError("Using deprecated API: use \$config->set('$key', ...) instead", E_USER_NOTICE);
+ } else {
+ list($namespace) = explode('.', $key);
+ }
if ($this->isFinalized('Cannot set directive after finalization')) return;
- if (!isset($this->def->info[$namespace][$key])) {
- trigger_error('Cannot set undefined directive ' . htmlspecialchars("$namespace.$key") . ' to value',
+ if (!isset($this->def->info[$key])) {
+ $this->triggerError('Cannot set undefined directive ' . htmlspecialchars($key) . ' to value',
E_USER_WARNING);
return;
}
- $def = $this->def->info[$namespace][$key];
+ $def = $this->def->info[$key];
if (isset($def->isAlias)) {
- if ($from_alias) {
- trigger_error('Double-aliases not allowed, please fix '.
- 'ConfigSchema bug with' . "$namespace.$key", E_USER_ERROR);
+ if ($this->aliasMode) {
+ $this->triggerError('Double-aliases not allowed, please fix '.
+ 'ConfigSchema bug with' . $key, E_USER_ERROR);
return;
}
- $this->set($new_ns = $def->namespace,
- $new_dir = $def->name,
- $value, true);
- trigger_error("$namespace.$key is an alias, preferred directive name is $new_ns.$new_dir", E_USER_NOTICE);
+ $this->aliasMode = true;
+ $this->set($def->key, $value);
+ $this->aliasMode = false;
+ $this->triggerError("$key is an alias, preferred directive name is {$def->key}", E_USER_NOTICE);
return;
}
try {
$value = $this->parser->parse($value, $type, $allow_null);
} catch (HTMLPurifier_VarParserException $e) {
- trigger_error('Value for ' . "$namespace.$key" . ' is of invalid type, should be ' . HTMLPurifier_VarParser::getTypeName($type), E_USER_WARNING);
+ $this->triggerError('Value for ' . $key . ' is of invalid type, should be ' . HTMLPurifier_VarParser::getTypeName($type), E_USER_WARNING);
return;
}
if (is_string($value) && is_object($def)) {
}
// check to see if the value is allowed
if (isset($def->allowed) && !isset($def->allowed[$value])) {
- trigger_error('Value not supported, valid values are: ' .
+ $this->triggerError('Value not supported, valid values are: ' .
$this->_listify($def->allowed), E_USER_WARNING);
return;
}
}
- $this->plist->set("$namespace.$key", $value);
+ $this->plist->set($key, $value);
// reset definitions if the directives they depend on changed
// this is a very costly process, so it's discouraged
// with finalization
- if ($namespace == 'HTML' || $namespace == 'CSS') {
+ if ($namespace == 'HTML' || $namespace == 'CSS' || $namespace == 'URI') {
$this->definitions[$namespace] = null;
}
* Retrieves object reference to the HTML definition.
* @param $raw Return a copy that has not been setup yet. Must be
* called before it's been setup, otherwise won't work.
+ * @param $optimized If true, this method may return null, to
+ * indicate that a cached version of the modified
+ * definition object is available and no further edits
+ * are necessary. Consider using
+ * maybeGetRawHTMLDefinition, which is more explicitly
+ * named, instead.
*/
- public function getHTMLDefinition($raw = false) {
- return $this->getDefinition('HTML', $raw);
+ public function getHTMLDefinition($raw = false, $optimized = false) {
+ return $this->getDefinition('HTML', $raw, $optimized);
}
/**
* Retrieves object reference to the CSS definition
* @param $raw Return a copy that has not been setup yet. Must be
* called before it's been setup, otherwise won't work.
+ * @param $optimized If true, this method may return null, to
+ * indicate that a cached version of the modified
+ * definition object is available and no further edits
+ * are necessary. Consider using
+ * maybeGetRawCSSDefinition, which is more explicitly
+ * named, instead.
*/
- public function getCSSDefinition($raw = false) {
- return $this->getDefinition('CSS', $raw);
+ public function getCSSDefinition($raw = false, $optimized = false) {
+ return $this->getDefinition('CSS', $raw, $optimized);
+ }
+
+ /**
+ * Retrieves object reference to the URI definition
+ * @param $raw Return a copy that has not been setup yet. Must be
+ * called before it's been setup, otherwise won't work.
+ * @param $optimized If true, this method may return null, to
+ * indicate that a cached version of the modified
+ * definition object is available and no further edits
+ * are necessary. Consider using
+ * maybeGetRawURIDefinition, which is more explicitly
+ * named, instead.
+ */
+ public function getURIDefinition($raw = false, $optimized = false) {
+ return $this->getDefinition('URI', $raw, $optimized);
}
/**
* Retrieves a definition
* @param $type Type of definition: HTML, CSS, etc
* @param $raw Whether or not definition should be returned raw
+ * @param $optimized Only has an effect when $raw is true. Whether
+ * or not to return null if the result is already present in
+ * the cache. This is off by default for backwards
+ * compatibility reasons, but you need to do things this
+ * way in order to ensure that caching is done properly.
+ * Check out enduser-customize.html for more details.
+ * We probably won't ever change this default, as much as the
+ * maybe semantics is the "right thing to do."
*/
- public function getDefinition($type, $raw = false) {
- if (!$this->finalized) $this->autoFinalize ? $this->finalize() : $this->plist->squash(true);
+ public function getDefinition($type, $raw = false, $optimized = false) {
+ if ($optimized && !$raw) {
+ throw new HTMLPurifier_Exception("Cannot set optimized = true when raw = false");
+ }
+ if (!$this->finalized) $this->autoFinalize();
+ // temporarily suspend locks, so we can handle recursive definition calls
+ $lock = $this->lock;
+ $this->lock = null;
$factory = HTMLPurifier_DefinitionCacheFactory::instance();
$cache = $factory->create($type, $this);
+ $this->lock = $lock;
if (!$raw) {
- // see if we can quickly supply a definition
+ // full definition
+ // ---------------
+ // check if definition is in memory
+ if (!empty($this->definitions[$type])) {
+ $def = $this->definitions[$type];
+ // check if the definition is setup
+ if ($def->setup) {
+ return $def;
+ } else {
+ $def->setup($this);
+ if ($def->optimized) $cache->add($def, $this);
+ return $def;
+ }
+ }
+ // check if definition is in cache
+ $def = $cache->get($this);
+ if ($def) {
+ // definition in cache, save to memory and return it
+ $this->definitions[$type] = $def;
+ return $def;
+ }
+ // initialize it
+ $def = $this->initDefinition($type);
+ // set it up
+ $this->lock = $type;
+ $def->setup($this);
+ $this->lock = null;
+ // save in cache
+ $cache->add($def, $this);
+ // return it
+ return $def;
+ } else {
+ // raw definition
+ // --------------
+ // check preconditions
+ $def = null;
+ if ($optimized) {
+ if (is_null($this->get($type . '.DefinitionID'))) {
+ // fatally error out if definition ID not set
+ throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID");
+ }
+ }
if (!empty($this->definitions[$type])) {
- if (!$this->definitions[$type]->setup) {
- $this->definitions[$type]->setup($this);
- $cache->set($this->definitions[$type], $this);
+ $def = $this->definitions[$type];
+ if ($def->setup && !$optimized) {
+ $extra = $this->chatty ? " (try moving this code block earlier in your initialization)" : "";
+ throw new HTMLPurifier_Exception("Cannot retrieve raw definition after it has already been setup" . $extra);
+ }
+ if ($def->optimized === null) {
+ $extra = $this->chatty ? " (try flushing your cache)" : "";
+ throw new HTMLPurifier_Exception("Optimization status of definition is unknown" . $extra);
+ }
+ if ($def->optimized !== $optimized) {
+ $msg = $optimized ? "optimized" : "unoptimized";
+ $extra = $this->chatty ? " (this backtrace is for the first inconsistent call, which was for a $msg raw definition)" : "";
+ throw new HTMLPurifier_Exception("Inconsistent use of optimized and unoptimized raw definition retrievals" . $extra);
}
- return $this->definitions[$type];
}
- // memory check missed, try cache
- $this->definitions[$type] = $cache->get($this);
- if ($this->definitions[$type]) {
- // definition in cache, return it
- return $this->definitions[$type];
+ // check if definition was in memory
+ if ($def) {
+ if ($def->setup) {
+ // invariant: $optimized === true (checked above)
+ return null;
+ } else {
+ return $def;
+ }
+ }
+ // if optimized, check if definition was in cache
+ // (because we do the memory check first, this formulation
+ // is prone to cache slamming, but I think
+ // guaranteeing that either /all/ of the raw
+ // setup code or /none/ of it is run is more important.)
+ if ($optimized) {
+ // This code path only gets run once; once we put
+ // something in $definitions (which is guaranteed by the
+ // trailing code), we always short-circuit above.
+ $def = $cache->get($this);
+ if ($def) {
+ // save the full definition for later, but don't
+ // return it yet
+ $this->definitions[$type] = $def;
+ return null;
+ }
+ }
+ // check invariants for creation
+ if (!$optimized) {
+ if (!is_null($this->get($type . '.DefinitionID'))) {
+ if ($this->chatty) {
+ $this->triggerError("Due to a documentation error in previous version of HTML Purifier, your definitions are not being cached. If this is OK, you can remove the %$type.DefinitionRev and %$type.DefinitionID declaration. Otherwise, modify your code to use maybeGetRawDefinition, and test if the returned value is null before making any edits (if it is null, that means that a cached version is available, and no raw operations are necessary). See <a href='http://htmlpurifier.org/docs/enduser-customize.html#optimized'>Customize</a> for more details", E_USER_WARNING);
+ } else {
+ $this->triggerError("Useless DefinitionID declaration", E_USER_WARNING);
+ }
+ }
}
- } elseif (
- !empty($this->definitions[$type]) &&
- !$this->definitions[$type]->setup
- ) {
- // raw requested, raw in memory, quick return
- return $this->definitions[$type];
+ // initialize it
+ $def = $this->initDefinition($type);
+ $def->optimized = $optimized;
+ return $def;
}
+ throw new HTMLPurifier_Exception("The impossible happened!");
+ }
+
+ private function initDefinition($type) {
// quick checks failed, let's create the object
if ($type == 'HTML') {
- $this->definitions[$type] = new HTMLPurifier_HTMLDefinition();
+ $def = new HTMLPurifier_HTMLDefinition();
} elseif ($type == 'CSS') {
- $this->definitions[$type] = new HTMLPurifier_CSSDefinition();
+ $def = new HTMLPurifier_CSSDefinition();
} elseif ($type == 'URI') {
- $this->definitions[$type] = new HTMLPurifier_URIDefinition();
+ $def = new HTMLPurifier_URIDefinition();
} else {
throw new HTMLPurifier_Exception("Definition of $type type not supported");
}
- // quick abort if raw
- if ($raw) {
- if (is_null($this->get($type, 'DefinitionID'))) {
- // fatally error out if definition ID not set
- throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID");
- }
- return $this->definitions[$type];
- }
- // set it up
- $this->definitions[$type]->setup($this);
- // save in cache
- $cache->set($this->definitions[$type], $this);
- return $this->definitions[$type];
+ $this->definitions[$type] = $def;
+ return $def;
+ }
+
+ public function maybeGetRawDefinition($name) {
+ return $this->getDefinition($name, true, true);
+ }
+
+ public function maybeGetRawHTMLDefinition() {
+ return $this->getDefinition('HTML', true, true);
+ }
+
+ public function maybeGetRawCSSDefinition() {
+ return $this->getDefinition('CSS', true, true);
+ }
+
+ public function maybeGetRawURIDefinition() {
+ return $this->getDefinition('URI', true, true);
}
/**
foreach ($config_array as $key => $value) {
$key = str_replace('_', '.', $key);
if (strpos($key, '.') !== false) {
- // condensed form
- list($namespace, $directive) = explode('.', $key);
- $this->set($namespace, $directive, $value);
+ $this->set($key, $value);
} else {
$namespace = $key;
$namespace_values = $value;
foreach ($namespace_values as $directive => $value) {
- $this->set($namespace, $directive, $value);
+ $this->set($namespace .'.'. $directive, $value);
}
}
}
}
}
$ret = array();
- foreach ($schema->info as $ns => $keypairs) {
- foreach ($keypairs as $directive => $def) {
- if ($allowed !== true) {
- if (isset($blacklisted_directives["$ns.$directive"])) continue;
- if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) continue;
- }
- if (isset($def->isAlias)) continue;
- if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') continue;
- $ret[] = array($ns, $directive);
+ foreach ($schema->info as $key => $def) {
+ list($ns, $directive) = explode('.', $key, 2);
+ if ($allowed !== true) {
+ if (isset($blacklisted_directives["$ns.$directive"])) continue;
+ if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) continue;
}
+ if (isset($def->isAlias)) continue;
+ if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') continue;
+ $ret[] = array($ns, $directive);
}
return $ret;
}
*/
public function isFinalized($error = false) {
if ($this->finalized && $error) {
- trigger_error($error, E_USER_ERROR);
+ $this->triggerError($error, E_USER_ERROR);
}
return $this->finalized;
}
* already finalized
*/
public function autoFinalize() {
- if (!$this->finalized && $this->autoFinalize) $this->finalize();
+ if ($this->autoFinalize) {
+ $this->finalize();
+ } else {
+ $this->plist->squash(true);
+ }
}
/**
*/
public function finalize() {
$this->finalized = true;
+ unset($this->parser);
+ }
+
+ /**
+ * Produces a nicely formatted error message by supplying the
+ * stack frame information OUTSIDE of HTMLPurifier_Config.
+ */
+ protected function triggerError($msg, $no) {
+ // determine previous stack frame
+ $extra = '';
+ if ($this->chatty) {
+ $trace = debug_backtrace();
+ // zip(tail(trace), trace) -- but PHP is not Haskell har har
+ for ($i = 0, $c = count($trace); $i < $c - 1; $i++) {
+ if ($trace[$i + 1]['class'] === 'HTMLPurifier_Config') {
+ continue;
+ }
+ $frame = $trace[$i];
+ $extra = " invoked on line {$frame['line']} in file {$frame['file']}";
+ break;
+ }
+ }
+ trigger_error($msg . $extra, $no);
+ }
+
+ /**
+ * Returns a serialized form of the configuration object that can
+ * be reconstituted.
+ */
+ public function serialize() {
+ $this->getDefinition('HTML');
+ $this->getDefinition('CSS');
+ $this->getDefinition('URI');
+ return serialize($this);
}
}
* Unserializes the default ConfigSchema.
*/
public static function makeFromSerial() {
- return unserialize(file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema.ser'));
+ $contents = file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema.ser');
+ $r = unserialize($contents);
+ if (!$r) {
+ $hash = sha1($contents);
+ trigger_error("Unserialization of configuration schema failed, sha1 of file was $hash", E_USER_ERROR);
+ }
+ return $r;
}
/**
* HTMLPurifier_DirectiveDef::$type for allowed values
* @param $allow_null Whether or not to allow null values
*/
- public function add($namespace, $name, $default, $type, $allow_null) {
+ public function add($key, $default, $type, $allow_null) {
$obj = new stdclass();
$obj->type = is_int($type) ? $type : HTMLPurifier_VarParser::$types[$type];
if ($allow_null) $obj->allow_null = true;
- $this->info[$namespace][$name] = $obj;
- $this->defaults[$namespace][$name] = $default;
- $this->defaultPlist->set("$namespace.$name", $default);
- }
-
- /**
- * Defines a namespace for directives to be put into.
- * @warning This is slightly different from the corresponding static
- * method.
- * @param $namespace Namespace's name
- */
- public function addNamespace($namespace) {
- $this->info[$namespace] = array();
- $this->defaults[$namespace] = array();
+ $this->info[$key] = $obj;
+ $this->defaults[$key] = $default;
+ $this->defaultPlist->set($key, $default);
}
/**
* @param $name Name of Directive
* @param $aliases Hash of aliased values to the real alias
*/
- public function addValueAliases($namespace, $name, $aliases) {
- if (!isset($this->info[$namespace][$name]->aliases)) {
- $this->info[$namespace][$name]->aliases = array();
+ public function addValueAliases($key, $aliases) {
+ if (!isset($this->info[$key]->aliases)) {
+ $this->info[$key]->aliases = array();
}
foreach ($aliases as $alias => $real) {
- $this->info[$namespace][$name]->aliases[$alias] = $real;
+ $this->info[$key]->aliases[$alias] = $real;
}
}
* @param $name Name of directive
* @param $allowed Lookup array of allowed values
*/
- public function addAllowedValues($namespace, $name, $allowed) {
- $this->info[$namespace][$name]->allowed = $allowed;
+ public function addAllowedValues($key, $allowed) {
+ $this->info[$key]->allowed = $allowed;
}
/**
* @param $new_namespace
* @param $new_name Directive that the alias will be to
*/
- public function addAlias($namespace, $name, $new_namespace, $new_name) {
+ public function addAlias($key, $new_key) {
$obj = new stdclass;
- $obj->namespace = $new_namespace;
- $obj->name = $new_name;
+ $obj->key = $new_key;
$obj->isAlias = true;
- $this->info[$namespace][$name] = $obj;
+ $this->info[$key] = $obj;
}
/**
* Replaces any stdclass that only has the type property with type integer.
*/
public function postProcess() {
- foreach ($this->info as $namespace => $info) {
- foreach ($info as $directive => $v) {
- if (count((array) $v) == 1) {
- $this->info[$namespace][$directive] = $v->type;
- } elseif (count((array) $v) == 2 && isset($v->allow_null)) {
- $this->info[$namespace][$directive] = -$v->type;
- }
+ foreach ($this->info as $key => $v) {
+ if (count((array) $v) == 1) {
+ $this->info[$key] = $v->type;
+ } elseif (count((array) $v) == 2 && isset($v->allow_null)) {
+ $this->info[$key] = -$v->type;
}
}
}
- // DEPRECATED METHODS
-
- /** @see HTMLPurifier_ConfigSchema->set() */
- public static function define($namespace, $name, $default, $type, $description) {
- HTMLPurifier_ConfigSchema::deprecated(__METHOD__);
- $type_values = explode('/', $type, 2);
- $type = $type_values[0];
- $modifier = isset($type_values[1]) ? $type_values[1] : false;
- $allow_null = ($modifier === 'null');
- $def = HTMLPurifier_ConfigSchema::instance();
- $def->add($namespace, $name, $default, $type, $allow_null);
- }
-
- /** @see HTMLPurifier_ConfigSchema->addNamespace() */
- public static function defineNamespace($namespace, $description) {
- HTMLPurifier_ConfigSchema::deprecated(__METHOD__);
- $def = HTMLPurifier_ConfigSchema::instance();
- $def->addNamespace($namespace);
- }
-
- /** @see HTMLPurifier_ConfigSchema->addValueAliases() */
- public static function defineValueAliases($namespace, $name, $aliases) {
- HTMLPurifier_ConfigSchema::deprecated(__METHOD__);
- $def = HTMLPurifier_ConfigSchema::instance();
- $def->addValueAliases($namespace, $name, $aliases);
- }
-
- /** @see HTMLPurifier_ConfigSchema->addAllowedValues() */
- public static function defineAllowedValues($namespace, $name, $allowed_values) {
- HTMLPurifier_ConfigSchema::deprecated(__METHOD__);
- $allowed = array();
- foreach ($allowed_values as $value) {
- $allowed[$value] = true;
- }
- $def = HTMLPurifier_ConfigSchema::instance();
- $def->addAllowedValues($namespace, $name, $allowed);
- }
-
- /** @see HTMLPurifier_ConfigSchema->addAlias() */
- public static function defineAlias($namespace, $name, $new_namespace, $new_name) {
- HTMLPurifier_ConfigSchema::deprecated(__METHOD__);
- $def = HTMLPurifier_ConfigSchema::instance();
- $def->addAlias($namespace, $name, $new_namespace, $new_name);
- }
-
- /** @deprecated, use HTMLPurifier_VarParser->parse() */
- public function validate($a, $b, $c = false) {
- trigger_error("HTMLPurifier_ConfigSchema->validate deprecated, use HTMLPurifier_VarParser->parse instead", E_USER_NOTICE);
- $parser = new HTMLPurifier_VarParser();
- return $parser->parse($a, $b, $c);
- }
-
- /**
- * Throws an E_USER_NOTICE stating that a method is deprecated.
- */
- private static function deprecated($method) {
- trigger_error("Static HTMLPurifier_ConfigSchema::$method deprecated, use add*() method instead", E_USER_NOTICE);
- }
-
}
// vim: et sw=4 sts=4
public function build($interchange) {
$schema = new HTMLPurifier_ConfigSchema();
- foreach ($interchange->namespaces as $n) {
- $schema->addNamespace($n->namespace);
- }
foreach ($interchange->directives as $d) {
$schema->add(
- $d->id->namespace,
- $d->id->directive,
+ $d->id->key,
$d->default,
$d->type,
$d->typeAllowsNull
);
if ($d->allowed !== null) {
$schema->addAllowedValues(
- $d->id->namespace,
- $d->id->directive,
+ $d->id->key,
$d->allowed
);
}
foreach ($d->aliases as $alias) {
$schema->addAlias(
- $alias->namespace,
- $alias->directive,
- $d->id->namespace,
- $d->id->directive
+ $alias->key,
+ $d->id->key
);
}
if ($d->valueAliases !== null) {
$schema->addValueAliases(
- $d->id->namespace,
- $d->id->directive,
+ $d->id->key,
$d->valueAliases
);
}
{
protected $interchange;
+ private $namespace;
protected function writeHTMLDiv($html) {
$this->startElement('div');
$this->startElement('configdoc');
$this->writeElement('title', $interchange->name);
- foreach ($interchange->namespaces as $namespace) {
- $this->buildNamespace($namespace);
+ foreach ($interchange->directives as $directive) {
+ $this->buildDirective($directive);
}
+ if ($this->namespace) $this->endElement(); // namespace
+
$this->endElement(); // configdoc
$this->flush();
}
- public function buildNamespace($namespace) {
- $this->startElement('namespace');
- $this->writeAttribute('id', $namespace->namespace);
-
- $this->writeElement('name', $namespace->namespace);
- $this->startElement('description');
- $this->writeHTMLDiv($namespace->description);
- $this->endElement(); // description
+ public function buildDirective($directive) {
- foreach ($this->interchange->directives as $directive) {
- if ($directive->id->namespace !== $namespace->namespace) continue;
- $this->buildDirective($directive);
+ // Kludge, although I suppose having a notion of a "root namespace"
+ // certainly makes things look nicer when documentation is built.
+ // Depends on things being sorted.
+ if (!$this->namespace || $this->namespace !== $directive->id->getRootNamespace()) {
+ if ($this->namespace) $this->endElement(); // namespace
+ $this->namespace = $directive->id->getRootNamespace();
+ $this->startElement('namespace');
+ $this->writeAttribute('id', $this->namespace);
+ $this->writeElement('name', $this->namespace);
}
- $this->endElement(); // namespace
- }
-
- public function buildDirective($directive) {
$this->startElement('directive');
$this->writeAttribute('id', $directive->id->toString());
- $this->writeElement('name', $directive->id->directive);
+ $this->writeElement('name', $directive->id->getDirective());
$this->startElement('aliases');
foreach ($directive->aliases as $alias) $this->writeElement('alias', $alias->toString());
*/
public $name;
- /**
- * Array of Namespace ID => array(namespace info)
- */
- public $namespaces = array();
-
/**
* Array of Directive ID => array(directive info)
*/
public $directives = array();
- /**
- * Adds a namespace array to $namespaces
- */
- public function addNamespace($namespace) {
- if (isset($this->namespaces[$i = $namespace->namespace])) {
- throw new HTMLPurifier_ConfigSchema_Exception("Cannot redefine namespace '$i'");
- }
- $this->namespaces[$i] = $namespace;
- }
-
/**
* Adds a directive array to $directives
*/
class HTMLPurifier_ConfigSchema_Interchange_Id
{
- public $namespace, $directive;
+ public $key;
- public function __construct($namespace, $directive) {
- $this->namespace = $namespace;
- $this->directive = $directive;
+ public function __construct($key) {
+ $this->key = $key;
}
/**
* cause problems for PHP 5.0 support.
*/
public function toString() {
- return $this->namespace . '.' . $this->directive;
+ return $this->key;
+ }
+
+ public function getRootNamespace() {
+ return substr($this->key, 0, strpos($this->key, "."));
+ }
+
+ public function getDirective() {
+ return substr($this->key, strpos($this->key, ".") + 1);
}
public static function make($id) {
- list($namespace, $directive) = explode('.', $id);
- return new HTMLPurifier_ConfigSchema_Interchange_Id($namespace, $directive);
+ return new HTMLPurifier_ConfigSchema_Interchange_Id($id);
}
}
}
public static function buildFromDirectory($dir = null) {
- $parser = new HTMLPurifier_StringHashParser();
$builder = new HTMLPurifier_ConfigSchema_InterchangeBuilder();
$interchange = new HTMLPurifier_ConfigSchema_Interchange();
+ return $builder->buildDir($interchange, $dir);
+ }
- if (!$dir) $dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema/';
- $info = parse_ini_file($dir . 'info.ini');
- $interchange->name = $info['name'];
+ public function buildDir($interchange, $dir = null) {
+ if (!$dir) $dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema';
+ if (file_exists($dir . '/info.ini')) {
+ $info = parse_ini_file($dir . '/info.ini');
+ $interchange->name = $info['name'];
+ }
$files = array();
$dh = opendir($dir);
sort($files);
foreach ($files as $file) {
- $builder->build(
- $interchange,
- new HTMLPurifier_StringHash( $parser->parseFile($dir . $file) )
- );
+ $this->buildFile($interchange, $dir . '/' . $file);
}
return $interchange;
}
+ public function buildFile($interchange, $file) {
+ $parser = new HTMLPurifier_StringHashParser();
+ $this->build(
+ $interchange,
+ new HTMLPurifier_StringHash( $parser->parseFile($file) )
+ );
+ }
+
/**
* Builds an interchange object based on a hash.
* @param $interchange HTMLPurifier_ConfigSchema_Interchange object to build
throw new HTMLPurifier_ConfigSchema_Exception('Hash does not have any ID');
}
if (strpos($hash['ID'], '.') === false) {
- $this->buildNamespace($interchange, $hash);
+ if (count($hash) == 2 && isset($hash['DESCRIPTION'])) {
+ $hash->offsetGet('DESCRIPTION'); // prevent complaining
+ } else {
+ throw new HTMLPurifier_ConfigSchema_Exception('All directives must have a namespace');
+ }
} else {
$this->buildDirective($interchange, $hash);
}
$this->_findUnused($hash);
}
- public function buildNamespace($interchange, $hash) {
- $namespace = new HTMLPurifier_ConfigSchema_Interchange_Namespace();
- $namespace->namespace = $hash->offsetGet('ID');
- if (isset($hash['DESCRIPTION'])) {
- $namespace->description = $hash->offsetGet('DESCRIPTION');
- }
- $interchange->addNamespace($namespace);
- }
-
public function buildDirective($interchange, $hash) {
$directive = new HTMLPurifier_ConfigSchema_Interchange_Directive();
$this->aliases = array();
// PHP is a bit lax with integer <=> string conversions in
// arrays, so we don't use the identical !== comparison
- foreach ($interchange->namespaces as $i => $namespace) {
- if ($i != $namespace->namespace) $this->error(false, "Integrity violation: key '$i' does not match internal id '{$namespace->namespace}'");
- $this->validateNamespace($namespace);
- }
foreach ($interchange->directives as $i => $directive) {
$id = $directive->id->toString();
if ($i != $id) $this->error(false, "Integrity violation: key '$i' does not match internal id '$id'");
return true;
}
- /**
- * Validates a HTMLPurifier_ConfigSchema_Interchange_Namespace object.
- */
- public function validateNamespace($n) {
- $this->context[] = "namespace '{$n->namespace}'";
- $this->with($n, 'namespace')
- ->assertNotEmpty()
- ->assertAlnum(); // implicit assertIsString handled by InterchangeBuilder
- $this->with($n, 'description')
- ->assertNotEmpty()
- ->assertIsString(); // handled by InterchangeBuilder
- array_pop($this->context);
- }
-
/**
* Validates a HTMLPurifier_ConfigSchema_Interchange_Id object.
*/
// handled by InterchangeBuilder
$this->error(false, 'is not an instance of HTMLPurifier_ConfigSchema_Interchange_Id');
}
- if (!isset($this->interchange->namespaces[$id->namespace])) {
- $this->error('namespace', 'does not exist'); // assumes that the namespace was validated already
- }
- $this->with($id, 'directive')
+ // keys are now unconstrained (we might want to narrow down to A-Za-z0-9.)
+ // we probably should check that it has at least one namespace
+ $this->with($id, 'key')
->assertNotEmpty()
- ->assertAlnum(); // implicit assertIsString handled by InterchangeBuilder
+ ->assertIsString(); // implicit assertIsString handled by InterchangeBuilder
array_pop($this->context);
}
--- /dev/null
+Attr.AllowedClasses
+TYPE: lookup/null
+VERSION: 4.0.0
+DEFAULT: null
+--DESCRIPTION--
+List of allowed class values in the class attribute. By default, this is null,
+which means all classes are allowed.
+--# vim: et sw=4 sts=4
--- /dev/null
+Attr.ClassUseCDATA
+TYPE: bool/null
+DEFAULT: null
+VERSION: 4.0.0
+--DESCRIPTION--
+If null, class will auto-detect the doctype and, if matching XHTML 1.1 or
+XHTML 2.0, will use the restrictive NMTOKENS specification of class. Otherwise,
+it will use a relaxed CDATA definition. If true, the relaxed CDATA definition
+is forced; if false, the NMTOKENS definition is forced. To get behavior
+of HTML Purifier prior to 4.0.0, set this directive to false.
+
+Some rational behind the auto-detection:
+in previous versions of HTML Purifier, it was assumed that the form of
+class was NMTOKENS, as specified by the XHTML Modularization (representing
+XHTML 1.1 and XHTML 2.0). The DTDs for HTML 4.01 and XHTML 1.0, however
+specify class as CDATA. HTML 5 effectively defines it as CDATA, but
+with the additional constraint that each name should be unique (this is not
+explicitly outlined in previous specifications).
+--# vim: et sw=4 sts=4
--- /dev/null
+Attr.ForbiddenClasses
+TYPE: lookup
+VERSION: 4.0.0
+DEFAULT: array()
+--DESCRIPTION--
+List of forbidden class values in the class attribute. By default, this is
+empty, which means that no classes are forbidden. See also %Attr.AllowedClasses.
+--# vim: et sw=4 sts=4
--- /dev/null
+AutoFormat.PurifierLinkify.DocURL
+TYPE: string
+VERSION: 2.0.1
+DEFAULT: '#%s'
+ALIASES: AutoFormatParam.PurifierLinkifyDocURL
+--DESCRIPTION--
+<p>
+ Location of configuration documentation to link to, let %s substitute
+ into the configuration's namespace and directive names sans the percent
+ sign.
+</p>
+--# vim: et sw=4 sts=4
--- /dev/null
+AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions
+TYPE: lookup
+VERSION: 4.0.0
+DEFAULT: array('td' => true, 'th' => true)
+--DESCRIPTION--
+<p>
+ When %AutoFormat.RemoveEmpty and %AutoFormat.RemoveEmpty.RemoveNbsp
+ are enabled, this directive defines what HTML elements should not be
+ removede if they have only a non-breaking space in them.
+</p>
+--# vim: et sw=4 sts=4
--- /dev/null
+AutoFormat.RemoveEmpty.RemoveNbsp
+TYPE: bool
+VERSION: 4.0.0
+DEFAULT: false
+--DESCRIPTION--
+<p>
+ When enabled, HTML Purifier will treat any elements that contain only
+ non-breaking spaces as well as regular whitespace as empty, and remove
+ them when %AutoForamt.RemoveEmpty is enabled.
+</p>
+<p>
+ See %AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions for a list of elements
+ that don't have this behavior applied to them.
+</p>
+--# vim: et sw=4 sts=4
</p>
<p>
Elements that contain only whitespace will be treated as empty. Non-breaking
- spaces, however, do not count as whitespace.
+ spaces, however, do not count as whitespace. See
+ %AutoFormat.RemoveEmpty.RemoveNbsp for alternate behavior.
</p>
<p>
This algorithm is not perfect; you may still notice some empty tags,
because they were not permitted in that context, or tags that, after
being auto-closed by another tag, where empty. This is for safety reasons
to prevent clever code from breaking validation. The general rule of thumb:
- if a tag looked empty on the way end, it will get removed; if HTML Purifier
+ if a tag looked empty on the way in, it will get removed; if HTML Purifier
made it empty, it will stay.
</p>
--# vim: et sw=4 sts=4
--- /dev/null
+AutoFormat.RemoveSpansWithoutAttributes
+TYPE: bool
+VERSION: 4.0.1
+DEFAULT: false
+--DESCRIPTION--
+<p>
+ This directive causes <code>span</code> tags without any attributes
+ to be removed. It will also remove spans that had all attributes
+ removed during processing.
+</p>
+--# vim: et sw=4 sts=4
--- /dev/null
+CSS.AllowedFonts
+TYPE: lookup/null
+VERSION: 4.3.0
+DEFAULT: NULL
+--DESCRIPTION--
+<p>
+ Allows you to manually specify a set of allowed fonts. If
+ <code>NULL</code>, all fonts are allowed. This directive
+ affects generic names (serif, sans-serif, monospace, cursive,
+ fantasy) as well as specific font families.
+</p>
+--# vim: et sw=4 sts=4
--- /dev/null
+CSS.ForbiddenProperties
+TYPE: lookup
+VERSION: 4.2.0
+DEFAULT: array()
+--DESCRIPTION--
+<p>
+ This is the logical inverse of %CSS.AllowedProperties, and it will
+ override that directive or any other directive. If possible,
+ %CSS.AllowedProperties is recommended over this directive,
+ because it can sometimes be difficult to tell whether or not you've
+ forbidden all of the CSS properties you truly would like to disallow.
+</p>
+--# vim: et sw=4 sts=4
--- /dev/null
+CSS.Trusted
+TYPE: bool
+VERSION: 4.2.1
+DEFAULT: false
+--DESCRIPTION--
+Indicates whether or not the user's CSS input is trusted or not. If the
+input is trusted, a more expansive set of allowed properties. See
+also %HTML.Trusted.
+--# vim: et sw=4 sts=4
--- /dev/null
+Cache.SerializerPermissions
+TYPE: int
+VERSION: 4.3.0
+DEFAULT: 0755
+--DESCRIPTION--
+
+<p>
+ Directory permissions of the files and directories created inside
+ the DefinitionCache/Serializer or other custom serializer path.
+</p>
+--# vim: et sw=4 sts=4
--- /dev/null
+Core.NormalizeNewlines
+TYPE: bool
+VERSION: 4.2.0
+DEFAULT: true
+--DESCRIPTION--
+<p>
+ Whether or not to normalize newlines to the operating
+ system default. When <code>false</code>, HTML Purifier
+ will attempt to preserve mixed newline files.
+</p>
+--# vim: et sw=4 sts=4
--- /dev/null
+Core.RemoveProcessingInstructions
+TYPE: bool
+VERSION: 4.2.0
+DEFAULT: false
+--DESCRIPTION--
+Instead of escaping processing instructions in the form <code><? ...
+?></code>, remove it out-right. This may be useful if the HTML
+you are validating contains XML processing instruction gunk, however,
+it can also be user-unfriendly for people attempting to post PHP
+snippets.
+--# vim: et sw=4 sts=4
--- /dev/null
+Filter.ExtractStyleBlocks.Escaping
+TYPE: bool
+VERSION: 3.0.0
+DEFAULT: true
+ALIASES: Filter.ExtractStyleBlocksEscaping, FilterParam.ExtractStyleBlocksEscaping
+--DESCRIPTION--
+
+<p>
+ Whether or not to escape the dangerous characters <, > and &
+ as \3C, \3E and \26, respectively. This is can be safely set to false
+ if the contents of StyleBlocks will be placed in an external stylesheet,
+ where there is no risk of it being interpreted as HTML.
+</p>
+--# vim: et sw=4 sts=4
--- /dev/null
+Filter.ExtractStyleBlocks.Scope
+TYPE: string/null
+VERSION: 3.0.0
+DEFAULT: NULL
+ALIASES: Filter.ExtractStyleBlocksScope, FilterParam.ExtractStyleBlocksScope
+--DESCRIPTION--
+
+<p>
+ If you would like users to be able to define external stylesheets, but
+ only allow them to specify CSS declarations for a specific node and
+ prevent them from fiddling with other elements, use this directive.
+ It accepts any valid CSS selector, and will prepend this to any
+ CSS declaration extracted from the document. For example, if this
+ directive is set to <code>#user-content</code> and a user uses the
+ selector <code>a:hover</code>, the final selector will be
+ <code>#user-content a:hover</code>.
+</p>
+<p>
+ The comma shorthand may be used; consider the above example, with
+ <code>#user-content, #user-content2</code>, the final selector will
+ be <code>#user-content a:hover, #user-content2 a:hover</code>.
+</p>
+<p>
+ <strong>Warning:</strong> It is possible for users to bypass this measure
+ using a naughty + selector. This is a bug in CSS Tidy 1.3, not HTML
+ Purifier, and I am working to get it fixed. Until then, HTML Purifier
+ performs a basic check to prevent this.
+</p>
+--# vim: et sw=4 sts=4
--- /dev/null
+Filter.ExtractStyleBlocks.TidyImpl
+TYPE: mixed/null
+VERSION: 3.1.0
+DEFAULT: NULL
+ALIASES: FilterParam.ExtractStyleBlocksTidyImpl
+--DESCRIPTION--
+<p>
+ If left NULL, HTML Purifier will attempt to instantiate a <code>csstidy</code>
+ class to use for internal cleaning. This will usually be good enough.
+</p>
+<p>
+ However, for trusted user input, you can set this to <code>false</code> to
+ disable cleaning. In addition, you can supply your own concrete implementation
+ of Tidy's interface to use, although I don't know why you'd want to do that.
+</p>
+--# vim: et sw=4 sts=4
VERSION: 3.1.0
DEFAULT: false
--DESCRIPTION--
+<p>
+ <strong>Warning:</strong> Deprecated in favor of %HTML.SafeObject and
+ %Output.FlashCompat (turn both on to allow YouTube videos and other
+ Flash content).
+</p>
<p>
This directive enables YouTube video embedding in HTML Purifier. Check
<a href="http://htmlpurifier.org/docs/enduser-youtube.html">this document
--DESCRIPTION--
<p>
- This is a convenience directive that rolls the functionality of
- %HTML.AllowedElements and %HTML.AllowedAttributes into one directive.
+ This is a preferred convenience directive that combines
+ %HTML.AllowedElements and %HTML.AllowedAttributes.
Specify elements and attributes that are allowed using:
- <code>element1[attr1|attr2],element2...</code>. You can also use
- newlines instead of commas to separate elements.
+ <code>element1[attr1|attr2],element2...</code>. For example,
+ if you would like to only allow paragraphs and links, specify
+ <code>a[href],p</code>. You can specify attributes that apply
+ to all elements using an asterisk, e.g. <code>*[lang]</code>.
+ You can also use newlines instead of commas to separate elements.
</p>
<p>
<strong>Warning</strong>:
DEFAULT: NULL
--DESCRIPTION--
<p>
- If HTML Purifier's tag set is unsatisfactory for your needs, you
- can overload it with your own list of tags to allow. Note that this
- method is subtractive: it does its job by taking away from HTML Purifier
- usual feature set, so you cannot add a tag that HTML Purifier never
- supported in the first place (like embed, form or head). If you
- change this, you probably also want to change %HTML.AllowedAttributes.
+ If HTML Purifier's tag set is unsatisfactory for your needs, you can
+ overload it with your own list of tags to allow. If you change
+ this, you probably also want to change %HTML.AllowedAttributes; see
+ also %HTML.Allowed which lets you set allowed elements and
+ attributes at the same time.
+</p>
+<p>
+ If you attempt to allow an element that HTML Purifier does not know
+ about, HTML Purifier will raise an error. You will need to manually
+ tell HTML Purifier about this element by using the
+ <a href="http://htmlpurifier.org/docs/enduser-customize.html">advanced customization features.</a>
</p>
<p>
<strong>Warning:</strong> If another directive conflicts with the
--- /dev/null
+HTML.Attr.Name.UseCDATA
+TYPE: bool
+DEFAULT: false
+VERSION: 4.0.0
+--DESCRIPTION--
+The W3C specification DTD defines the name attribute to be CDATA, not ID, due
+to limitations of DTD. In certain documents, this relaxed behavior is desired,
+whether it is to specify duplicate names, or to specify names that would be
+illegal IDs (for example, names that begin with a digit.) Set this configuration
+directive to true to use the relaxed parsing rules.
+--# vim: et sw=4 sts=4
--- /dev/null
+HTML.FlashAllowFullScreen
+TYPE: bool
+VERSION: 4.2.0
+DEFAULT: false
+--DESCRIPTION--
+<p>
+ Whether or not to permit embedded Flash content from
+ %HTML.SafeObject to expand to the full screen. Corresponds to
+ the <code>allowFullScreen</code> parameter.
+</p>
+--# vim: et sw=4 sts=4
--- /dev/null
+HTML.Nofollow
+TYPE: bool
+VERSION: 4.3.0
+DEFAULT: FALSE
+--DESCRIPTION--
+If enabled, nofollow rel attributes are added to all outgoing links.
+--# vim: et sw=4 sts=4
Whether or not to permit embed tags in documents, with a number of extra
security features added to prevent script execution. This is similar to
what websites like MySpace do to embed tags. Embed is a proprietary
- element and will cause your website to stop validating. You probably want
- to enable this with %HTML.SafeObject.
- <strong>Highly experimental.</strong>
-</p>
+ element and will cause your website to stop validating; you should
+ see if you can use %Output.FlashCompat with %HTML.SafeObject instead
+ first.</p>
--# vim: et sw=4 sts=4
<p>
Whether or not to permit object tags in documents, with a number of extra
security features added to prevent script execution. This is similar to
- what websites like MySpace do to object tags. You may also want to
- enable %HTML.SafeEmbed for maximum interoperability with Internet Explorer,
- although embed tags will cause your website to stop validating.
- <strong>Highly experimental.</strong>
+ what websites like MySpace do to object tags. You should also enable
+ %Output.FlashCompat in order to generate Internet Explorer
+ compatibility code for your object tags.
</p>
--# vim: et sw=4 sts=4
--DESCRIPTION--
Indicates whether or not the user input is trusted or not. If the input is
trusted, a more expansive set of allowed tags and attributes will be used.
+See also %CSS.Trusted.
--# vim: et sw=4 sts=4
--- /dev/null
+Output.FixInnerHTML
+TYPE: bool
+VERSION: 4.3.0
+DEFAULT: true
+--DESCRIPTION--
+<p>
+ If true, HTML Purifier will protect against Internet Explorer's
+ mishandling of the <code>innerHTML</code> attribute by appending
+ a space to any attribute that does not contain angled brackets, spaces
+ or quotes, but contains a backtick. This slightly changes the
+ semantics of any given attribute, so if this is unacceptable and
+ you do not use <code>innerHTML</code> on any of your pages, you can
+ turn this directive off.
+</p>
+--# vim: et sw=4 sts=4
--- /dev/null
+Output.FlashCompat
+TYPE: bool
+VERSION: 4.1.0
+DEFAULT: false
+--DESCRIPTION--
+<p>
+ If true, HTML Purifier will generate Internet Explorer compatibility
+ code for all object code. This is highly recommended if you enable
+ %HTML.SafeObject.
+</p>
+--# vim: et sw=4 sts=4
--DESCRIPTION--
Whitelist that defines the schemes that a URI is allowed to have. This
prevents XSS attacks from using pseudo-schemes like javascript or mocha.
+There is also support for the <code>data</code> and <code>file</code>
+URI schemes, but they are not enabled by default.
--# vim: et sw=4 sts=4
URI.DisableResources
TYPE: bool
-VERSION: 1.3.0
+VERSION: 4.2.0
DEFAULT: false
--DESCRIPTION--
-
<p>
Disables embedding resources, essentially meaning no pictures. You can
still link to them though. See %URI.DisableExternalResources for why
this might be a good idea.
</p>
+<p>
+ <em>Note:</em> While this directive has been available since 1.3.0,
+ it didn't actually start doing anything until 4.2.0.
+</p>
--# vim: et sw=4 sts=4
*/
public $setup = false;
+ /**
+ * If true, write out the final definition object to the cache after
+ * setup. This will be true only if all invocations to get a raw
+ * definition object are also optimized. This does not cause file
+ * system thrashing because on subsequent calls the cached object
+ * is used and any writes to the raw definition object are short
+ * circuited. See enduser-customize.html for the high-level
+ * picture.
+ */
+ public $optimized = null;
+
/**
* What type of definition is it?
*/
public function generateKey($config) {
return $config->version . ',' . // possibly replace with function calls
$config->getBatchSerial($this->type) . ',' .
- $config->get($this->type, 'DefinitionRev');
+ $config->get($this->type . '.DefinitionRev');
}
/**
// versions match, ids match, check revision number
if (
$hash == $config->getBatchSerial($this->type) &&
- $revision < $config->get($this->type, 'DefinitionRev')
+ $revision < $config->get($this->type . '.DefinitionRev')
) return true;
return false;
}
$file = $this->generateFilePath($config);
if (file_exists($file)) return false;
if (!$this->_prepareDir($config)) return false;
- return $this->_write($file, serialize($def));
+ return $this->_write($file, serialize($def), $config);
}
public function set($def, $config) {
if (!$this->checkDefType($def)) return;
$file = $this->generateFilePath($config);
if (!$this->_prepareDir($config)) return false;
- return $this->_write($file, serialize($def));
+ return $this->_write($file, serialize($def), $config);
}
public function replace($def, $config) {
$file = $this->generateFilePath($config);
if (!file_exists($file)) return false;
if (!$this->_prepareDir($config)) return false;
- return $this->_write($file, serialize($def));
+ return $this->_write($file, serialize($def), $config);
}
public function get($config) {
* @todo Make protected
*/
public function generateBaseDirectoryPath($config) {
- $base = $config->get('Cache', 'SerializerPath');
+ $base = $config->get('Cache.SerializerPath');
$base = is_null($base) ? HTMLPURIFIER_PREFIX . '/HTMLPurifier/DefinitionCache/Serializer' : $base;
return $base;
}
* Convenience wrapper function for file_put_contents
* @param $file File name to write to
* @param $data Data to write into file
+ * @param $config Config object
* @return Number of bytes written if success, or false if failure.
*/
- private function _write($file, $data) {
- return file_put_contents($file, $data);
+ private function _write($file, $data, $config) {
+ $result = file_put_contents($file, $data);
+ if ($result !== false) {
+ // set permissions of the new file (no execute)
+ $chmod = $config->get('Cache.SerializerPermissions');
+ if (!$chmod) {
+ $chmod = 0644; // invalid config or simpletest
+ }
+ $chmod = $chmod & 0666;
+ chmod($file, $chmod);
+ }
+ return $result;
}
/**
* Prepares the directory that this type stores the serials in
+ * @param $config Config object
* @return True if successful
*/
private function _prepareDir($config) {
$directory = $this->generateDirectoryPath($config);
+ $chmod = $config->get('Cache.SerializerPermissions');
+ if (!$chmod) {
+ $chmod = 0755; // invalid config or simpletest
+ }
if (!is_dir($directory)) {
$base = $this->generateBaseDirectoryPath($config);
if (!is_dir($base)) {
please create or change using %Cache.SerializerPath',
E_USER_WARNING);
return false;
- } elseif (!$this->_testPermissions($base)) {
+ } elseif (!$this->_testPermissions($base, $chmod)) {
return false;
}
- $old = umask(0022); // disable group and world writes
- mkdir($directory);
+ $old = umask(0000);
+ mkdir($directory, $chmod);
umask($old);
- } elseif (!$this->_testPermissions($directory)) {
+ } elseif (!$this->_testPermissions($directory, $chmod)) {
return false;
}
return true;
/**
* Tests permissions on a directory and throws out friendly
* error messages and attempts to chmod it itself if possible
+ * @param $dir Directory path
+ * @param $chmod Permissions
+ * @return True if directory writable
*/
- private function _testPermissions($dir) {
+ private function _testPermissions($dir, $chmod) {
// early abort, if it is writable, everything is hunky-dory
if (is_writable($dir)) return true;
if (!is_dir($dir)) {
// POSIX system, we can give more specific advice
if (fileowner($dir) === posix_getuid()) {
// we can chmod it ourselves
- chmod($dir, 0755);
- return true;
+ $chmod = $chmod | 0700;
+ if (chmod($dir, $chmod)) return true;
} elseif (filegroup($dir) === posix_getgid()) {
- $chmod = '775';
+ $chmod = $chmod | 0070;
} else {
// PHP's probably running as nobody, so we'll
// need to give global permissions
- $chmod = '777';
+ $chmod = $chmod | 0777;
}
trigger_error('Directory '.$dir.' not writable, '.
- 'please chmod to ' . $chmod,
+ 'please chmod to ' . decoct($chmod),
E_USER_WARNING);
} else {
// generic error message
* @param $config Instance of HTMLPurifier_Config
*/
public function create($type, $config) {
- $method = $config->get('Cache', 'DefinitionImpl');
+ $method = $config->get('Cache.DefinitionImpl');
if ($method === null) {
return new HTMLPurifier_DefinitionCache_Null($type);
}
*/
public function getDoctypeFromConfig($config) {
// recommended test
- $doctype = $config->get('HTML', 'Doctype');
+ $doctype = $config->get('HTML.Doctype');
if (!empty($doctype)) return $doctype;
- $doctype = $config->get('HTML', 'CustomDoctype');
+ $doctype = $config->get('HTML.CustomDoctype');
if (!empty($doctype)) return $doctype;
// backwards-compatibility
- if ($config->get('HTML', 'XHTML')) {
+ if ($config->get('HTML.XHTML')) {
$doctype = 'XHTML 1.0';
} else {
$doctype = 'HTML 4.01';
}
- if ($config->get('HTML', 'Strict')) {
+ if ($config->get('HTML.Strict')) {
$doctype .= ' Strict';
} else {
$doctype .= ' Transitional';
*/
public $autoclose = array();
+ /**
+ * If a foreign element is found in this element, test if it is
+ * allowed by this sub-element; if it is, instead of closing the
+ * current element, place it inside this element.
+ */
+ public $wrap;
+
/**
* Whether or not this is a formatting element affected by the
* "Active Formatting Elements" algorithm.
$this->_mergeAssocArray($this->excludes, $def->excludes);
if(!empty($def->content_model)) {
- $this->content_model .= ' | ' . $def->content_model;
+ $this->content_model =
+ str_replace("#SUPER", $this->content_model, $def->content_model);
$this->child = false;
}
if(!empty($def->content_model_type)) {
/**
* Error-handler that mutes errors, alternative to shut-up operator.
*/
- private static function muteErrorHandler() {}
+ public static function muteErrorHandler() {}
/**
* Cleans a UTF-8 string for well-formedness and SGML validity
* Converts a string to UTF-8 based on configuration.
*/
public static function convertToUTF8($str, $config, $context) {
- $encoding = $config->get('Core', 'Encoding');
+ $encoding = $config->get('Core.Encoding');
if ($encoding === 'utf-8') return $str;
static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv');
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
- if ($iconv && !$config->get('Test', 'ForceNoIconv')) {
+ if ($iconv && !$config->get('Test.ForceNoIconv')) {
$str = iconv($encoding, 'utf-8//IGNORE', $str);
if ($str === false) {
// $encoding is not a valid encoding
* characters being omitted.
*/
public static function convertFromUTF8($str, $config, $context) {
- $encoding = $config->get('Core', 'Encoding');
+ $encoding = $config->get('Core.Encoding');
if ($encoding === 'utf-8') return $str;
static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv');
- if ($escape = $config->get('Core', 'EscapeNonASCIICharacters')) {
+ if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
$str = HTMLPurifier_Encoder::convertToASCIIDumbLossless($str);
}
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
- if ($iconv && !$config->get('Test', 'ForceNoIconv')) {
+ if ($iconv && !$config->get('Test.ForceNoIconv')) {
// Undo our previous fix in convertToUTF8, otherwise iconv will barf
$ascii_fix = HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding);
if (!$escape && !empty($ascii_fix)) {
-a:246:{s:4:"nbsp";s:2:" ";s:5:"iexcl";s:2:"¡";s:4:"cent";s:2:"¢";s:5:"pound";s:2:"£";s:6:"curren";s:2:"¤";s:3:"yen";s:2:"¥";s:6:"brvbar";s:2:"¦";s:4:"sect";s:2:"§";s:3:"uml";s:2:"¨";s:4:"copy";s:2:"©";s:4:"ordf";s:2:"ª";s:5:"laquo";s:2:"«";s:3:"not";s:2:"¬";s:3:"shy";s:2:"";s:3:"reg";s:2:"®";s:4:"macr";s:2:"¯";s:3:"deg";s:2:"°";s:6:"plusmn";s:2:"±";s:5:"acute";s:2:"´";s:5:"micro";s:2:"µ";s:4:"para";s:2:"¶";s:6:"middot";s:2:"·";s:5:"cedil";s:2:"¸";s:4:"ordm";s:2:"º";s:5:"raquo";s:2:"»";s:6:"iquest";s:2:"¿";s:6:"Agrave";s:2:"À";s:6:"Aacute";s:2:"Á";s:5:"Acirc";s:2:"Â";s:6:"Atilde";s:2:"Ã";s:4:"Auml";s:2:"Ä";s:5:"Aring";s:2:"Å";s:5:"AElig";s:2:"Æ";s:6:"Ccedil";s:2:"Ç";s:6:"Egrave";s:2:"È";s:6:"Eacute";s:2:"É";s:5:"Ecirc";s:2:"Ê";s:4:"Euml";s:2:"Ë";s:6:"Igrave";s:2:"Ì";s:6:"Iacute";s:2:"Í";s:5:"Icirc";s:2:"Î";s:4:"Iuml";s:2:"Ï";s:3:"ETH";s:2:"Ð";s:6:"Ntilde";s:2:"Ñ";s:6:"Ograve";s:2:"Ò";s:6:"Oacute";s:2:"Ó";s:5:"Ocirc";s:2:"Ô";s:6:"Otilde";s:2:"Õ";s:4:"Ouml";s:2:"Ö";s:5:"times";s:2:"×";s:6:"Oslash";s:2:"Ø";s:6:"Ugrave";s:2:"Ù";s:6:"Uacute";s:2:"Ú";s:5:"Ucirc";s:2:"Û";s:4:"Uuml";s:2:"Ü";s:6:"Yacute";s:2:"Ý";s:5:"THORN";s:2:"Þ";s:5:"szlig";s:2:"ß";s:6:"agrave";s:2:"à";s:6:"aacute";s:2:"á";s:5:"acirc";s:2:"â";s:6:"atilde";s:2:"ã";s:4:"auml";s:2:"ä";s:5:"aring";s:2:"å";s:5:"aelig";s:2:"æ";s:6:"ccedil";s:2:"ç";s:6:"egrave";s:2:"è";s:6:"eacute";s:2:"é";s:5:"ecirc";s:2:"ê";s:4:"euml";s:2:"ë";s:6:"igrave";s:2:"ì";s:6:"iacute";s:2:"í";s:5:"icirc";s:2:"î";s:4:"iuml";s:2:"ï";s:3:"eth";s:2:"ð";s:6:"ntilde";s:2:"ñ";s:6:"ograve";s:2:"ò";s:6:"oacute";s:2:"ó";s:5:"ocirc";s:2:"ô";s:6:"otilde";s:2:"õ";s:4:"ouml";s:2:"ö";s:6:"divide";s:2:"÷";s:6:"oslash";s:2:"ø";s:6:"ugrave";s:2:"ù";s:6:"uacute";s:2:"ú";s:5:"ucirc";s:2:"û";s:4:"uuml";s:2:"ü";s:6:"yacute";s:2:"ý";s:5:"thorn";s:2:"þ";s:4:"yuml";s:2:"ÿ";s:4:"quot";s:1:""";s:3:"amp";s:1:"&";s:2:"lt";s:1:"<";s:2:"gt";s:1:">";s:4:"apos";s:1:"'";s:5:"OElig";s:2:"Œ";s:5:"oelig";s:2:"œ";s:6:"Scaron";s:2:"Š";s:6:"scaron";s:2:"š";s:4:"Yuml";s:2:"Ÿ";s:4:"circ";s:2:"ˆ";s:5:"tilde";s:2:"˜";s:4:"ensp";s:3:" ";s:4:"emsp";s:3:" ";s:6:"thinsp";s:3:" ";s:4:"zwnj";s:3:"";s:3:"zwj";s:3:"";s:3:"lrm";s:3:"";s:3:"rlm";s:3:"";s:5:"ndash";s:3:"–";s:5:"mdash";s:3:"—";s:5:"lsquo";s:3:"‘";s:5:"rsquo";s:3:"’";s:5:"sbquo";s:3:"‚";s:5:"ldquo";s:3:"“";s:5:"rdquo";s:3:"”";s:5:"bdquo";s:3:"„";s:6:"dagger";s:3:"†";s:6:"Dagger";s:3:"‡";s:6:"permil";s:3:"‰";s:6:"lsaquo";s:3:"‹";s:6:"rsaquo";s:3:"›";s:4:"euro";s:3:"€";s:4:"fnof";s:2:"ƒ";s:5:"Alpha";s:2:"Α";s:4:"Beta";s:2:"Β";s:5:"Gamma";s:2:"Γ";s:5:"Delta";s:2:"Δ";s:7:"Epsilon";s:2:"Ε";s:4:"Zeta";s:2:"Ζ";s:3:"Eta";s:2:"Η";s:5:"Theta";s:2:"Θ";s:4:"Iota";s:2:"Ι";s:5:"Kappa";s:2:"Κ";s:6:"Lambda";s:2:"Λ";s:2:"Mu";s:2:"Μ";s:2:"Nu";s:2:"Ν";s:2:"Xi";s:2:"Ξ";s:7:"Omicron";s:2:"Ο";s:2:"Pi";s:2:"Π";s:3:"Rho";s:2:"Ρ";s:5:"Sigma";s:2:"Σ";s:3:"Tau";s:2:"Τ";s:7:"Upsilon";s:2:"Υ";s:3:"Phi";s:2:"Φ";s:3:"Chi";s:2:"Χ";s:3:"Psi";s:2:"Ψ";s:5:"Omega";s:2:"Ω";s:5:"alpha";s:2:"α";s:4:"beta";s:2:"β";s:5:"gamma";s:2:"γ";s:5:"delta";s:2:"δ";s:7:"epsilon";s:2:"ε";s:4:"zeta";s:2:"ζ";s:3:"eta";s:2:"η";s:5:"theta";s:2:"θ";s:4:"iota";s:2:"ι";s:5:"kappa";s:2:"κ";s:6:"lambda";s:2:"λ";s:2:"mu";s:2:"μ";s:2:"nu";s:2:"ν";s:2:"xi";s:2:"ξ";s:7:"omicron";s:2:"ο";s:2:"pi";s:2:"π";s:3:"rho";s:2:"ρ";s:6:"sigmaf";s:2:"ς";s:5:"sigma";s:2:"σ";s:3:"tau";s:2:"τ";s:7:"upsilon";s:2:"υ";s:3:"phi";s:2:"φ";s:3:"chi";s:2:"χ";s:3:"psi";s:2:"ψ";s:5:"omega";s:2:"ω";s:8:"thetasym";s:2:"ϑ";s:5:"upsih";s:2:"ϒ";s:3:"piv";s:2:"ϖ";s:4:"bull";s:3:"•";s:6:"hellip";s:3:"…";s:5:"prime";s:3:"′";s:5:"Prime";s:3:"″";s:5:"oline";s:3:"‾";s:5:"frasl";s:3:"⁄";s:6:"weierp";s:3:"℘";s:5:"image";s:3:"ℑ";s:4:"real";s:3:"ℜ";s:5:"trade";s:3:"™";s:7:"alefsym";s:3:"ℵ";s:4:"larr";s:3:"←";s:4:"uarr";s:3:"↑";s:4:"rarr";s:3:"→";s:4:"darr";s:3:"↓";s:4:"harr";s:3:"↔";s:5:"crarr";s:3:"↵";s:4:"lArr";s:3:"⇐";s:4:"uArr";s:3:"⇑";s:4:"rArr";s:3:"⇒";s:4:"dArr";s:3:"⇓";s:4:"hArr";s:3:"⇔";s:6:"forall";s:3:"∀";s:4:"part";s:3:"∂";s:5:"exist";s:3:"∃";s:5:"empty";s:3:"∅";s:5:"nabla";s:3:"∇";s:4:"isin";s:3:"∈";s:5:"notin";s:3:"∉";s:2:"ni";s:3:"∋";s:4:"prod";s:3:"∏";s:3:"sum";s:3:"∑";s:5:"minus";s:3:"−";s:6:"lowast";s:3:"∗";s:5:"radic";s:3:"√";s:4:"prop";s:3:"∝";s:5:"infin";s:3:"∞";s:3:"ang";s:3:"∠";s:3:"and";s:3:"∧";s:2:"or";s:3:"∨";s:3:"cap";s:3:"∩";s:3:"cup";s:3:"∪";s:3:"int";s:3:"∫";s:3:"sim";s:3:"∼";s:4:"cong";s:3:"≅";s:5:"asymp";s:3:"≈";s:2:"ne";s:3:"≠";s:5:"equiv";s:3:"≡";s:2:"le";s:3:"≤";s:2:"ge";s:3:"≥";s:3:"sub";s:3:"⊂";s:3:"sup";s:3:"⊃";s:4:"nsub";s:3:"⊄";s:4:"sube";s:3:"⊆";s:4:"supe";s:3:"⊇";s:5:"oplus";s:3:"⊕";s:6:"otimes";s:3:"⊗";s:4:"perp";s:3:"⊥";s:4:"sdot";s:3:"⋅";s:5:"lceil";s:3:"⌈";s:5:"rceil";s:3:"⌉";s:6:"lfloor";s:3:"⌊";s:6:"rfloor";s:3:"⌋";s:4:"lang";s:3:"〈";s:4:"rang";s:3:"〉";s:3:"loz";s:3:"◊";s:6:"spades";s:3:"♠";s:5:"clubs";s:3:"♣";s:6:"hearts";s:3:"♥";s:5:"diams";s:3:"♦";}
\ No newline at end of file
+a:253:{s:4:"fnof";s:2:"ƒ";s:5:"Alpha";s:2:"Α";s:4:"Beta";s:2:"Β";s:5:"Gamma";s:2:"Γ";s:5:"Delta";s:2:"Δ";s:7:"Epsilon";s:2:"Ε";s:4:"Zeta";s:2:"Ζ";s:3:"Eta";s:2:"Η";s:5:"Theta";s:2:"Θ";s:4:"Iota";s:2:"Ι";s:5:"Kappa";s:2:"Κ";s:6:"Lambda";s:2:"Λ";s:2:"Mu";s:2:"Μ";s:2:"Nu";s:2:"Ν";s:2:"Xi";s:2:"Ξ";s:7:"Omicron";s:2:"Ο";s:2:"Pi";s:2:"Π";s:3:"Rho";s:2:"Ρ";s:5:"Sigma";s:2:"Σ";s:3:"Tau";s:2:"Τ";s:7:"Upsilon";s:2:"Υ";s:3:"Phi";s:2:"Φ";s:3:"Chi";s:2:"Χ";s:3:"Psi";s:2:"Ψ";s:5:"Omega";s:2:"Ω";s:5:"alpha";s:2:"α";s:4:"beta";s:2:"β";s:5:"gamma";s:2:"γ";s:5:"delta";s:2:"δ";s:7:"epsilon";s:2:"ε";s:4:"zeta";s:2:"ζ";s:3:"eta";s:2:"η";s:5:"theta";s:2:"θ";s:4:"iota";s:2:"ι";s:5:"kappa";s:2:"κ";s:6:"lambda";s:2:"λ";s:2:"mu";s:2:"μ";s:2:"nu";s:2:"ν";s:2:"xi";s:2:"ξ";s:7:"omicron";s:2:"ο";s:2:"pi";s:2:"π";s:3:"rho";s:2:"ρ";s:6:"sigmaf";s:2:"ς";s:5:"sigma";s:2:"σ";s:3:"tau";s:2:"τ";s:7:"upsilon";s:2:"υ";s:3:"phi";s:2:"φ";s:3:"chi";s:2:"χ";s:3:"psi";s:2:"ψ";s:5:"omega";s:2:"ω";s:8:"thetasym";s:2:"ϑ";s:5:"upsih";s:2:"ϒ";s:3:"piv";s:2:"ϖ";s:4:"bull";s:3:"•";s:6:"hellip";s:3:"…";s:5:"prime";s:3:"′";s:5:"Prime";s:3:"″";s:5:"oline";s:3:"‾";s:5:"frasl";s:3:"⁄";s:6:"weierp";s:3:"℘";s:5:"image";s:3:"ℑ";s:4:"real";s:3:"ℜ";s:5:"trade";s:3:"™";s:7:"alefsym";s:3:"ℵ";s:4:"larr";s:3:"←";s:4:"uarr";s:3:"↑";s:4:"rarr";s:3:"→";s:4:"darr";s:3:"↓";s:4:"harr";s:3:"↔";s:5:"crarr";s:3:"↵";s:4:"lArr";s:3:"⇐";s:4:"uArr";s:3:"⇑";s:4:"rArr";s:3:"⇒";s:4:"dArr";s:3:"⇓";s:4:"hArr";s:3:"⇔";s:6:"forall";s:3:"∀";s:4:"part";s:3:"∂";s:5:"exist";s:3:"∃";s:5:"empty";s:3:"∅";s:5:"nabla";s:3:"∇";s:4:"isin";s:3:"∈";s:5:"notin";s:3:"∉";s:2:"ni";s:3:"∋";s:4:"prod";s:3:"∏";s:3:"sum";s:3:"∑";s:5:"minus";s:3:"−";s:6:"lowast";s:3:"∗";s:5:"radic";s:3:"√";s:4:"prop";s:3:"∝";s:5:"infin";s:3:"∞";s:3:"ang";s:3:"∠";s:3:"and";s:3:"∧";s:2:"or";s:3:"∨";s:3:"cap";s:3:"∩";s:3:"cup";s:3:"∪";s:3:"int";s:3:"∫";s:6:"there4";s:3:"∴";s:3:"sim";s:3:"∼";s:4:"cong";s:3:"≅";s:5:"asymp";s:3:"≈";s:2:"ne";s:3:"≠";s:5:"equiv";s:3:"≡";s:2:"le";s:3:"≤";s:2:"ge";s:3:"≥";s:3:"sub";s:3:"⊂";s:3:"sup";s:3:"⊃";s:4:"nsub";s:3:"⊄";s:4:"sube";s:3:"⊆";s:4:"supe";s:3:"⊇";s:5:"oplus";s:3:"⊕";s:6:"otimes";s:3:"⊗";s:4:"perp";s:3:"⊥";s:4:"sdot";s:3:"⋅";s:5:"lceil";s:3:"⌈";s:5:"rceil";s:3:"⌉";s:6:"lfloor";s:3:"⌊";s:6:"rfloor";s:3:"⌋";s:4:"lang";s:3:"〈";s:4:"rang";s:3:"〉";s:3:"loz";s:3:"◊";s:6:"spades";s:3:"♠";s:5:"clubs";s:3:"♣";s:6:"hearts";s:3:"♥";s:5:"diams";s:3:"♦";s:4:"quot";s:1:""";s:3:"amp";s:1:"&";s:2:"lt";s:1:"<";s:2:"gt";s:1:">";s:4:"apos";s:1:"'";s:5:"OElig";s:2:"Œ";s:5:"oelig";s:2:"œ";s:6:"Scaron";s:2:"Š";s:6:"scaron";s:2:"š";s:4:"Yuml";s:2:"Ÿ";s:4:"circ";s:2:"ˆ";s:5:"tilde";s:2:"˜";s:4:"ensp";s:3:" ";s:4:"emsp";s:3:" ";s:6:"thinsp";s:3:" ";s:4:"zwnj";s:3:"";s:3:"zwj";s:3:"";s:3:"lrm";s:3:"";s:3:"rlm";s:3:"";s:5:"ndash";s:3:"–";s:5:"mdash";s:3:"—";s:5:"lsquo";s:3:"‘";s:5:"rsquo";s:3:"’";s:5:"sbquo";s:3:"‚";s:5:"ldquo";s:3:"“";s:5:"rdquo";s:3:"”";s:5:"bdquo";s:3:"„";s:6:"dagger";s:3:"†";s:6:"Dagger";s:3:"‡";s:6:"permil";s:3:"‰";s:6:"lsaquo";s:3:"‹";s:6:"rsaquo";s:3:"›";s:4:"euro";s:3:"€";s:4:"nbsp";s:2:" ";s:5:"iexcl";s:2:"¡";s:4:"cent";s:2:"¢";s:5:"pound";s:2:"£";s:6:"curren";s:2:"¤";s:3:"yen";s:2:"¥";s:6:"brvbar";s:2:"¦";s:4:"sect";s:2:"§";s:3:"uml";s:2:"¨";s:4:"copy";s:2:"©";s:4:"ordf";s:2:"ª";s:5:"laquo";s:2:"«";s:3:"not";s:2:"¬";s:3:"shy";s:2:"";s:3:"reg";s:2:"®";s:4:"macr";s:2:"¯";s:3:"deg";s:2:"°";s:6:"plusmn";s:2:"±";s:4:"sup2";s:2:"²";s:4:"sup3";s:2:"³";s:5:"acute";s:2:"´";s:5:"micro";s:2:"µ";s:4:"para";s:2:"¶";s:6:"middot";s:2:"·";s:5:"cedil";s:2:"¸";s:4:"sup1";s:2:"¹";s:4:"ordm";s:2:"º";s:5:"raquo";s:2:"»";s:6:"frac14";s:2:"¼";s:6:"frac12";s:2:"½";s:6:"frac34";s:2:"¾";s:6:"iquest";s:2:"¿";s:6:"Agrave";s:2:"À";s:6:"Aacute";s:2:"Á";s:5:"Acirc";s:2:"Â";s:6:"Atilde";s:2:"Ã";s:4:"Auml";s:2:"Ä";s:5:"Aring";s:2:"Å";s:5:"AElig";s:2:"Æ";s:6:"Ccedil";s:2:"Ç";s:6:"Egrave";s:2:"È";s:6:"Eacute";s:2:"É";s:5:"Ecirc";s:2:"Ê";s:4:"Euml";s:2:"Ë";s:6:"Igrave";s:2:"Ì";s:6:"Iacute";s:2:"Í";s:5:"Icirc";s:2:"Î";s:4:"Iuml";s:2:"Ï";s:3:"ETH";s:2:"Ð";s:6:"Ntilde";s:2:"Ñ";s:6:"Ograve";s:2:"Ò";s:6:"Oacute";s:2:"Ó";s:5:"Ocirc";s:2:"Ô";s:6:"Otilde";s:2:"Õ";s:4:"Ouml";s:2:"Ö";s:5:"times";s:2:"×";s:6:"Oslash";s:2:"Ø";s:6:"Ugrave";s:2:"Ù";s:6:"Uacute";s:2:"Ú";s:5:"Ucirc";s:2:"Û";s:4:"Uuml";s:2:"Ü";s:6:"Yacute";s:2:"Ý";s:5:"THORN";s:2:"Þ";s:5:"szlig";s:2:"ß";s:6:"agrave";s:2:"à";s:6:"aacute";s:2:"á";s:5:"acirc";s:2:"â";s:6:"atilde";s:2:"ã";s:4:"auml";s:2:"ä";s:5:"aring";s:2:"å";s:5:"aelig";s:2:"æ";s:6:"ccedil";s:2:"ç";s:6:"egrave";s:2:"è";s:6:"eacute";s:2:"é";s:5:"ecirc";s:2:"ê";s:4:"euml";s:2:"ë";s:6:"igrave";s:2:"ì";s:6:"iacute";s:2:"í";s:5:"icirc";s:2:"î";s:4:"iuml";s:2:"ï";s:3:"eth";s:2:"ð";s:6:"ntilde";s:2:"ñ";s:6:"ograve";s:2:"ò";s:6:"oacute";s:2:"ó";s:5:"ocirc";s:2:"ô";s:6:"otilde";s:2:"õ";s:4:"ouml";s:2:"ö";s:6:"divide";s:2:"÷";s:6:"oslash";s:2:"ø";s:6:"ugrave";s:2:"ù";s:6:"uacute";s:2:"ú";s:5:"ucirc";s:2:"û";s:4:"uuml";s:2:"ü";s:6:"yacute";s:2:"ý";s:5:"thorn";s:2:"þ";s:4:"yuml";s:2:"ÿ";}
\ No newline at end of file
* @todo Extend to indicate non-text/css style blocks
*/
public function preFilter($html, $config, $context) {
- $tidy = $config->get('FilterParam', 'ExtractStyleBlocksTidyImpl');
+ $tidy = $config->get('Filter.ExtractStyleBlocks.TidyImpl');
if ($tidy !== null) $this->_tidy = $tidy;
$html = preg_replace_callback('#<style(?:\s.*)?>(.+)</style>#isU', array($this, 'styleCallback'), $html);
$style_blocks = $this->_styleMatches;
*/
public function cleanCSS($css, $config, $context) {
// prepare scope
- $scope = $config->get('FilterParam', 'ExtractStyleBlocksScope');
+ $scope = $config->get('Filter.ExtractStyleBlocks.Scope');
if ($scope !== null) {
$scopes = array_map('trim', explode(',', $scope));
} else {
$css = $this->_tidy->print->plain();
// we are going to escape any special characters <>& to ensure
// that no funny business occurs (i.e. </style> in a font-family prop).
- if ($config->get('FilterParam', 'ExtractStyleBlocksEscaping')) {
+ if ($config->get('Filter.ExtractStyleBlocks.Escaping')) {
$css = str_replace(
array('<', '>', '&'),
array('\3C ', '\3E ', '\26 '),
public function preFilter($html, $config, $context) {
$pre_regex = '#<object[^>]+>.+?'.
- 'http://www.youtube.com/v/([A-Za-z0-9\-_]+).+?</object>#s';
+ 'http://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?</object>#s';
$pre_replace = '<span class="youtube-embed">\1</span>';
return preg_replace($pre_regex, $pre_replace, $html);
}
public function postFilter($html, $config, $context) {
- $post_regex = '#<span class="youtube-embed">([A-Za-z0-9\-_]+)</span>#';
+ $post_regex = '#<span class="youtube-embed">((?:v|cp)/[A-Za-z0-9\-_=]+)</span>#';
return preg_replace_callback($post_regex, array($this, 'postFilterCallback'), $html);
}
protected function postFilterCallback($matches) {
$url = $this->armorUrl($matches[1]);
return '<object width="425" height="350" type="application/x-shockwave-flash" '.
- 'data="http://www.youtube.com/v/'.$url.'">'.
- '<param name="movie" value="http://www.youtube.com/v/'.$url.'"></param>'.
+ 'data="http://www.youtube.com/'.$url.'">'.
+ '<param name="movie" value="http://www.youtube.com/'.$url.'"></param>'.
'<!--[if IE]>'.
- '<embed src="http://www.youtube.com/v/'.$url.'"'.
+ '<embed src="http://www.youtube.com/'.$url.'"'.
'type="application/x-shockwave-flash"'.
'wmode="transparent" width="425" height="350" />'.
'<![endif]-->'.
*/
private $_sortAttr;
+ /**
+ * Cache of %Output.FlashCompat
+ */
+ private $_flashCompat;
+
+ /**
+ * Cache of %Output.FixInnerHTML
+ */
+ private $_innerHTMLFix;
+
+ /**
+ * Stack for keeping track of object information when outputting IE
+ * compatibility code.
+ */
+ private $_flashStack = array();
+
/**
* Configuration for the generator
*/
*/
public function __construct($config, $context) {
$this->config = $config;
- $this->_scriptFix = $config->get('Output', 'CommentScriptContents');
- $this->_sortAttr = $config->get('Output', 'SortAttr');
+ $this->_scriptFix = $config->get('Output.CommentScriptContents');
+ $this->_innerHTMLFix = $config->get('Output.FixInnerHTML');
+ $this->_sortAttr = $config->get('Output.SortAttr');
+ $this->_flashCompat = $config->get('Output.FlashCompat');
$this->_def = $config->getHTMLDefinition();
$this->_xhtml = $this->_def->doctype->xml;
}
}
// Tidy cleanup
- if (extension_loaded('tidy') && $this->config->get('Output', 'TidyFormat')) {
+ if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) {
$tidy = new Tidy;
$tidy->parseString($html, array(
'indent'=> true,
}
// Normalize newlines to system defined value
- $nl = $this->config->get('Output', 'Newline');
- if ($nl === null) $nl = PHP_EOL;
- if ($nl !== "\n") $html = str_replace("\n", $nl, $html);
+ if ($this->config->get('Core.NormalizeNewlines')) {
+ $nl = $this->config->get('Output.Newline');
+ if ($nl === null) $nl = PHP_EOL;
+ if ($nl !== "\n") $html = str_replace("\n", $nl, $html);
+ }
return $html;
}
} elseif ($token instanceof HTMLPurifier_Token_Start) {
$attr = $this->generateAttributes($token->attr, $token->name);
+ if ($this->_flashCompat) {
+ if ($token->name == "object") {
+ $flash = new stdclass();
+ $flash->attr = $token->attr;
+ $flash->param = array();
+ $this->_flashStack[] = $flash;
+ }
+ }
return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
} elseif ($token instanceof HTMLPurifier_Token_End) {
- return '</' . $token->name . '>';
+ $_extra = '';
+ if ($this->_flashCompat) {
+ if ($token->name == "object" && !empty($this->_flashStack)) {
+ // doesn't do anything for now
+ }
+ }
+ return $_extra . '</' . $token->name . '>';
} elseif ($token instanceof HTMLPurifier_Token_Empty) {
+ if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) {
+ $this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value'];
+ }
$attr = $this->generateAttributes($token->attr, $token->name);
return '<' . $token->name . ($attr ? ' ' : '') . $attr .
( $this->_xhtml ? ' /': '' ) // <br /> v. <br>
continue;
}
}
+ // Workaround for Internet Explorer innerHTML bug.
+ // Essentially, Internet Explorer, when calculating
+ // innerHTML, omits quotes if there are no instances of
+ // angled brackets, quotes or spaces. However, when parsing
+ // HTML (for example, when you assign to innerHTML), it
+ // treats backticks as quotes. Thus,
+ // <img alt="``" />
+ // becomes
+ // <img alt=`` />
+ // becomes
+ // <img alt='' />
+ // Fortunately, all we need to do is trigger an appropriate
+ // quoting style, which we do by adding an extra space.
+ // This also is consistent with the W3C spec, which states
+ // that user agents may ignore leading or trailing
+ // whitespace (in fact, most don't, at least for attributes
+ // like alt, but an extra space at the end is barely
+ // noticeable). Still, we have a configuration knob for
+ // this, since this transformation is not necesary if you
+ // don't process user input with innerHTML or you don't plan
+ // on supporting Internet Explorer.
+ if ($this->_innerHTMLFix) {
+ if (strpos($value, '`') !== false) {
+ // check if correct quoting style would not already be
+ // triggered
+ if (strcspn($value, '"\' <>') === strlen($value)) {
+ // protect!
+ $value .= ' ';
+ }
+ }
+ }
$html .= $key.'="'.$this->escape($value).'" ';
}
return rtrim($html);
* permissible for non-attribute output.
* @return String escaped data.
*/
- public function escape($string, $quote = ENT_COMPAT) {
+ public function escape($string, $quote = null) {
+ // Workaround for APC bug on Mac Leopard reported by sidepodcast
+ // http://htmlpurifier.org/phorum/read.php?3,4823,4846
+ if ($quote === null) $quote = ENT_COMPAT;
return htmlspecialchars($string, $quote, 'UTF-8');
}
* @note See HTMLPurifier_HTMLModule::addElement for detailed
* parameter and return value descriptions.
*/
- public function addElement($element_name, $type, $contents, $attr_collections, $attributes) {
+ public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array()) {
$module = $this->getAnonymousModule();
// assume that if the user is calling this, the element
// is safe. This may not be a good idea
*/
protected function setupConfigStuff($config) {
- $block_wrapper = $config->get('HTML', 'BlockWrapper');
+ $block_wrapper = $config->get('HTML.BlockWrapper');
if (isset($this->info_content_sets['Block'][$block_wrapper])) {
$this->info_block_wrapper = $block_wrapper;
} else {
E_USER_ERROR);
}
- $parent = $config->get('HTML', 'Parent');
+ $parent = $config->get('HTML.Parent');
$def = $this->manager->getElement($parent, true);
if ($def) {
$this->info_parent = $parent;
// setup allowed elements -----------------------------------------
- $allowed_elements = $config->get('HTML', 'AllowedElements');
- $allowed_attributes = $config->get('HTML', 'AllowedAttributes'); // retrieve early
+ $allowed_elements = $config->get('HTML.AllowedElements');
+ $allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early
if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
- $allowed = $config->get('HTML', 'Allowed');
+ $allowed = $config->get('HTML.Allowed');
if (is_string($allowed)) {
list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
}
unset($allowed_attributes_mutable[$key]);
}
}
- if ($delete) unset($this->info[$tag]->attr[$attr]);
+ if ($delete) {
+ if ($this->info[$tag]->attr[$attr]->required) {
+ trigger_error("Required attribute '$attr' in element '$tag' was not allowed, which means '$tag' will not be allowed either", E_USER_WARNING);
+ }
+ unset($this->info[$tag]->attr[$attr]);
+ }
}
}
// emit errors
// setup forbidden elements ---------------------------------------
- $forbidden_elements = $config->get('HTML', 'ForbiddenElements');
- $forbidden_attributes = $config->get('HTML', 'ForbiddenAttributes');
+ $forbidden_elements = $config->get('HTML.ForbiddenElements');
+ $forbidden_attributes = $config->get('HTML.ForbiddenAttributes');
foreach ($this->info as $tag => $info) {
if (isset($forbidden_elements[$tag])) {
'Core' => array(
0 => array('Style'),
// 'xml:space' => false,
- 'class' => 'NMTOKENS',
+ 'class' => 'Class',
'id' => 'ID',
'title' => 'CDATA',
),
0 => array('Core', 'I18N')
)
);
+
}
// vim: et sw=4 sts=4
public $name = 'Image';
public function setup($config) {
- $max = $config->get('HTML', 'MaxImgLength');
+ $max = $config->get('HTML.MaxImgLength');
$img = $this->addElement(
'img', 'Inline', 'Empty', 'Common',
array(
'src*' => new HTMLPurifier_AttrDef_URI(true), // embedded
)
);
- if ($max === null || $config->get('HTML', 'Trusted')) {
+ if ($max === null || $config->get('HTML.Trusted')) {
$img->attr['height'] =
$img->attr['width'] = 'Length';
}
public $content_sets = array('Flow' => 'List');
public function setup($config) {
- $this->addElement('ol', 'List', 'Required: li', 'Common');
- $this->addElement('ul', 'List', 'Required: li', 'Common');
+ $ol = $this->addElement('ol', 'List', 'Required: li', 'Common');
+ $ol->wrap = "li";
+ $ul = $this->addElement('ul', 'List', 'Required: li', 'Common');
+ $ul->wrap = "li";
$this->addElement('dl', 'List', 'Required: dt | dd', 'Common');
$this->addElement('li', false, 'Flow', 'Common');
$elements = array('a', 'applet', 'form', 'frame', 'iframe', 'img', 'map');
foreach ($elements as $name) {
$element = $this->addBlankElement($name);
- $element->attr['name'] = 'ID';
+ $element->attr['name'] = 'CDATA';
+ if (!$config->get('HTML.Attr.Name.UseCDATA')) {
+ $element->attr_transform_post['NameSync'] = new HTMLPurifier_AttrTransform_NameSync();
+ }
}
}
--- /dev/null
+<?php
+
+/**
+ * Module adds the nofollow attribute transformation to a tags. It
+ * is enabled by HTML.Nofollow
+ */
+class HTMLPurifier_HTMLModule_Nofollow extends HTMLPurifier_HTMLModule
+{
+
+ public $name = 'Nofollow';
+
+ public function setup($config) {
+ $a = $this->addBlankElement('a');
+ $a->attr_transform_post[] = new HTMLPurifier_AttrTransform_Nofollow();
+ }
+
+}
+
+// vim: et sw=4 sts=4
public function setup($config) {
- $max = $config->get('HTML', 'MaxImgLength');
+ $max = $config->get('HTML.MaxImgLength');
$embed = $this->addElement(
'embed', 'Inline', 'Empty', 'Common',
array(
'height' => 'Pixels#' . $max,
'allowscriptaccess' => 'Enum#never',
'allownetworking' => 'Enum#internal',
- 'wmode' => 'Enum#window',
+ 'flashvars' => 'Text',
+ 'wmode' => 'Enum#window,transparent,opaque',
'name' => 'ID',
)
);
// These definitions are not intrinsically safe: the attribute transforms
// are a vital part of ensuring safety.
- $max = $config->get('HTML', 'MaxImgLength');
+ $max = $config->get('HTML.MaxImgLength');
$object = $this->addElement(
'object',
'Inline',
'type' => 'Enum#application/x-shockwave-flash',
'width' => 'Pixels#' . $max,
'height' => 'Pixels#' . $max,
- 'data' => 'URI#embedded'
+ 'data' => 'URI#embedded',
+ 'codebase' => new HTMLPurifier_AttrDef_Enum(array(
+ 'http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,40,0')),
)
);
$object->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeObject();
$this->makeFixesForLevel($fixes);
// figure out which fixes to use
- $level = $config->get('HTML', 'TidyLevel');
+ $level = $config->get('HTML.TidyLevel');
$fixes_lookup = $this->getFixesForLevel($level);
// get custom fix declarations: these need namespace processing
- $add_fixes = $config->get('HTML', 'TidyAdd');
- $remove_fixes = $config->get('HTML', 'TidyRemove');
+ $add_fixes = $config->get('HTML.TidyAdd');
+ $remove_fixes = $config->get('HTML.TidyRemove');
foreach ($fixes as $name => $fix) {
// needs to be refactored a little to implement globbing
$r['thead@background'] = new HTMLPurifier_AttrTransform_Background();
$r['tfoot@background'] = new HTMLPurifier_AttrTransform_Background();
$r['tbody@background'] = new HTMLPurifier_AttrTransform_Background();
+ $r['table@height'] = new HTMLPurifier_AttrTransform_Length('height');
return $r;
}
*/
public function setup($config) {
- $this->trusted = $config->get('HTML', 'Trusted');
+ $this->trusted = $config->get('HTML.Trusted');
// generate
$this->doctype = $this->doctypes->make($config);
$modules = $this->doctype->modules;
// take out the default modules that aren't allowed
- $lookup = $config->get('HTML', 'AllowedModules');
- $special_cases = $config->get('HTML', 'CoreModules');
+ $lookup = $config->get('HTML.AllowedModules');
+ $special_cases = $config->get('HTML.CoreModules');
if (is_array($lookup)) {
foreach ($modules as $k => $m) {
}
}
- // add proprietary module (this gets special treatment because
- // it is completely removed from doctypes, etc.)
- if ($config->get('HTML', 'Proprietary')) {
+ // custom modules
+ if ($config->get('HTML.Proprietary')) {
$modules[] = 'Proprietary';
}
-
- // add SafeObject/Safeembed modules
- if ($config->get('HTML', 'SafeObject')) {
+ if ($config->get('HTML.SafeObject')) {
$modules[] = 'SafeObject';
}
- if ($config->get('HTML', 'SafeEmbed')) {
+ if ($config->get('HTML.SafeEmbed')) {
$modules[] = 'SafeEmbed';
}
+ if ($config->get('HTML.Nofollow')) {
+ $modules[] = 'Nofollow';
+ }
// merge in custom modules
$modules = array_merge($modules, $this->userModules);
*/
public static function build($config, $context) {
$id_accumulator = new HTMLPurifier_IDAccumulator();
- $id_accumulator->load($config->get('Attr', 'IDBlacklist'));
+ $id_accumulator->load($config->get('Attr.IDBlacklist'));
return $id_accumulator;
}
if (!isset($parent->child->elements[$name]) || isset($parent->excludes[$name])) {
return false;
}
+ // check for exclusion
+ for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) {
+ $node = $this->currentNesting[$i];
+ $def = $this->htmlDefinition->info[$node->name];
+ if (isset($def->excludes[$name])) return false;
+ }
return true;
}
// ----
// This is a degenerate case
} else {
- // State 1.2: PAR1
- // ----
+ if (!$token->is_whitespace || $this->_isInline($current)) {
+ // State 1.2: PAR1
+ // ----
- // State 1.3: PAR1\n\nPAR2
- // ------------
+ // State 1.3: PAR1\n\nPAR2
+ // ------------
- // State 1.4: <div>PAR1\n\nPAR2 (see State 2)
- // ------------
- $token = array($this->_pStart());
- $this->_splitText($text, $token);
+ // State 1.4: <div>PAR1\n\nPAR2 (see State 2)
+ // ------------
+ $token = array($this->_pStart());
+ $this->_splitText($text, $token);
+ } else {
+ // State 1.5: \n<hr />
+ // --
+ }
}
} else {
// State 2: <div>PAR1... (similar to 1.4)
public $needed = array('a' => array('href'));
public function prepare($config, $context) {
- $this->docURL = $config->get('AutoFormatParam', 'PurifierLinkifyDocURL');
+ $this->docURL = $config->get('AutoFormat.PurifierLinkify.DocURL');
return parent::prepare($config, $context);
}
class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector
{
- private $context, $config;
+ private $context, $config, $attrValidator, $removeNbsp, $removeNbspExceptions;
public function prepare($config, $context) {
parent::prepare($config, $context);
$this->config = $config;
$this->context = $context;
+ $this->removeNbsp = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp');
+ $this->removeNbspExceptions = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions');
$this->attrValidator = new HTMLPurifier_AttrValidator();
}
$next = false;
for ($i = $this->inputIndex + 1, $c = count($this->inputTokens); $i < $c; $i++) {
$next = $this->inputTokens[$i];
- if ($next instanceof HTMLPurifier_Token_Text && $next->is_whitespace) continue;
+ if ($next instanceof HTMLPurifier_Token_Text) {
+ if ($next->is_whitespace) continue;
+ if ($this->removeNbsp && !isset($this->removeNbspExceptions[$token->name])) {
+ $plain = str_replace("\xC2\xA0", "", $next->data);
+ $isWsOrNbsp = $plain === '' || ctype_space($plain);
+ if ($isWsOrNbsp) continue;
+ }
+ }
break;
}
if (!$next || ($next instanceof HTMLPurifier_Token_End && $next->name == $token->name)) {
--- /dev/null
+<?php
+
+/**
+ * Injector that removes spans with no attributes
+ */
+class HTMLPurifier_Injector_RemoveSpansWithoutAttributes extends HTMLPurifier_Injector
+{
+ public $name = 'RemoveSpansWithoutAttributes';
+ public $needed = array('span');
+
+ private $attrValidator;
+
+ /**
+ * Used by AttrValidator
+ */
+ private $config;
+ private $context;
+
+ public function prepare($config, $context) {
+ $this->attrValidator = new HTMLPurifier_AttrValidator();
+ $this->config = $config;
+ $this->context = $context;
+ return parent::prepare($config, $context);
+ }
+
+ public function handleElement(&$token) {
+ if ($token->name !== 'span' || !$token instanceof HTMLPurifier_Token_Start) {
+ return;
+ }
+
+ // We need to validate the attributes now since this doesn't normally
+ // happen until after MakeWellFormed. If all the attributes are removed
+ // the span needs to be removed too.
+ $this->attrValidator->validateToken($token, $this->config, $this->context);
+ $token->armor['ValidateAttributes'] = true;
+
+ if (!empty($token->attr)) {
+ return;
+ }
+
+ $nesting = 0;
+ $spanContentTokens = array();
+ while ($this->forwardUntilEndToken($i, $current, $nesting)) {}
+
+ if ($current instanceof HTMLPurifier_Token_End && $current->name === 'span') {
+ // Mark closing span tag for deletion
+ $current->markForDeletion = true;
+ // Delete open span tag
+ $token = false;
+ }
+ }
+
+ public function handleEnd(&$token) {
+ if ($token->markForDeletion) {
+ $token = false;
+ }
+ }
+}
+
+// vim: et sw=4 sts=4
protected $allowedParam = array(
'wmode' => true,
'movie' => true,
+ 'flashvars' => true,
+ 'src' => true,
+ 'allowFullScreen' => true, // if omitted, assume to be 'false'
);
public function prepare($config, $context) {
// We need this fix because YouTube doesn't supply a data
// attribute, which we need if a type is specified. This is
// *very* Flash specific.
- if (!isset($this->objectStack[$i]->attr['data']) && $token->attr['name'] == 'movie') {
+ if (!isset($this->objectStack[$i]->attr['data']) &&
+ ($token->attr['name'] == 'movie' || $token->attr['name'] == 'src')) {
$this->objectStack[$i]->attr['data'] = $token->attr['value'];
}
// Check if the parameter is the correct value but has not
'Lexer: Missing gt' => 'Missing greater-than sign (>), previous less-than sign (<) should be escaped',
'Lexer: Missing attribute key' => 'Attribute declaration has no key',
'Lexer: Missing end quote' => 'Attribute declaration has no end quote',
+'Lexer: Extracted body' => 'Removed document metadata tags',
'Strategy_RemoveForeignElements: Tag transform' => '<$1> element transformed into $CurrentToken.Serialized',
'Strategy_RemoveForeignElements: Missing required attribute' => '$CurrentToken.Compact element missing required attribute $1',
// validate language code
if ($code === false) {
$code = $this->validator->validate(
- $config->get('Core', 'Language'), $config, $context
+ $config->get('Core.Language'), $config, $context
);
} else {
$code = $this->validator->validate($code, $config, $context);
HTMLPurifier_Lexer::create() is deprecated, please instead
use %Core.LexerImpl", E_USER_WARNING);
} else {
- $lexer = $config->get('Core', 'LexerImpl');
+ $lexer = $config->get('Core.LexerImpl');
}
$needs_tracking =
- $config->get('Core', 'MaintainLineNumbers') ||
- $config->get('Core', 'CollectErrors');
+ $config->get('Core.MaintainLineNumbers') ||
+ $config->get('Core.CollectErrors');
$inst = null;
if (is_object($lexer)) {
);
}
+ /**
+ * Special Internet Explorer conditional comments should be removed.
+ */
+ protected static function removeIEConditional($string) {
+ return preg_replace(
+ '#<!--\[if [^>]+\]>.*?<!\[endif\]-->#si', // probably should generalize for all strings
+ '',
+ $string
+ );
+ }
+
/**
* Callback function for escapeCDATA() that does the work.
*
public function normalize($html, $config, $context) {
// normalize newlines to \n
- $html = str_replace("\r\n", "\n", $html);
- $html = str_replace("\r", "\n", $html);
+ if ($config->get('Core.NormalizeNewlines')) {
+ $html = str_replace("\r\n", "\n", $html);
+ $html = str_replace("\r", "\n", $html);
+ }
- if ($config->get('HTML', 'Trusted')) {
+ if ($config->get('HTML.Trusted')) {
// escape convoluted CDATA
$html = $this->escapeCommentedCDATA($html);
}
// escape CDATA
$html = $this->escapeCDATA($html);
+ $html = $this->removeIEConditional($html);
+
// extract body from document if applicable
- if ($config->get('Core', 'ConvertDocumentToFragment')) {
- $html = $this->extractBody($html);
+ if ($config->get('Core.ConvertDocumentToFragment')) {
+ $e = false;
+ if ($config->get('Core.CollectErrors')) {
+ $e =& $context->get('ErrorCollector');
+ }
+ $new_html = $this->extractBody($html);
+ if ($e && $new_html != $html) {
+ $e->send(E_WARNING, 'Lexer: Extracted body');
+ }
+ $html = $new_html;
}
// expand entities that aren't the big five
// represent non-SGML characters (horror, horror!)
$html = HTMLPurifier_Encoder::cleanUTF8($html);
+ // if processing instructions are to removed, remove them now
+ if ($config->get('Core.RemoveProcessingInstructions')) {
+ $html = preg_replace('#<\?.+?\?>#s', '', $html);
+ }
+
return $html;
}
*/
public function extractBody($html) {
$matches = array();
- $result = preg_match('!<body[^>]*>(.+?)</body>!is', $html, $matches);
+ $result = preg_match('!<body[^>]*>(.*)</body>!is', $html, $matches);
if ($result) {
return $matches[1];
} else {
// attempt to armor stray angled brackets that cannot possibly
// form tags and thus are probably being used as emoticons
- if ($config->get('Core', 'AggressivelyFixLt')) {
+ if ($config->get('Core.AggressivelyFixLt')) {
$char = '[^a-z!\/]';
$comment = "/<!--(.*?)(-->|\z)/is";
$html = preg_replace_callback($comment, array($this, 'callbackArmorCommentEntities'), $html);
}
/**
- * Recursive function that tokenizes a node, putting it into an accumulator.
- *
+ * Iterative function that tokenizes a node, putting it into an accumulator.
+ * To iterate is human, to recurse divine - L. Peter Deutsch
* @param $node DOMNode to be tokenized.
* @param $tokens Array-list of already tokenized tokens.
- * @param $collect Says whether or start and close are collected, set to
- * false at first recursion because it's the implicit DIV
- * tag you're dealing with.
* @returns Tokens of node appended to previously passed tokens.
*/
- protected function tokenizeDOM($node, &$tokens, $collect = false) {
+ protected function tokenizeDOM($node, &$tokens) {
+
+ $level = 0;
+ $nodes = array($level => array($node));
+ $closingNodes = array();
+ do {
+ while (!empty($nodes[$level])) {
+ $node = array_shift($nodes[$level]); // FIFO
+ $collect = $level > 0 ? true : false;
+ $needEndingTag = $this->createStartNode($node, $tokens, $collect);
+ if ($needEndingTag) {
+ $closingNodes[$level][] = $node;
+ }
+ if ($node->childNodes && $node->childNodes->length) {
+ $level++;
+ $nodes[$level] = array();
+ foreach ($node->childNodes as $childNode) {
+ array_push($nodes[$level], $childNode);
+ }
+ }
+ }
+ $level--;
+ if ($level && isset($closingNodes[$level])) {
+ while($node = array_pop($closingNodes[$level])) {
+ $this->createEndNode($node, $tokens);
+ }
+ }
+ } while ($level > 0);
+ }
+ /**
+ * @param $node DOMNode to be tokenized.
+ * @param $tokens Array-list of already tokenized tokens.
+ * @param $collect Says whether or start and close are collected, set to
+ * false at first recursion because it's the implicit DIV
+ * tag you're dealing with.
+ * @returns bool if the token needs an endtoken
+ */
+ protected function createStartNode($node, &$tokens, $collect) {
// intercept non element nodes. WE MUST catch all of them,
// but we're not getting the character reference nodes because
// those should have been preprocessed
if ($node->nodeType === XML_TEXT_NODE) {
$tokens[] = $this->factory->createText($node->data);
- return;
+ return false;
} elseif ($node->nodeType === XML_CDATA_SECTION_NODE) {
// undo libxml's special treatment of <script> and <style> tags
$last = end($tokens);
}
}
$tokens[] = $this->factory->createText($this->parseData($data));
- return;
+ return false;
} elseif ($node->nodeType === XML_COMMENT_NODE) {
// this is code is only invoked for comments in script/style in versions
// of libxml pre-2.6.28 (regular comments, of course, are still
// handled regularly)
$tokens[] = $this->factory->createComment($node->data);
- return;
+ return false;
} elseif (
// not-well tested: there may be other nodes we have to grab
$node->nodeType !== XML_ELEMENT_NODE
) {
- return;
+ return false;
}
- $attr = $node->hasAttributes() ?
- $this->transformAttrToAssoc($node->attributes) :
- array();
+ $attr = $node->hasAttributes() ? $this->transformAttrToAssoc($node->attributes) : array();
// We still have to make sure that the element actually IS empty
if (!$node->childNodes->length) {
if ($collect) {
$tokens[] = $this->factory->createEmpty($node->tagName, $attr);
}
+ return false;
} else {
- if ($collect) { // don't wrap on first iteration
+ if ($collect) {
$tokens[] = $this->factory->createStart(
$tag_name = $node->tagName, // somehow, it get's dropped
$attr
);
}
- foreach ($node->childNodes as $node) {
- // remember, it's an accumulator. Otherwise, we'd have
- // to use array_merge
- $this->tokenizeDOM($node, $tokens, true);
- }
- if ($collect) {
- $tokens[] = $this->factory->createEnd($tag_name);
- }
+ return true;
}
+ }
+ protected function createEndNode($node, &$tokens) {
+ $tokens[] = $this->factory->createEnd($node->tagName);
}
+
/**
* Converts a DOMNamedNodeMap of DOMAttr objects into an assoc array.
*
// special normalization for script tags without any armor
// our "armor" heurstic is a < sign any number of whitespaces after
// the first script tag
- if ($config->get('HTML', 'Trusted')) {
+ if ($config->get('HTML.Trusted')) {
$html = preg_replace_callback('#(<script[^>]*>)(\s*[^<].+?)(</script>)#si',
array($this, 'scriptCallback'), $html);
}
$array = array(); // result array
// This is also treated to mean maintain *column* numbers too
- $maintain_line_numbers = $config->get('Core', 'MaintainLineNumbers');
+ $maintain_line_numbers = $config->get('Core.MaintainLineNumbers');
if ($maintain_line_numbers === null) {
// automatically determine line numbering by checking
// if error collection is on
- $maintain_line_numbers = $config->get('Core', 'CollectErrors');
+ $maintain_line_numbers = $config->get('Core.CollectErrors');
}
if ($maintain_line_numbers) {
$nl = "\n";
// how often to manually recalculate. This will ALWAYS be right,
// but it's pretty wasteful. Set to 0 to turn off
- $synchronize_interval = $config->get('Core', 'DirectLexLineNumberSyncInterval');
+ $synchronize_interval = $config->get('Core.DirectLexLineNumberSyncInterval');
$e = false;
- if ($config->get('Core', 'CollectErrors')) {
+ if ($config->get('Core.CollectErrors')) {
$e =& $context->get('ErrorCollector');
}
if ($string == '') return array(); // no attributes
$e = false;
- if ($config->get('Core', 'CollectErrors')) {
+ if ($config->get('Core.CollectErrors')) {
$e =& $context->get('ErrorCollector');
}
}
}
if ($value === false) $value = '';
- return array($key => $value);
+ return array($key => $this->parseData($value));
}
// setup loop environment
* Internal accumulator array for SAX parsers.
*/
protected $tokens = array();
+ protected $last_token_was_empty;
+
+ private $parent_handler;
+ private $stack = array();
public function tokenizeHTML($string, $config, $context) {
$this->tokens = array();
+ $this->last_token_was_empty = false;
$string = $this->normalize($string, $config, $context);
+ $this->parent_handler = set_error_handler(array($this, 'muteStrictErrorHandler'));
+
$parser = new XML_HTMLSax3();
$parser->set_object($this);
$parser->set_element_handler('openHandler','closeHandler');
$parser->parse($string);
+ restore_error_handler();
+
return $this->tokens;
}
}
if ($closed) {
$this->tokens[] = new HTMLPurifier_Token_Empty($name, $attrs);
+ $this->last_token_was_empty = true;
} else {
$this->tokens[] = new HTMLPurifier_Token_Start($name, $attrs);
}
+ $this->stack[] = $name;
return true;
}
// HTMLSax3 seems to always send empty tags an extra close tag
// check and ignore if you see it:
// [TESTME] to make sure it doesn't overreach
- if ($this->tokens[count($this->tokens)-1] instanceof HTMLPurifier_Token_Empty) {
+ if ($this->last_token_was_empty) {
+ $this->last_token_was_empty = false;
return true;
}
$this->tokens[] = new HTMLPurifier_Token_End($name);
+ if (!empty($this->stack)) array_pop($this->stack);
return true;
}
* Data event handler, interface is defined by PEAR package.
*/
public function dataHandler(&$parser, $data) {
+ $this->last_token_was_empty = false;
$this->tokens[] = new HTMLPurifier_Token_Text($data);
return true;
}
*/
public function escapeHandler(&$parser, $data) {
if (strpos($data, '--') === 0) {
- $this->tokens[] = new HTMLPurifier_Token_Comment($data);
+ // remove trailing and leading double-dashes
+ $data = substr($data, 2);
+ if (strlen($data) >= 2 && substr($data, -2) == "--") {
+ $data = substr($data, 0, -2);
+ }
+ if (isset($this->stack[sizeof($this->stack) - 1]) &&
+ $this->stack[sizeof($this->stack) - 1] == "style") {
+ $this->tokens[] = new HTMLPurifier_Token_Text($data);
+ } else {
+ $this->tokens[] = new HTMLPurifier_Token_Comment($data);
+ }
+ $this->last_token_was_empty = false;
}
// CDATA is handled elsewhere, but if it was handled here:
//if (strpos($data, '[CDATA[') === 0) {
return true;
}
+ /**
+ * An error handler that mutes strict errors
+ */
+ public function muteStrictErrorHandler($errno, $errstr, $errfile=null, $errline=null, $errcontext=null) {
+ if ($errno == E_STRICT) return;
+ return call_user_func($this->parent_handler, $errno, $errstr, $errfile, $errline, $errcontext);
+ }
+
}
// vim: et sw=4 sts=4
const EOF = 5;
public function __construct($data) {
- $data = str_replace("\r\n", "\n", $data);
- $data = str_replace("\r", null, $data);
$this->data = $data;
$this->char = -1;
$all = array();
foreach ($allowed as $key) {
list($ns, $directive) = $key;
- $all[$ns][$directive] = $config->get($ns, $directive);
+ $all[$ns][$directive] = $config->get($ns .'.'. $directive);
}
$ret = '';
$ret .= $this->end('th');
$ret .= $this->start('td');
- $def = $this->config->def->info[$ns][$directive];
+ $def = $this->config->def->info["$ns.$directive"];
if (is_int($def)) {
$allow_null = $def < 0;
$type = abs($def);
$this->prepareGenerator($gen_config);
// this should probably be split up a little
$ret = '';
- $def = $config->def->info[$ns][$directive];
+ $def = $config->def->info["$ns.$directive"];
if (is_int($def)) {
$type = abs($def);
} else {
/**
* Takes tokens makes them well-formed (balance end tags, etc.)
+ *
+ * Specification of the armor attributes this strategy uses:
+ *
+ * - MakeWellFormed_TagClosedError: This armor field is used to
+ * suppress tag closed errors for certain tokens [TagClosedSuppress],
+ * in particular, if a tag was generated automatically by HTML
+ * Purifier, we may rely on our infrastructure to close it for us
+ * and shouldn't report an error to the user [TagClosedAuto].
*/
class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
{
// local variables
$generator = new HTMLPurifier_Generator($config, $context);
- $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
+ $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
+ // used for autoclose early abortion
+ $global_parent_allowed_elements = array();
+ if (isset($definition->info[$definition->info_parent])) {
+ // may be unset under testing circumstances
+ $global_parent_allowed_elements = $definition->info[$definition->info_parent]->child->getAllowedElements($config);
+ }
$e = $context->get('ErrorCollector', true);
$t = false; // token index
$i = false; // injector index
$custom_injectors = $injectors['Custom'];
unset($injectors['Custom']); // special case
foreach ($injectors as $injector => $b) {
+ // XXX: Fix with a legitimate lookup table of enabled filters
+ if (strpos($injector, '.') !== false) continue;
$injector = "HTMLPurifier_Injector_$injector";
if (!$b) continue;
$this->injectors[] = new $injector;
$this->injectors[] = $injector;
}
foreach ($custom_injectors as $injector) {
+ if (!$injector) continue;
if (is_string($injector)) {
$injector = "HTMLPurifier_Injector_$injector";
$injector = new $injector;
// -- end INJECTOR --
- // a note on punting:
+ // a note on reprocessing:
// In order to reduce code duplication, whenever some code needs
// to make HTML changes in order to make things "correct", the
// new HTML gets sent through the purifier, regardless of its
$top_nesting = array_pop($this->stack);
$this->stack[] = $top_nesting;
- // send error
+ // send error [TagClosedSuppress]
if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting);
}
$token = $tokens[$t];
//echo '<br>'; printTokens($tokens, $t); printTokens($this->stack);
+ //flush();
// quick-check: if it's not a tag, no need to process
if (empty($token->is_tag)) {
$ok = false;
if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
// claims to be a start tag but is empty
- $token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
+ $token = new HTMLPurifier_Token_Empty($token->name, $token->attr, $token->line, $token->col, $token->armor);
$ok = true;
} elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
// claims to be empty but really is a start tag
$this->swap(new HTMLPurifier_Token_End($token->name));
- $this->insertBefore(new HTMLPurifier_Token_Start($token->name, $token->attr));
+ $this->insertBefore(new HTMLPurifier_Token_Start($token->name, $token->attr, $token->line, $token->col, $token->armor));
// punt (since we had to modify the input stream in a non-trivial way)
$reprocess = true;
continue;
// ...unless they also have to close their parent
if (!empty($this->stack)) {
+ // Performance note: you might think that it's rather
+ // inefficient, recalculating the autoclose information
+ // for every tag that a token closes (since when we
+ // do an autoclose, we push a new token into the
+ // stream and then /process/ that, before
+ // re-processing this token.) But this is
+ // necessary, because an injector can make an
+ // arbitrary transformations to the autoclosing
+ // tokens we introduce, so things may have changed
+ // in the meantime. Also, doing the inefficient thing is
+ // "easy" to reason about (for certain perverse definitions
+ // of "easy")
+
$parent = array_pop($this->stack);
$this->stack[] = $parent;
$autoclose = false;
}
+ if ($autoclose && $definition->info[$token->name]->wrap) {
+ // Check if an element can be wrapped by another
+ // element to make it valid in a context (for
+ // example, <ul><ul> needs a <li> in between)
+ $wrapname = $definition->info[$token->name]->wrap;
+ $wrapdef = $definition->info[$wrapname];
+ $elements = $wrapdef->child->getAllowedElements($config);
+ $parent_elements = $definition->info[$parent->name]->child->getAllowedElements($config);
+ if (isset($elements[$token->name]) && isset($parent_elements[$wrapname])) {
+ $newtoken = new HTMLPurifier_Token_Start($wrapname);
+ $this->insertBefore($newtoken);
+ $reprocess = true;
+ continue;
+ }
+ }
+
$carryover = false;
if ($autoclose && $definition->info[$parent->name]->formatting) {
$carryover = true;
}
if ($autoclose) {
- // errors need to be updated
- $new_token = new HTMLPurifier_Token_End($parent->name);
- $new_token->start = $parent;
- if ($carryover) {
- $element = clone $parent;
- $element->armor['MakeWellFormed_TagClosedError'] = true;
- $element->carryover = true;
- $this->processToken(array($new_token, $token, $element));
- } else {
- $this->insertBefore($new_token);
+ // check if this autoclose is doomed to fail
+ // (this rechecks $parent, which his harmless)
+ $autoclose_ok = isset($global_parent_allowed_elements[$token->name]);
+ if (!$autoclose_ok) {
+ foreach ($this->stack as $ancestor) {
+ $elements = $definition->info[$ancestor->name]->child->getAllowedElements($config);
+ if (isset($elements[$token->name])) {
+ $autoclose_ok = true;
+ break;
+ }
+ if ($definition->info[$token->name]->wrap) {
+ $wrapname = $definition->info[$token->name]->wrap;
+ $wrapdef = $definition->info[$wrapname];
+ $wrap_elements = $wrapdef->child->getAllowedElements($config);
+ if (isset($wrap_elements[$token->name]) && isset($elements[$wrapname])) {
+ $autoclose_ok = true;
+ break;
+ }
+ }
+ }
}
- if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
- if (!$carryover) {
- $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
+ if ($autoclose_ok) {
+ // errors need to be updated
+ $new_token = new HTMLPurifier_Token_End($parent->name);
+ $new_token->start = $parent;
+ if ($carryover) {
+ $element = clone $parent;
+ // [TagClosedAuto]
+ $element->armor['MakeWellFormed_TagClosedError'] = true;
+ $element->carryover = true;
+ $this->processToken(array($new_token, $token, $element));
} else {
- $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
+ $this->insertBefore($new_token);
}
+ // [TagClosedSuppress]
+ if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
+ if (!$carryover) {
+ $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
+ } else {
+ $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
+ }
+ }
+ } else {
+ $this->remove();
}
$reprocess = true;
continue;
if ($e) {
for ($j = $c - 1; $j > 0; $j--) {
// notice we exclude $j == 0, i.e. the current ending tag, from
- // the errors...
+ // the errors... [TagClosedSuppress]
if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]);
}
$new_token->start = $skipped_tags[$j];
array_unshift($replace, $new_token);
if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) {
+ // [TagClosedAuto]
$element = clone $skipped_tags[$j];
$element->carryover = true;
$element->armor['MakeWellFormed_TagClosedError'] = true;
}
/**
- * Inserts a token before the current token. Cursor now points to this token
+ * Inserts a token before the current token. Cursor now points to
+ * this token. You must reprocess after this.
*/
private function insertBefore($token) {
array_splice($this->tokens, $this->t, 0, array($token));
/**
* Removes current token. Cursor now points to new token occupying previously
- * occupied space.
+ * occupied space. You must reprocess after this.
*/
private function remove() {
array_splice($this->tokens, $this->t, 1);
}
/**
- * Swap current token with new token. Cursor points to new token (no change).
+ * Swap current token with new token. Cursor points to new token (no
+ * change). You must reprocess after this.
*/
private function swap($token) {
$this->tokens[$this->t] = $token;
$generator = new HTMLPurifier_Generator($config, $context);
$result = array();
- $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
- $remove_invalid_img = $config->get('Core', 'RemoveInvalidImg');
+ $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
+ $remove_invalid_img = $config->get('Core.RemoveInvalidImg');
// currently only used to determine if comments should be kept
- $trusted = $config->get('HTML', 'Trusted');
+ $trusted = $config->get('HTML.Trusted');
- $remove_script_contents = $config->get('Core', 'RemoveScriptContents');
- $hidden_elements = $config->get('Core', 'HiddenElements');
+ $remove_script_contents = $config->get('Core.RemoveScriptContents');
+ $hidden_elements = $config->get('Core.HiddenElements');
// remove script contents compatibility
if ($remove_script_contents === true) {
$context->register('CurrentToken', $token);
$e = false;
- if ($config->get('Core', 'CollectErrors')) {
+ if ($config->get('Core.CollectErrors')) {
$e =& $context->get('ErrorCollector');
}
// handle size transform
if (isset($attr['size'])) {
// normalize large numbers
- if ($attr['size']{0} == '+' || $attr['size']{0} == '-') {
- $size = (int) $attr['size'];
- if ($size < -2) $attr['size'] = '-2';
- if ($size > 4) $attr['size'] = '+4';
- } else {
- $size = (int) $attr['size'];
- if ($size > 7) $attr['size'] = '7';
+ if ($attr['size'] !== '') {
+ if ($attr['size']{0} == '+' || $attr['size']{0} == '-') {
+ $size = (int) $attr['size'];
+ if ($size < -2) $attr['size'] = '-2';
+ if ($size > 4) $attr['size'] = '+4';
+ } else {
+ $size = (int) $attr['size'];
+ if ($size > 7) $attr['size'] = '7';
+ }
}
if (isset($this->_size_lookup[$attr['size']])) {
$prepend_style .= 'font-size:' .
* @param $name String name.
* @param $attr Associative array of attributes.
*/
- public function __construct($name, $attr = array(), $line = null, $col = null) {
+ public function __construct($name, $attr = array(), $line = null, $col = null, $armor = array()) {
$this->name = ctype_lower($name) ? $name : strtolower($name);
foreach ($attr as $key => $value) {
// normalization only necessary when key is not lowercase
$this->attr = $attr;
$this->line = $line;
$this->col = $col;
+ $this->armor = $armor;
}
}
$chars_gen_delims = ':/?#[]@';
$chars_pchar = $chars_sub_delims . ':@';
- // validate scheme (MUST BE FIRST!)
- if (!is_null($this->scheme) && is_null($this->host)) {
- $def = $config->getDefinition('URI');
- if ($def->defaultScheme === $this->scheme) {
- $this->scheme = null;
- }
- }
-
// validate host
if (!is_null($this->host)) {
$host_def = new HTMLPurifier_AttrDef_URI_Host();
if ($this->host === false) $this->host = null;
}
+ // validate scheme
+ // NOTE: It's not appropriate to check whether or not this
+ // scheme is in our registry, since a URIFilter may convert a
+ // URI that we don't allow into one we do. So instead, we just
+ // check if the scheme can be dropped because there is no host
+ // and it is our default scheme.
+ if (!is_null($this->scheme) && is_null($this->host) || $this->host === '') {
+ // support for relative paths is pretty abysmal when the
+ // scheme is present, so axe it when possible
+ $def = $config->getDefinition('URI');
+ if ($def->defaultScheme === $this->scheme) {
+ $this->scheme = null;
+ }
+ }
+
// validate username
if (!is_null($this->userinfo)) {
$encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
// validate path
$path_parts = array();
$segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
- if (!is_null($this->host)) {
+ if (!is_null($this->host)) { // this catches $this->host === ''
// path-abempty (hier and relative)
+ // http://www.example.com/my/path
+ // //www.example.com/my/path (looks odd, but works, and
+ // recognized by most browsers)
+ // (this set is valid or invalid on a scheme by scheme
+ // basis, so we'll deal with it later)
+ // file:///my/path
+ // ///my/path
$this->path = $segments_encoder->encode($this->path);
- } elseif ($this->path !== '' && $this->path[0] === '/') {
- // path-absolute (hier and relative)
- if (strlen($this->path) >= 2 && $this->path[1] === '/') {
- // This shouldn't ever happen!
- $this->path = '';
- } else {
+ } elseif ($this->path !== '') {
+ if ($this->path[0] === '/') {
+ // path-absolute (hier and relative)
+ // http:/my/path
+ // /my/path
+ if (strlen($this->path) >= 2 && $this->path[1] === '/') {
+ // This could happen if both the host gets stripped
+ // out
+ // http://my/path
+ // //my/path
+ $this->path = '';
+ } else {
+ $this->path = $segments_encoder->encode($this->path);
+ }
+ } elseif (!is_null($this->scheme)) {
+ // path-rootless (hier)
+ // http:my/path
+ // Short circuit evaluation means we don't need to check nz
$this->path = $segments_encoder->encode($this->path);
- }
- } elseif (!is_null($this->scheme) && $this->path !== '') {
- // path-rootless (hier)
- // Short circuit evaluation means we don't need to check nz
- $this->path = $segments_encoder->encode($this->path);
- } elseif (is_null($this->scheme) && $this->path !== '') {
- // path-noscheme (relative)
- // (once again, not checking nz)
- $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
- $c = strpos($this->path, '/');
- if ($c !== false) {
- $this->path =
- $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
- $segments_encoder->encode(substr($this->path, $c));
} else {
- $this->path = $segment_nc_encoder->encode($this->path);
+ // path-noscheme (relative)
+ // my/path
+ // (once again, not checking nz)
+ $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
+ $c = strpos($this->path, '/');
+ if ($c !== false) {
+ $this->path =
+ $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
+ $segments_encoder->encode(substr($this->path, $c));
+ } else {
+ $this->path = $segment_nc_encoder->encode($this->path);
+ }
}
} else {
// path-empty (hier and relative)
public function toString() {
// reconstruct authority
$authority = null;
+ // there is a rendering difference between a null authority
+ // (http:foo-bar) and an empty string authority
+ // (http:///foo-bar).
if (!is_null($this->host)) {
$authority = '';
if(!is_null($this->userinfo)) $authority .= $this->userinfo . '@';
if(!is_null($this->port)) $authority .= ':' . $this->port;
}
- // reconstruct the result
+ // Reconstruct the result
+ // One might wonder about parsing quirks from browsers after
+ // this reconstruction. Unfortunately, parsing behavior depends
+ // on what *scheme* was employed (file:///foo is handled *very*
+ // differently than http:///foo), so unfortunately we have to
+ // defer to the schemes to do the right thing.
$result = '';
if (!is_null($this->scheme)) $result .= $this->scheme . ':';
if (!is_null($authority)) $result .= '//' . $authority;
protected function setupFilters($config) {
foreach ($this->registeredFilters as $name => $filter) {
- $conf = $config->get('URI', $name);
+ $conf = $config->get('URI.' . $name);
if ($conf !== false && $conf !== null) {
$this->addFilter($filter, $config);
}
}
protected function setupMemberVariables($config) {
- $this->host = $config->get('URI', 'Host');
- $base_uri = $config->get('URI', 'Base');
+ $this->host = $config->get('URI.Host');
+ $base_uri = $config->get('URI.Base');
if (!is_null($base_uri)) {
$parser = new HTMLPurifier_URIParser();
$this->base = $parser->parse($base_uri);
$this->defaultScheme = $this->base->scheme;
if (is_null($this->host)) $this->host = $this->base->host;
}
- if (is_null($this->defaultScheme)) $this->defaultScheme = $config->get('URI', 'DefaultScheme');
+ if (is_null($this->defaultScheme)) $this->defaultScheme = $config->get('URI.DefaultScheme');
}
public function filter(&$uri, $config, $context) {
--- /dev/null
+<?php
+
+class HTMLPurifier_URIFilter_DisableResources extends HTMLPurifier_URIFilter
+{
+ public $name = 'DisableResources';
+ public function filter(&$uri, $config, $context) {
+ return !$context->get('EmbeddedURI', true);
+ }
+}
+
+// vim: et sw=4 sts=4
public $name = 'HostBlacklist';
protected $blacklist = array();
public function prepare($config) {
- $this->blacklist = $config->get('URI', 'HostBlacklist');
+ $this->blacklist = $config->get('URI.HostBlacklist');
return true;
}
public function filter(&$uri, $config, $context) {
protected $replace = array();
public function prepare($config) {
- $this->target = $config->get('URI', $this->name);
+ $this->target = $config->get('URI.' . $this->name);
$this->parser = new HTMLPurifier_URIParser();
- $this->doEmbed = $config->get('URI', 'MungeResources');
- $this->secretKey = $config->get('URI', 'MungeSecretKey');
+ $this->doEmbed = $config->get('URI.MungeResources');
+ $this->secretKey = $config->get('URI.MungeSecretKey');
return true;
}
public function filter(&$uri, $config, $context) {
if (is_null($uri->host) || empty($scheme_obj->browsable)) {
return true;
}
+ // don't redirect if target host is our host
+ if ($uri->host === $config->getDefinition('URI')->host) {
+ return true;
+ }
$this->makeReplace($uri, $config, $context);
$this->replace = array_map('rawurlencode', $this->replace);
/**
* Validator for the components of a URI for a specific scheme
*/
-class HTMLPurifier_URIScheme
+abstract class HTMLPurifier_URIScheme
{
/**
- * Scheme's default port (integer)
+ * Scheme's default port (integer). If an explicit port number is
+ * specified that coincides with the default port, it will be
+ * elided.
*/
public $default_port = null;
public $hierarchical = false;
/**
- * Validates the components of a URI
- * @note This implementation should be called by children if they define
- * a default port, as it does port processing.
- * @param $uri Instance of HTMLPurifier_URI
+ * Whether or not the URI may omit a hostname when the scheme is
+ * explicitly specified, ala file:///path/to/file. As of writing,
+ * 'file' is the only scheme that browsers support his properly.
+ */
+ public $may_omit_host = false;
+
+ /**
+ * Validates the components of a URI for a specific scheme.
+ * @param $uri Reference to a HTMLPurifier_URI object
+ * @param $config HTMLPurifier_Config object
+ * @param $context HTMLPurifier_Context object
+ * @return Bool success or failure
+ */
+ public abstract function doValidate(&$uri, $config, $context);
+
+ /**
+ * Public interface for validating components of a URI. Performs a
+ * bunch of default actions. Don't overload this method.
+ * @param $uri Reference to a HTMLPurifier_URI object
* @param $config HTMLPurifier_Config object
* @param $context HTMLPurifier_Context object
* @return Bool success or failure
*/
public function validate(&$uri, $config, $context) {
if ($this->default_port == $uri->port) $uri->port = null;
- return true;
+ // kludge: browsers do funny things when the scheme but not the
+ // authority is set
+ if (!$this->may_omit_host &&
+ // if the scheme is present, a missing host is always in error
+ (!is_null($uri->scheme) && ($uri->host === '' || is_null($uri->host))) ||
+ // if the scheme is not present, a *blank* host is in error,
+ // since this translates into '///path' which most browsers
+ // interpret as being 'http://path'.
+ (is_null($uri->scheme) && $uri->host === '')
+ ) {
+ do {
+ if (is_null($uri->scheme)) {
+ if (substr($uri->path, 0, 2) != '//') {
+ $uri->host = null;
+ break;
+ }
+ // URI is '////path', so we cannot nullify the
+ // host to preserve semantics. Try expanding the
+ // hostname instead (fall through)
+ }
+ // first see if we can manually insert a hostname
+ $host = $config->get('URI.Host');
+ if (!is_null($host)) {
+ $uri->host = $host;
+ } else {
+ // we can't do anything sensible, reject the URL.
+ return false;
+ }
+ } while (false);
+ }
+ return $this->doValidate($uri, $config, $context);
}
}
--- /dev/null
+<?php
+
+/**
+ * Implements data: URI for base64 encoded images supported by GD.
+ */
+class HTMLPurifier_URIScheme_data extends HTMLPurifier_URIScheme {
+
+ public $browsable = true;
+ public $allowed_types = array(
+ // you better write validation code for other types if you
+ // decide to allow them
+ 'image/jpeg' => true,
+ 'image/gif' => true,
+ 'image/png' => true,
+ );
+ // this is actually irrelevant since we only write out the path
+ // component
+ public $may_omit_host = true;
+
+ public function doValidate(&$uri, $config, $context) {
+ $result = explode(',', $uri->path, 2);
+ $is_base64 = false;
+ $charset = null;
+ $content_type = null;
+ if (count($result) == 2) {
+ list($metadata, $data) = $result;
+ // do some legwork on the metadata
+ $metas = explode(';', $metadata);
+ while(!empty($metas)) {
+ $cur = array_shift($metas);
+ if ($cur == 'base64') {
+ $is_base64 = true;
+ break;
+ }
+ if (substr($cur, 0, 8) == 'charset=') {
+ // doesn't match if there are arbitrary spaces, but
+ // whatever dude
+ if ($charset !== null) continue; // garbage
+ $charset = substr($cur, 8); // not used
+ } else {
+ if ($content_type !== null) continue; // garbage
+ $content_type = $cur;
+ }
+ }
+ } else {
+ $data = $result[0];
+ }
+ if ($content_type !== null && empty($this->allowed_types[$content_type])) {
+ return false;
+ }
+ if ($charset !== null) {
+ // error; we don't allow plaintext stuff
+ $charset = null;
+ }
+ $data = rawurldecode($data);
+ if ($is_base64) {
+ $raw_data = base64_decode($data);
+ } else {
+ $raw_data = $data;
+ }
+ // XXX probably want to refactor this into a general mechanism
+ // for filtering arbitrary content types
+ $file = tempnam("/tmp", "");
+ file_put_contents($file, $raw_data);
+ if (function_exists('exif_imagetype')) {
+ $image_code = exif_imagetype($file);
+ } elseif (function_exists('getimagesize')) {
+ set_error_handler(array($this, 'muteErrorHandler'));
+ $info = getimagesize($file);
+ restore_error_handler();
+ if ($info == false) return false;
+ $image_code = $info[2];
+ } else {
+ trigger_error("could not find exif_imagetype or getimagesize functions", E_USER_ERROR);
+ }
+ $real_content_type = image_type_to_mime_type($image_code);
+ if ($real_content_type != $content_type) {
+ // we're nice guys; if the content type is something else we
+ // support, change it over
+ if (empty($this->allowed_types[$real_content_type])) return false;
+ $content_type = $real_content_type;
+ }
+ // ok, it's kosher, rewrite what we need
+ $uri->userinfo = null;
+ $uri->host = null;
+ $uri->port = null;
+ $uri->fragment = null;
+ $uri->query = null;
+ $uri->path = "$content_type;base64," . base64_encode($raw_data);
+ return true;
+ }
+
+ public function muteErrorHandler($errno, $errstr) {}
+
+}
+
--- /dev/null
+<?php
+
+/**
+ * Validates file as defined by RFC 1630 and RFC 1738.
+ */
+class HTMLPurifier_URIScheme_file extends HTMLPurifier_URIScheme {
+
+ // Generally file:// URLs are not accessible from most
+ // machines, so placing them as an img src is incorrect.
+ public $browsable = false;
+
+ // Basically the *only* URI scheme for which this is true, since
+ // accessing files on the local machine is very common. In fact,
+ // browsers on some operating systems don't understand the
+ // authority, though I hear it is used on Windows to refer to
+ // network shares.
+ public $may_omit_host = true;
+
+ public function doValidate(&$uri, $config, $context) {
+ // Authentication method is not supported
+ $uri->userinfo = null;
+ // file:// makes no provisions for accessing the resource
+ $uri->port = null;
+ // While it seems to work on Firefox, the querystring has
+ // no possible effect and is thus stripped.
+ $uri->query = null;
+ return true;
+ }
+
+}
+
+// vim: et sw=4 sts=4
public $browsable = true; // usually
public $hierarchical = true;
- public function validate(&$uri, $config, $context) {
- parent::validate($uri, $config, $context);
+ public function doValidate(&$uri, $config, $context) {
$uri->query = null;
// typecode check
public $browsable = true;
public $hierarchical = true;
- public function validate(&$uri, $config, $context) {
- parent::validate($uri, $config, $context);
+ public function doValidate(&$uri, $config, $context) {
$uri->userinfo = null;
return true;
}
class HTMLPurifier_URIScheme_mailto extends HTMLPurifier_URIScheme {
public $browsable = false;
+ public $may_omit_host = true;
- public function validate(&$uri, $config, $context) {
- parent::validate($uri, $config, $context);
+ public function doValidate(&$uri, $config, $context) {
$uri->userinfo = null;
$uri->host = null;
$uri->port = null;
class HTMLPurifier_URIScheme_news extends HTMLPurifier_URIScheme {
public $browsable = false;
+ public $may_omit_host = true;
- public function validate(&$uri, $config, $context) {
- parent::validate($uri, $config, $context);
+ public function doValidate(&$uri, $config, $context) {
$uri->userinfo = null;
$uri->host = null;
$uri->port = null;
public $default_port = 119;
public $browsable = false;
- public function validate(&$uri, $config, $context) {
- parent::validate($uri, $config, $context);
+ public function doValidate(&$uri, $config, $context) {
$uri->userinfo = null;
$uri->query = null;
return true;
*/
public function getScheme($scheme, $config, $context) {
if (!$config) $config = HTMLPurifier_Config::createDefault();
- $null = null; // for the sake of passing by reference
// important, otherwise attacker could include arbitrary file
- $allowed_schemes = $config->get('URI', 'AllowedSchemes');
- if (!$config->get('URI', 'OverrideAllowedSchemes') &&
+ $allowed_schemes = $config->get('URI.AllowedSchemes');
+ if (!$config->get('URI.OverrideAllowedSchemes') &&
!isset($allowed_schemes[$scheme])
) {
- return $null;
+ return;
}
if (isset($this->schemes[$scheme])) return $this->schemes[$scheme];
- if (!isset($allowed_schemes[$scheme])) return $null;
+ if (!isset($allowed_schemes[$scheme])) return;
$class = 'HTMLPurifier_URIScheme_' . $scheme;
- if (!class_exists($class)) return $null;
+ if (!class_exists($class)) return;
$this->schemes[$scheme] = new $class();
return $this->schemes[$scheme];
}
foreach ($var as $keypair) {
$c = explode(':', $keypair, 2);
if (!isset($c[1])) continue;
- $nvar[$c[0]] = $c[1];
+ $nvar[trim($c[0])] = trim($c[1]);
}
$var = $nvar;
}
return $new;
} else break;
}
+ if ($type === self::ALIST) {
+ trigger_error("Array list did not have consecutive integer indexes", E_USER_WARNING);
+ return array_values($var);
+ }
if ($type === self::LOOKUP) {
foreach ($var as $key => $value) {
+ if ($value !== true) {
+ trigger_error("Lookup array has non-true value at key '$key'; maybe your input array was not indexed numerically", E_USER_WARNING);
+ }
$var[$key] = true;
}
}