From: Andrew Dolgov Date: Tue, 5 Jun 2012 17:52:37 +0000 (+0400) Subject: Revert "Update HTML Purifier to version 4.4.0." X-Git-Tag: 1.6.0~219 X-Git-Url: https://git.wh0rd.org/?a=commitdiff_plain;h=cb73535c8eae02092df984bafbecabbce8049cd0;p=tt-rss.git Revert "Update HTML Purifier to version 4.4.0." This reverts commit dd205fbad642ace6d0e33c8553f7d73404f140b4. --- diff --git a/lib/htmlpurifier/library/HTMLPurifier.includes.php b/lib/htmlpurifier/library/HTMLPurifier.includes.php index 0ceff6a9..b9baf8f0 100644 --- a/lib/htmlpurifier/library/HTMLPurifier.includes.php +++ b/lib/htmlpurifier/library/HTMLPurifier.includes.php @@ -7,7 +7,7 @@ * primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS * FILE, changes will be overwritten the next time the script is run. * - * @version 4.4.0 + * @version 4.3.0 * * @warning * You must *not* include any other HTML Purifier files before this file, @@ -73,7 +73,6 @@ require 'HTMLPurifier/UnitConverter.php'; require 'HTMLPurifier/VarParser.php'; require 'HTMLPurifier/VarParserException.php'; require 'HTMLPurifier/AttrDef/CSS.php'; -require 'HTMLPurifier/AttrDef/Clone.php'; require 'HTMLPurifier/AttrDef/Enum.php'; require 'HTMLPurifier/AttrDef/Integer.php'; require 'HTMLPurifier/AttrDef/Lang.php'; @@ -91,7 +90,6 @@ require 'HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php'; require 'HTMLPurifier/AttrDef/CSS/Filter.php'; require 'HTMLPurifier/AttrDef/CSS/Font.php'; require 'HTMLPurifier/AttrDef/CSS/FontFamily.php'; -require 'HTMLPurifier/AttrDef/CSS/Ident.php'; require 'HTMLPurifier/AttrDef/CSS/ImportantDecorator.php'; require 'HTMLPurifier/AttrDef/CSS/Length.php'; require 'HTMLPurifier/AttrDef/CSS/ListStyle.php'; @@ -132,12 +130,10 @@ require 'HTMLPurifier/AttrTransform/SafeEmbed.php'; require 'HTMLPurifier/AttrTransform/SafeObject.php'; require 'HTMLPurifier/AttrTransform/SafeParam.php'; require 'HTMLPurifier/AttrTransform/ScriptRequired.php'; -require 'HTMLPurifier/AttrTransform/TargetBlank.php'; require 
'HTMLPurifier/AttrTransform/Textarea.php'; require 'HTMLPurifier/ChildDef/Chameleon.php'; require 'HTMLPurifier/ChildDef/Custom.php'; require 'HTMLPurifier/ChildDef/Empty.php'; -require 'HTMLPurifier/ChildDef/List.php'; require 'HTMLPurifier/ChildDef/Required.php'; require 'HTMLPurifier/ChildDef/Optional.php'; require 'HTMLPurifier/ChildDef/StrictBlockquote.php'; @@ -152,7 +148,6 @@ require 'HTMLPurifier/HTMLModule/CommonAttributes.php'; require 'HTMLPurifier/HTMLModule/Edit.php'; require 'HTMLPurifier/HTMLModule/Forms.php'; require 'HTMLPurifier/HTMLModule/Hypertext.php'; -require 'HTMLPurifier/HTMLModule/Iframe.php'; require 'HTMLPurifier/HTMLModule/Image.php'; require 'HTMLPurifier/HTMLModule/Legacy.php'; require 'HTMLPurifier/HTMLModule/List.php'; @@ -169,7 +164,6 @@ require 'HTMLPurifier/HTMLModule/Scripting.php'; require 'HTMLPurifier/HTMLModule/StyleAttribute.php'; require 'HTMLPurifier/HTMLModule/Tables.php'; require 'HTMLPurifier/HTMLModule/Target.php'; -require 'HTMLPurifier/HTMLModule/TargetBlank.php'; require 'HTMLPurifier/HTMLModule/Text.php'; require 'HTMLPurifier/HTMLModule/Tidy.php'; require 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php'; @@ -208,7 +202,6 @@ require 'HTMLPurifier/URIFilter/DisableResources.php'; require 'HTMLPurifier/URIFilter/HostBlacklist.php'; require 'HTMLPurifier/URIFilter/MakeAbsolute.php'; require 'HTMLPurifier/URIFilter/Munge.php'; -require 'HTMLPurifier/URIFilter/SafeIframe.php'; require 'HTMLPurifier/URIScheme/data.php'; require 'HTMLPurifier/URIScheme/file.php'; require 'HTMLPurifier/URIScheme/ftp.php'; diff --git a/lib/htmlpurifier/library/HTMLPurifier.php b/lib/htmlpurifier/library/HTMLPurifier.php index e599e1c0..914ba25a 100644 --- a/lib/htmlpurifier/library/HTMLPurifier.php +++ b/lib/htmlpurifier/library/HTMLPurifier.php @@ -19,7 +19,7 @@ */ /* - HTML Purifier 4.4.0 - Standards Compliant HTML Filtering + HTML Purifier 4.3.0 - Standards Compliant HTML Filtering Copyright (C) 2006-2008 Edward Z. 
Yang This library is free software; you can redistribute it and/or @@ -55,10 +55,10 @@ class HTMLPurifier { /** Version of HTML Purifier */ - public $version = '4.4.0'; + public $version = '4.3.0'; /** Constant with version of HTML Purifier */ - const VERSION = '4.4.0'; + const VERSION = '4.3.0'; /** Global configuration object */ public $config; diff --git a/lib/htmlpurifier/library/HTMLPurifier.safe-includes.php b/lib/htmlpurifier/library/HTMLPurifier.safe-includes.php index d49b196c..a5c0d5bb 100644 --- a/lib/htmlpurifier/library/HTMLPurifier.safe-includes.php +++ b/lib/htmlpurifier/library/HTMLPurifier.safe-includes.php @@ -67,7 +67,6 @@ require_once $__dir . '/HTMLPurifier/UnitConverter.php'; require_once $__dir . '/HTMLPurifier/VarParser.php'; require_once $__dir . '/HTMLPurifier/VarParserException.php'; require_once $__dir . '/HTMLPurifier/AttrDef/CSS.php'; -require_once $__dir . '/HTMLPurifier/AttrDef/Clone.php'; require_once $__dir . '/HTMLPurifier/AttrDef/Enum.php'; require_once $__dir . '/HTMLPurifier/AttrDef/Integer.php'; require_once $__dir . '/HTMLPurifier/AttrDef/Lang.php'; @@ -85,7 +84,6 @@ require_once $__dir . '/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php'; require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Filter.php'; require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Font.php'; require_once $__dir . '/HTMLPurifier/AttrDef/CSS/FontFamily.php'; -require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Ident.php'; require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php'; require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Length.php'; require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ListStyle.php'; @@ -126,12 +124,10 @@ require_once $__dir . '/HTMLPurifier/AttrTransform/SafeEmbed.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/SafeObject.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/ScriptRequired.php'; -require_once $__dir . 
'/HTMLPurifier/AttrTransform/TargetBlank.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php'; require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php'; require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php'; require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php'; -require_once $__dir . '/HTMLPurifier/ChildDef/List.php'; require_once $__dir . '/HTMLPurifier/ChildDef/Required.php'; require_once $__dir . '/HTMLPurifier/ChildDef/Optional.php'; require_once $__dir . '/HTMLPurifier/ChildDef/StrictBlockquote.php'; @@ -146,7 +142,6 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/CommonAttributes.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Edit.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Forms.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Hypertext.php'; -require_once $__dir . '/HTMLPurifier/HTMLModule/Iframe.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Image.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Legacy.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/List.php'; @@ -163,7 +158,6 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/Scripting.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/StyleAttribute.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Tables.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Target.php'; -require_once $__dir . '/HTMLPurifier/HTMLModule/TargetBlank.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Text.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/XMLCommonAttributes.php'; @@ -202,7 +196,6 @@ require_once $__dir . '/HTMLPurifier/URIFilter/DisableResources.php'; require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php'; require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php'; require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php'; -require_once $__dir . '/HTMLPurifier/URIFilter/SafeIframe.php'; require_once $__dir . 
'/HTMLPurifier/URIScheme/data.php'; require_once $__dir . '/HTMLPurifier/URIScheme/file.php'; require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php'; diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Ident.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Ident.php deleted file mode 100644 index 779794a0..00000000 --- a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Ident.php +++ /dev/null @@ -1,24 +0,0 @@ -clone = $clone; - } - - public function validate($v, $config, $context) { - return $this->clone->validate($v, $config, $context); - } - - public function make($string) { - return clone $this->clone; - } - -} - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Color.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Color.php index 00d86572..d01e2045 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Color.php +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Color.php @@ -14,7 +14,7 @@ class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef $string = trim($string); if (empty($string)) return false; - if (isset($colors[strtolower($string)])) return $colors[$string]; + if (isset($colors[$string])) return $colors[$string]; if ($string[0] === '#') $hex = substr($string, 1); else $hex = $string; diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php index 0015fa1e..81d03762 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php @@ -12,22 +12,12 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef { - // selector is NOT a valid thing to use for IDREFs, because IDREFs - // *must* target IDs that exist, whereas selector #ids do not. - - /** - * Determines whether or not we're validating an ID in a CSS - * selector context. 
- */ - protected $selector; - - public function __construct($selector = false) { - $this->selector = $selector; - } + // ref functionality disabled, since we also have to verify + // whether or not the ID it refers to exists public function validate($id, $config, $context) { - if (!$this->selector && !$config->get('Attr.EnableID')) return false; + if (!$config->get('Attr.EnableID')) return false; $id = trim($id); // trim it first @@ -43,10 +33,10 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef '%Attr.IDPrefix is set', E_USER_WARNING); } - if (!$this->selector) { + //if (!$this->ref) { $id_accumulator =& $context->get('IDAccumulator'); if (isset($id_accumulator->ids[$id])) return false; - } + //} // we purposely avoid using regex, hopefully this is faster @@ -66,7 +56,7 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef return false; } - if (!$this->selector && $result) $id_accumulator->add($id); + if (/*!$this->ref && */$result) $id_accumulator->add($id); // if no change was made to the ID, return the result // else, return the new id if stripping whitespace made it diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI.php index c2b68467..01a6d83e 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI.php +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI.php @@ -19,7 +19,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef } public function make($string) { - $embeds = ($string === 'embedded'); + $embeds = (bool) $string; return new HTMLPurifier_AttrDef_URI($embeds); } diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php index 125decb2..feca469d 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php @@ -44,8 +44,9 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef // 
A regular domain name. - // This doesn't match I18N domain names, but we don't have proper IRI support, - // so force users to insert Punycode. + // This breaks I18N domain names, but we don't have proper IRI support, + // so force users to insert Punycode. If there's complaining we'll + // try to fix things into an international friendly form. // The productions describing this are: $a = '[a-z]'; // alpha @@ -56,44 +57,10 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef // toplabel = alpha | alpha *( alphanum | "-" ) alphanum $toplabel = "$a($and*$an)?"; // hostname = *( domainlabel "." ) toplabel [ "." ] - if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) { - return $string; - } - - // If we have Net_IDNA2 support, we can support IRIs by - // punycoding them. (This is the most portable thing to do, - // since otherwise we have to assume browsers support - - if ($config->get('Core.EnableIDNA')) { - $idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true)); - // we need to encode each period separately - $parts = explode('.', $string); - try { - $new_parts = array(); - foreach ($parts as $part) { - $encodable = false; - for ($i = 0, $c = strlen($part); $i < $c; $i++) { - if (ord($part[$i]) > 0x7a) { - $encodable = true; - break; - } - } - if (!$encodable) { - $new_parts[] = $part; - } else { - $new_parts[] = $idna->encode($part); - } - } - $string = implode('.', $new_parts); - if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) { - return $string; - } - } catch (Exception $e) { - // XXX error reporting - } - } + $match = preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string); + if (!$match) return false; - return false; + return $string; } } diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Nofollow.php b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Nofollow.php index f7fb1209..573b42c9 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Nofollow.php +++ 
b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Nofollow.php @@ -24,13 +24,9 @@ class HTMLPurifier_AttrTransform_Nofollow extends HTMLPurifier_AttrTransform $url = $this->parser->parse($attr['href']); $scheme = $url->getSchemeObj($config, $context); - if ($scheme->browsable && !$url->isLocal($config, $context)) { + if (!is_null($url->host) && $scheme !== false && $scheme->browsable) { if (isset($attr['rel'])) { - $rels = explode(' ', $attr); - if (!in_array('nofollow', $rels)) { - $rels[] = 'nofollow'; - } - $attr['rel'] = implode(' ', $rels); + $attr['rel'] .= ' nofollow'; } else { $attr['rel'] = 'nofollow'; } diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/TargetBlank.php b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/TargetBlank.php deleted file mode 100644 index a6502c74..00000000 --- a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/TargetBlank.php +++ /dev/null @@ -1,38 +0,0 @@ -parser = new HTMLPurifier_URIParser(); - } - - public function transform($attr, $config, $context) { - - if (!isset($attr['href'])) { - return $attr; - } - - // XXX Kind of inefficient - $url = $this->parser->parse($attr['href']); - $scheme = $url->getSchemeObj($config, $context); - - if ($scheme->browsable && !$url->isBenign($config, $context)) { - $attr['target'] = 'blank'; - } - - return $attr; - - } - -} - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrTypes.php b/lib/htmlpurifier/library/HTMLPurifier/AttrTypes.php index 6f985ff9..fc2ea4e5 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/AttrTypes.php +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrTypes.php @@ -15,13 +15,6 @@ class HTMLPurifier_AttrTypes * types. */ public function __construct() { - // XXX This is kind of poor, since we don't actually /clone/ - // instances; instead, we use the supplied make() attribute. So, - // the underlying class must know how to deal with arguments. 
- // With the old implementation of Enum, that ignored its - // arguments when handling a make dispatch, the IAlign - // definition wouldn't work. - // pseudo-types, must be instantiated via shorthand $this->info['Enum'] = new HTMLPurifier_AttrDef_Enum(); $this->info['Bool'] = new HTMLPurifier_AttrDef_HTML_Bool(); @@ -36,9 +29,6 @@ class HTMLPurifier_AttrTypes $this->info['URI'] = new HTMLPurifier_AttrDef_URI(); $this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang(); $this->info['Color'] = new HTMLPurifier_AttrDef_HTML_Color(); - $this->info['IAlign'] = self::makeEnum('top,middle,bottom,left,right'); - $this->info['LAlign'] = self::makeEnum('top,bottom,left,right'); - $this->info['FrameTarget'] = new HTMLPurifier_AttrDef_HTML_FrameTarget(); // unimplemented aliases $this->info['ContentType'] = new HTMLPurifier_AttrDef_Text(); @@ -54,10 +44,6 @@ class HTMLPurifier_AttrTypes $this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true); } - private static function makeEnum($in) { - return new HTMLPurifier_AttrDef_Clone(new HTMLPurifier_AttrDef_Enum(explode(',', $in))); - } - /** * Retrieves a type * @param $type String type name diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/List.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/List.php deleted file mode 100644 index cdaa2893..00000000 --- a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/List.php +++ /dev/null @@ -1,120 +0,0 @@ - true, 'ul' => true, 'ol' => true); - public function validateChildren($tokens_of_children, $config, $context) { - // Flag for subclasses - $this->whitespace = false; - - // if there are no tokens, delete parent node - if (empty($tokens_of_children)) return false; - - // the new set of children - $result = array(); - - // current depth into the nest - $nesting = 0; - - // a little sanity check to make sure it's not ALL whitespace - $all_whitespace = true; - - $seen_li = false; - $need_close_li = false; - - foreach ($tokens_of_children as $token) { - if 
(!empty($token->is_whitespace)) { - $result[] = $token; - continue; - } - $all_whitespace = false; // phew, we're not talking about whitespace - - if ($nesting == 1 && $need_close_li) { - $result[] = new HTMLPurifier_Token_End('li'); - $nesting--; - $need_close_li = false; - } - - $is_child = ($nesting == 0); - - if ($token instanceof HTMLPurifier_Token_Start) { - $nesting++; - } elseif ($token instanceof HTMLPurifier_Token_End) { - $nesting--; - } - - if ($is_child) { - if ($token->name === 'li') { - // good - $seen_li = true; - } elseif ($token->name === 'ul' || $token->name === 'ol') { - // we want to tuck this into the previous li - $need_close_li = true; - $nesting++; - if (!$seen_li) { - // create a new li element - $result[] = new HTMLPurifier_Token_Start('li'); - } else { - // backtrack until found - while(true) { - $t = array_pop($result); - if ($t instanceof HTMLPurifier_Token_End) { - // XXX actually, these invariants could very plausibly be violated - // if we are doing silly things with modifying the set of allowed elements. - // FORTUNATELY, it doesn't make a difference, since the allowed - // elements are hard-coded here! - if ($t->name !== 'li') { - trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR); - return false; - } - break; - } elseif ($t instanceof HTMLPurifier_Token_Empty) { // bleagh - if ($t->name !== 'li') { - trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR); - return false; - } - // XXX this should have a helper for it... 
- $result[] = new HTMLPurifier_Token_Start('li', $t->attr, $t->line, $t->col, $t->armor); - break; - } else { - if (!$t->is_whitespace) { - trigger_error("Only whitespace present invariant violated in List ChildDef", E_USER_ERROR); - return false; - } - } - } - } - } else { - // start wrapping (this doesn't precisely mimic - // browser behavior, but what browsers do is kind of - // hard to mimic in a standards compliant way - // XXX Actually, this has no impact in practice, - // because this gets handled earlier. Arguably, - // we should rip out all of that processing - $result[] = new HTMLPurifier_Token_Start('li'); - $nesting++; - $seen_li = true; - $need_close_li = true; - } - } - $result[] = $token; - } - if ($need_close_li) { - $result[] = new HTMLPurifier_Token_End('li'); - } - if (empty($result)) return false; - if ($all_whitespace) { - return false; - } - if ($tokens_of_children == $result) return true; - return $result; - } -} - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php index 9a93421a..34f0227d 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php +++ b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php @@ -1,33 +1,7 @@ s with a . foreach ($tokens_of_children as $token) { $is_child = ($nesting == 0); @@ -79,9 +51,8 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef // okay, let's stash the tokens away // first token tells us the type of the collection switch ($collection[$tag_index]->name) { - case 'tbody': - $tbody_mode = true; case 'tr': + case 'tbody': $content[] = $collection; break; case 'caption': @@ -90,28 +61,13 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef break; case 'thead': case 'tfoot': - $tbody_mode = true; - // XXX This breaks rendering properties with - // Firefox, which never floats a to - // the top. Ever. (Our scheme will float the - // first to the top.) 
So maybe - // s that are not first should be - // turned into ? Very tricky, indeed. - // access the appropriate variable, $thead or $tfoot $var = $collection[$tag_index]->name; if ($$var === false) { $$var = $collection; } else { - // Oops, there's a second one! What - // should we do? Current behavior is to - // transmutate the first and last entries into - // tbody tags, and then put into content. - // Maybe a better idea is to *attach - // it* to the existing thead or tfoot? - // We don't do this, because Firefox - // doesn't float an extra tfoot to the - // bottom like it does for the first one. + // transmutate the first and less entries into + // tbody tags, and then put into content $collection[$tag_index]->name = 'tbody'; $collection[count($collection)-1]->name = 'tbody'; $content[] = $collection; @@ -170,48 +126,7 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array); if ($thead !== false) $ret = array_merge($ret, $thead); if ($tfoot !== false) $ret = array_merge($ret, $tfoot); - - if ($tbody_mode) { - // a little tricky, since the start of the collection may be - // whitespace - $inside_tbody = false; - foreach ($content as $token_array) { - // find the starting token - foreach ($token_array as $t) { - if ($t->name === 'tr' || $t->name === 'tbody') { - break; - } - } // iterator variable carries over - if ($t->name === 'tr') { - if ($inside_tbody) { - $ret = array_merge($ret, $token_array); - } else { - $ret[] = new HTMLPurifier_Token_Start('tbody'); - $ret = array_merge($ret, $token_array); - $inside_tbody = true; - } - } elseif ($t->name === 'tbody') { - if ($inside_tbody) { - $ret[] = new HTMLPurifier_Token_End('tbody'); - $inside_tbody = false; - $ret = array_merge($ret, $token_array); - } else { - $ret = array_merge($ret, $token_array); - } - } else { - trigger_error("tr/tbody in content invariant failed in Table ChildDef", E_USER_ERROR); - } - } 
- if ($inside_tbody) { - $ret[] = new HTMLPurifier_Token_End('tbody'); - } - } else { - foreach ($content as $token_array) { - // invariant: everything in here is s - $ret = array_merge($ret, $token_array); - } - } - + foreach ($content as $token_array) $ret = array_merge($ret, $token_array); if (!empty($collection) && $is_collecting == false){ // grab the trailing space $ret = array_merge($ret, $collection); diff --git a/lib/htmlpurifier/library/HTMLPurifier/Config.php b/lib/htmlpurifier/library/HTMLPurifier/Config.php index 554980f2..b6551398 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/Config.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Config.php @@ -20,7 +20,7 @@ class HTMLPurifier_Config /** * HTML Purifier's version */ - public $version = '4.4.0'; + public $version = '4.3.0'; /** * Bool indicator whether or not to automatically finalize @@ -44,7 +44,7 @@ class HTMLPurifier_Config /** * Parser for variables */ - protected $parser = null; + protected $parser; /** * Reference HTMLPurifier_ConfigSchema for value checking @@ -668,7 +668,7 @@ class HTMLPurifier_Config */ public function finalize() { $this->finalized = true; - $this->parser = null; + unset($this->parser); } /** diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange/Namespace.php b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange/Namespace.php new file mode 100755 index 00000000..3ffac0a0 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange/Namespace.php @@ -0,0 +1,21 @@ +Injectors) +--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormatParam.PurifierLinkifyDocURL.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormatParam.PurifierLinkifyDocURL.txt new file mode 100755 index 00000000..3e8309e3 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormatParam.PurifierLinkifyDocURL.txt @@ -0,0 +1,12 @@ 
+AutoFormatParam.PurifierLinkifyDocURL +TYPE: string +VERSION: 2.0.1 +DEFAULT: '#%s' +--DESCRIPTION-- + +

+ Location of configuration documentation to link to, let %s substitute + into the configuration's namespace and directive names sans the percent + sign. +

+--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormatParam.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormatParam.txt new file mode 100755 index 00000000..6097a557 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormatParam.txt @@ -0,0 +1,3 @@ +AutoFormatParam +DESCRIPTION: Configuration for customizing auto-formatting functionality +--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.txt new file mode 100755 index 00000000..d14b4909 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.txt @@ -0,0 +1,3 @@ +CSS +DESCRIPTION: Configuration regarding allowed CSS. +--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Cache.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Cache.txt new file mode 100755 index 00000000..57f30239 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Cache.txt @@ -0,0 +1,3 @@ +Cache +DESCRIPTION: Configuration for DefinitionCache and related subclasses. +--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.ColorKeywords.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.ColorKeywords.txt index c572c14e..08b381d3 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.ColorKeywords.txt +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.ColorKeywords.txt @@ -24,6 +24,5 @@ array ( --DESCRIPTION-- Lookup array of color names to six digit hexadecimal number corresponding -to color, with preceding hash mark. Used when parsing colors. The lookup -is done in a case-insensitive manner. +to color, with preceding hash mark. Used when parsing colors. 
--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.EnableIDNA.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.EnableIDNA.txt deleted file mode 100644 index ce243c35..00000000 --- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.EnableIDNA.txt +++ /dev/null @@ -1,9 +0,0 @@ -Core.EnableIDNA -TYPE: bool -DEFAULT: false -VERSION: 4.4.0 ---DESCRIPTION-- -Allows international domain names in URLs. This configuration option -requires the PEAR Net_IDNA2 module to be installed. It operates by -punycoding any internationalized host names for maximum portability. ---# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.txt new file mode 100755 index 00000000..5edfe078 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.txt @@ -0,0 +1,3 @@ +Core +DESCRIPTION: Core features that are always available. +--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Filter.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Filter.txt new file mode 100755 index 00000000..f2d25a1b --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Filter.txt @@ -0,0 +1,3 @@ +Filter +DESCRIPTION: Directives for turning filters on and off, or specifying custom filters. 
+--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.ExtractStyleBlocksEscaping.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.ExtractStyleBlocksEscaping.txt new file mode 100755 index 00000000..d436ed01 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.ExtractStyleBlocksEscaping.txt @@ -0,0 +1,14 @@ +FilterParam.ExtractStyleBlocksEscaping +TYPE: bool +VERSION: 3.0.0 +DEFAULT: true +ALIASES: Filter.ExtractStyleBlocksEscaping +--DESCRIPTION-- + +

+ Whether or not to escape the dangerous characters <, > and & + as \3C, \3E and \26, respectively. This can be safely set to false + if the contents of StyleBlocks will be placed in an external stylesheet, + where there is no risk of it being interpreted as HTML. +

+--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.ExtractStyleBlocksScope.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.ExtractStyleBlocksScope.txt new file mode 100755 index 00000000..3943529c --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.ExtractStyleBlocksScope.txt @@ -0,0 +1,29 @@ +FilterParam.ExtractStyleBlocksScope +TYPE: string/null +VERSION: 3.0.0 +DEFAULT: NULL +ALIASES: Filter.ExtractStyleBlocksScope +--DESCRIPTION-- + +

+ If you would like users to be able to define external stylesheets, but + only allow them to specify CSS declarations for a specific node and + prevent them from fiddling with other elements, use this directive. + It accepts any valid CSS selector, and will prepend this to any + CSS declaration extracted from the document. For example, if this + directive is set to #user-content and a user uses the + selector a:hover, the final selector will be + #user-content a:hover. +

+

+ The comma shorthand may be used; consider the above example, with + #user-content, #user-content2, the final selector will + be #user-content a:hover, #user-content2 a:hover. +

+

+ Warning: It is possible for users to bypass this measure + using a naughty + selector. This is a bug in CSS Tidy 1.3, not HTML + Purifier, and I am working to get it fixed. Until then, HTML Purifier + performs a basic check to prevent this. +

+--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.ExtractStyleBlocksTidyImpl.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.ExtractStyleBlocksTidyImpl.txt new file mode 100755 index 00000000..cafccf8b --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.ExtractStyleBlocksTidyImpl.txt @@ -0,0 +1,15 @@ +FilterParam.ExtractStyleBlocksTidyImpl +TYPE: mixed/null +VERSION: 3.1.0 +DEFAULT: NULL +--DESCRIPTION-- +

+ If left NULL, HTML Purifier will attempt to instantiate a csstidy + class to use for internal cleaning. This will usually be good enough. +

+

+ However, for trusted user input, you can set this to false to + disable cleaning. In addition, you can supply your own concrete implementation + of Tidy's interface to use, although I don't know why you'd want to do that. +

+--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.txt new file mode 100755 index 00000000..dff9784b --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.txt @@ -0,0 +1,3 @@ +FilterParam +DESCRIPTION: Configuration for filters. +--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedComments.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedComments.txt deleted file mode 100644 index 140e2142..00000000 --- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedComments.txt +++ /dev/null @@ -1,10 +0,0 @@ -HTML.AllowedComments -TYPE: lookup -VERSION: 4.4.0 -DEFAULT: array() ---DESCRIPTION-- -A whitelist which indicates what explicit comment bodies should be -allowed, modulo leading and trailing whitespace. See also %HTML.AllowedCommentsRegexp -(these directives are union'ed together, so a comment is considered -valid if any directive deems it valid.) ---# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedCommentsRegexp.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedCommentsRegexp.txt deleted file mode 100644 index f22e977d..00000000 --- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedCommentsRegexp.txt +++ /dev/null @@ -1,15 +0,0 @@ -HTML.AllowedCommentsRegexp -TYPE: string/null -VERSION: 4.4.0 -DEFAULT: NULL ---DESCRIPTION-- -A regexp, which if it matches the body of a comment, indicates that -it should be allowed. Trailing and leading spaces are removed prior -to running this regular expression. -Warning: Make sure you specify -correct anchor metacharacters ^regex$, otherwise you may accept -comments that you did not mean to! 
In particular, the regex /foo|bar/ -is probably not sufficiently strict, since it also allows foobar. -See also %HTML.AllowedComments (these directives are union'ed together, -so a comment is considered valid if any directive deems it valid.) ---# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.SafeIframe.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.SafeIframe.txt deleted file mode 100644 index 5eb6ec2b..00000000 --- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.SafeIframe.txt +++ /dev/null @@ -1,13 +0,0 @@ -HTML.SafeIframe -TYPE: bool -VERSION: 4.4.0 -DEFAULT: false ---DESCRIPTION-- -

- Whether or not to permit iframe tags in untrusted documents. This - directive must be accompanied by a whitelist of permitted iframes, - such as %URI.SafeIframeRegexp, otherwise it will fatally error. - This directive has no effect on strict doctypes, as iframes are not - valid. -

---# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.TargetBlank.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.TargetBlank.txt deleted file mode 100644 index 587a1677..00000000 --- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.TargetBlank.txt +++ /dev/null @@ -1,8 +0,0 @@ -HTML.TargetBlank -TYPE: bool -VERSION: 4.4.0 -DEFAULT: FALSE ---DESCRIPTION-- -If enabled, target=blank attributes are added to all outgoing links. -(This includes links from an HTTPS version of a page to an HTTP version.) ---# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.txt new file mode 100755 index 00000000..f32ceb5b --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.txt @@ -0,0 +1,3 @@ +HTML +DESCRIPTION: Configuration regarding allowed HTML. +--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.txt new file mode 100755 index 00000000..7849d60d --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.txt @@ -0,0 +1,3 @@ +Output +DESCRIPTION: Configuration relating to the generation of (X)HTML. +--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Test.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Test.txt new file mode 100755 index 00000000..5025f9d1 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Test.txt @@ -0,0 +1,3 @@ +Test +DESCRIPTION: Developer testing configuration for our unit tests. 
+--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.SafeIframeRegexp.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.SafeIframeRegexp.txt deleted file mode 100644 index 79084832..00000000 --- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.SafeIframeRegexp.txt +++ /dev/null @@ -1,22 +0,0 @@ -URI.SafeIframeRegexp -TYPE: string/null -VERSION: 4.4.0 -DEFAULT: NULL ---DESCRIPTION-- -

- A PCRE regular expression that will be matched against an iframe URI. This is - a relatively inflexible scheme, but works well enough for the most common - use-case of iframes: embedded video. This directive only has an effect if - %HTML.SafeIframe is enabled. Here are some example values: -

- -

- Note that this directive does not give you enough granularity to, say, disable - all autoplay videos. Pipe up on the HTML Purifier forums if this - is a capability you want. -

---# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.txt new file mode 100755 index 00000000..a13060f3 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.txt @@ -0,0 +1,3 @@ +URI +DESCRIPTION: Features regarding Uniform Resource Identifiers. +--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Encoder.php b/lib/htmlpurifier/library/HTMLPurifier/Encoder.php index 9fa76bd1..2b3140ca 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/Encoder.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Encoder.php @@ -19,68 +19,6 @@ class HTMLPurifier_Encoder */ public static function muteErrorHandler() {} - /** - * iconv wrapper which mutes errors, but doesn't work around bugs. - */ - public static function unsafeIconv($in, $out, $text) { - set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler')); - $r = iconv($in, $out, $text); - restore_error_handler(); - return $r; - } - - /** - * iconv wrapper which mutes errors and works around bugs. 
- */ - public static function iconv($in, $out, $text, $max_chunk_size = 8000) { - $code = self::testIconvTruncateBug(); - if ($code == self::ICONV_OK) { - return self::unsafeIconv($in, $out, $text); - } elseif ($code == self::ICONV_TRUNCATES) { - // we can only work around this if the input character set - // is utf-8 - if ($in == 'utf-8') { - if ($max_chunk_size < 4) { - trigger_error('max_chunk_size is too small', E_USER_WARNING); - return false; - } - // split into 8000 byte chunks, but be careful to handle - // multibyte boundaries properly - if (($c = strlen($text)) <= $max_chunk_size) { - return self::unsafeIconv($in, $out, $text); - } - $r = ''; - $i = 0; - while (true) { - if ($i + $max_chunk_size >= $c) { - $r .= self::unsafeIconv($in, $out, substr($text, $i)); - break; - } - // wibble the boundary - if (0x80 != (0xC0 & ord($text[$i + $max_chunk_size]))) { - $chunk_size = $max_chunk_size; - } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 1]))) { - $chunk_size = $max_chunk_size - 1; - } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 2]))) { - $chunk_size = $max_chunk_size - 2; - } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 3]))) { - $chunk_size = $max_chunk_size - 3; - } else { - return false; // rather confusing UTF-8... - } - $chunk = substr($text, $i, $chunk_size); // substr doesn't mind overlong lengths - $r .= self::unsafeIconv($in, $out, $chunk); - $i += $chunk_size; - } - return $r; - } else { - return false; - } - } else { - return false; - } - } - /** * Cleans a UTF-8 string for well-formedness and SGML validity * @@ -322,14 +260,6 @@ class HTMLPurifier_Encoder return $ret; } - public static function iconvAvailable() { - static $iconv = null; - if ($iconv === null) { - $iconv = function_exists('iconv') && self::testIconvTruncateBug() != self::ICONV_UNUSABLE; - } - return $iconv; - } - /** * Converts a string to UTF-8 based on configuration. 
*/ @@ -337,22 +267,25 @@ class HTMLPurifier_Encoder $encoding = $config->get('Core.Encoding'); if ($encoding === 'utf-8') return $str; static $iconv = null; - if ($iconv === null) $iconv = self::iconvAvailable(); + if ($iconv === null) $iconv = function_exists('iconv'); + set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler')); if ($iconv && !$config->get('Test.ForceNoIconv')) { - // unaffected by bugs, since UTF-8 support all characters - $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str); + $str = iconv($encoding, 'utf-8//IGNORE', $str); if ($str === false) { // $encoding is not a valid encoding + restore_error_handler(); trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR); return ''; } // If the string is bjorked by Shift_JIS or a similar encoding // that doesn't support all of ASCII, convert the naughty // characters to their true byte-wise ASCII/UTF-8 equivalents. - $str = strtr($str, self::testEncodingSupportsASCII($encoding)); + $str = strtr($str, HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding)); + restore_error_handler(); return $str; } elseif ($encoding === 'iso-8859-1') { $str = utf8_encode($str); + restore_error_handler(); return $str; } trigger_error('Encoding not supported, please install iconv', E_USER_ERROR); @@ -365,15 +298,16 @@ class HTMLPurifier_Encoder */ public static function convertFromUTF8($str, $config, $context) { $encoding = $config->get('Core.Encoding'); - if ($escape = $config->get('Core.EscapeNonASCIICharacters')) { - $str = self::convertToASCIIDumbLossless($str); - } if ($encoding === 'utf-8') return $str; static $iconv = null; - if ($iconv === null) $iconv = self::iconvAvailable(); + if ($iconv === null) $iconv = function_exists('iconv'); + if ($escape = $config->get('Core.EscapeNonASCIICharacters')) { + $str = HTMLPurifier_Encoder::convertToASCIIDumbLossless($str); + } + set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler')); if ($iconv && !$config->get('Test.ForceNoIconv')) { 
// Undo our previous fix in convertToUTF8, otherwise iconv will barf - $ascii_fix = self::testEncodingSupportsASCII($encoding); + $ascii_fix = HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding); if (!$escape && !empty($ascii_fix)) { $clear_fix = array(); foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = ''; @@ -381,17 +315,15 @@ class HTMLPurifier_Encoder } $str = strtr($str, array_flip($ascii_fix)); // Normal stuff - $str = self::iconv('utf-8', $encoding . '//IGNORE', $str); + $str = iconv('utf-8', $encoding . '//IGNORE', $str); + restore_error_handler(); return $str; } elseif ($encoding === 'iso-8859-1') { $str = utf8_decode($str); + restore_error_handler(); return $str; } trigger_error('Encoding not supported', E_USER_ERROR); - // You might be tempted to assume that the ASCII representation - // might be OK, however, this is *not* universally true over all - // encodings. So we take the conservative route here, rather - // than forcibly turn on %Core.EscapeNonASCIICharacters } /** @@ -441,49 +373,6 @@ class HTMLPurifier_Encoder return $result; } - /** No bugs detected in iconv. */ - const ICONV_OK = 0; - - /** Iconv truncates output if converting from UTF-8 to another - * character set with //IGNORE, and a non-encodable character is found */ - const ICONV_TRUNCATES = 1; - - /** Iconv does not support //IGNORE, making it unusable for - * transcoding purposes */ - const ICONV_UNUSABLE = 2; - - /** - * glibc iconv has a known bug where it doesn't handle the magic - * //IGNORE stanza correctly. In particular, rather than ignore - * characters, it will return an EILSEQ after consuming some number - * of characters, and expect you to restart iconv as if it were - * an E2BIG. Old versions of PHP did not respect the errno, and - * returned the fragment, so as a result you would see iconv - * mysteriously truncating output. 
We can work around this by - * manually chopping our input into segments of about 8000 - * characters, as long as PHP ignores the error code. If PHP starts - * paying attention to the error code, iconv becomes unusable. - * - * @returns Error code indicating severity of bug. - */ - public static function testIconvTruncateBug() { - static $code = null; - if ($code === null) { - // better not use iconv, otherwise infinite loop! - $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000)); - if ($r === false) { - $code = self::ICONV_UNUSABLE; - } elseif (($c = strlen($r)) < 9000) { - $code = self::ICONV_TRUNCATES; - } elseif ($c > 9000) { - trigger_error('Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: include your iconv version as per phpversion()', E_USER_ERROR); - } else { - $code = self::ICONV_OK; - } - } - return $code; - } - /** * This expensive function tests whether or not a given character * encoding supports ASCII. 7/8-bit encodings like Shift_JIS will @@ -496,11 +385,6 @@ class HTMLPurifier_Encoder * which can be used to "undo" any overzealous iconv action. */ public static function testEncodingSupportsASCII($encoding, $bypass = false) { - // All calls to iconv here are unsafe, proof by case analysis: - // If ICONV_OK, no difference. - // If ICONV_TRUNCATE, all calls involve one character inputs, - // so bug is not triggered. 
- // If ICONV_UNUSABLE, this call is irrelevant static $encodings = array(); if (!$bypass) { if (isset($encodings[$encoding])) return $encodings[$encoding]; @@ -514,22 +398,24 @@ class HTMLPurifier_Encoder if (strpos($lenc, 'iso-8859-') === 0) return array(); } $ret = array(); - if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) return false; + set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler')); + if (iconv('UTF-8', $encoding, 'a') === false) return false; for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars $c = chr($i); // UTF-8 char - $r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion + $r = iconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion if ( $r === '' || // This line is needed for iconv implementations that do not // omit characters that do not exist in the target character set - ($r === $c && self::unsafeIconv($encoding, 'UTF-8//IGNORE', $r) !== $c) + ($r === $c && iconv($encoding, 'UTF-8//IGNORE', $r) !== $c) ) { // Reverse engineer: what's the UTF-8 equiv of this byte // sequence? This assumes that there's no variable width // encoding that doesn't support ASCII. 
- $ret[self::unsafeIconv($encoding, 'UTF-8//IGNORE', $c)] = $c; + $ret[iconv($encoding, 'UTF-8//IGNORE', $c)] = $c; } } + restore_error_handler(); $encodings[$encoding] = $ret; return $ret; } diff --git a/lib/htmlpurifier/library/HTMLPurifier/Filter/ExtractStyleBlocks.php b/lib/htmlpurifier/library/HTMLPurifier/Filter/ExtractStyleBlocks.php index 320aa4f1..bbf78a66 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/Filter/ExtractStyleBlocks.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Filter/ExtractStyleBlocks.php @@ -1,11 +1,5 @@ blocks from input HTML, cleans them up * using CSSTidy, and then places them in $purifier->context->get('StyleBlocks') @@ -27,15 +21,8 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter private $_styleMatches = array(); private $_tidy; - private $_id_attrdef; - private $_class_attrdef; - private $_enum_attrdef; - public function __construct() { $this->_tidy = new csstidy(); - $this->_id_attrdef = new HTMLPurifier_AttrDef_HTML_ID(true); - $this->_class_attrdef = new HTMLPurifier_AttrDef_CSS_Ident(); - $this->_enum_attrdef = new HTMLPurifier_AttrDef_Enum(array('first-child', 'link', 'visited', 'active', 'hover', 'focus')); } /** @@ -90,166 +77,27 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter $css = substr($css, 0, -3); } $css = trim($css); - set_error_handler('htmlpurifier_filter_extractstyleblocks_muteerrorhandler'); $this->_tidy->parse($css); - restore_error_handler(); $css_definition = $config->getDefinition('CSS'); - $html_definition = $config->getDefinition('HTML'); - $new_css = array(); foreach ($this->_tidy->css as $k => $decls) { // $decls are all CSS declarations inside an @ selector $new_decls = array(); foreach ($decls as $selector => $style) { $selector = trim($selector); if ($selector === '') continue; // should not happen - // Parse the selector - // Here is the relevant part of the CSS grammar: - // - // ruleset - // : selector [ ',' S* selector ]* '{' ... 
- // selector - // : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]? - // combinator - // : '+' S* - // : '>' S* - // simple_selector - // : element_name [ HASH | class | attrib | pseudo ]* - // | [ HASH | class | attrib | pseudo ]+ - // element_name - // : IDENT | '*' - // ; - // class - // : '.' IDENT - // ; - // attrib - // : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S* - // [ IDENT | STRING ] S* ]? ']' - // ; - // pseudo - // : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ] - // ; - // - // For reference, here are the relevant tokens: - // - // HASH #{name} - // IDENT {ident} - // INCLUDES == - // DASHMATCH |= - // STRING {string} - // FUNCTION {ident}\( - // - // And the lexical scanner tokens - // - // name {nmchar}+ - // nmchar [_a-z0-9-]|{nonascii}|{escape} - // nonascii [\240-\377] - // escape {unicode}|\\[^\r\n\f0-9a-f] - // unicode \\{h}}{1,6}(\r\n|[ \t\r\n\f])? - // ident -?{nmstart}{nmchar*} - // nmstart [_a-z]|{nonascii}|{escape} - // string {string1}|{string2} - // string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\" - // string2 \'([^\n\r\f\\"]|\\{nl}|{escape})*\' - // - // We'll implement a subset (in order to reduce attack - // surface); in particular: - // - // - No Unicode support - // - No escapes support - // - No string support (by proxy no attrib support) - // - element_name is matched against allowed - // elements (some people might find this - // annoying...) 
- // - Pseudo-elements one of :first-child, :link, - // :visited, :active, :hover, :focus - - // handle ruleset - $selectors = array_map('trim', explode(',', $selector)); - $new_selectors = array(); - foreach ($selectors as $sel) { - // split on +, > and spaces - $basic_selectors = preg_split('/\s*([+> ])\s*/', $sel, -1, PREG_SPLIT_DELIM_CAPTURE); - // even indices are chunks, odd indices are - // delimiters - $nsel = null; - $delim = null; // guaranteed to be non-null after - // two loop iterations - for ($i = 0, $c = count($basic_selectors); $i < $c; $i++) { - $x = $basic_selectors[$i]; - if ($i % 2) { - // delimiter - if ($x === ' ') { - $delim = ' '; - } else { - $delim = ' ' . $x . ' '; - } - } else { - // simple selector - $components = preg_split('/([#.:])/', $x, -1, PREG_SPLIT_DELIM_CAPTURE); - $sdelim = null; - $nx = null; - for ($j = 0, $cc = count($components); $j < $cc; $j ++) { - $y = $components[$j]; - if ($j === 0) { - if ($y === '*' || isset($html_definition->info[$y = strtolower($y)])) { - $nx = $y; - } else { - // $nx stays null; this matters - // if we don't manage to find - // any valid selector content, - // in which case we ignore the - // outer $delim - } - } elseif ($j % 2) { - // set delimiter - $sdelim = $y; - } else { - $attrdef = null; - if ($sdelim === '#') { - $attrdef = $this->_id_attrdef; - } elseif ($sdelim === '.') { - $attrdef = $this->_class_attrdef; - } elseif ($sdelim === ':') { - $attrdef = $this->_enum_attrdef; - } else { - throw new HTMLPurifier_Exception('broken invariant sdelim and preg_split'); - } - $r = $attrdef->validate($y, $config, $context); - if ($r !== false) { - if ($r !== true) { - $y = $r; - } - if ($nx === null) { - $nx = ''; - } - $nx .= $sdelim . $y; - } - } - } - if ($nx !== null) { - if ($nsel === null) { - $nsel = $nx; - } else { - $nsel .= $delim . 
$nx; - } - } else { - // delimiters to the left of invalid - // basic selector ignored - } - } - } - if ($nsel !== null) { - if (!empty($scopes)) { - foreach ($scopes as $s) { - $new_selectors[] = "$s $nsel"; - } - } else { - $new_selectors[] = $nsel; + if ($selector[0] === '+') { + if ($selector !== '' && $selector[0] === '+') continue; + } + if (!empty($scopes)) { + $new_selector = array(); // because multiple ones are possible + $selectors = array_map('trim', explode(',', $selector)); + foreach ($scopes as $s1) { + foreach ($selectors as $s2) { + $new_selector[] = "$s1 $s2"; } } + $selector = implode(', ', $new_selector); // now it's a string } - if (empty($new_selectors)) continue; - $selector = implode(', ', $new_selectors); foreach ($style as $name => $value) { if (!isset($css_definition->info[$name])) { unset($style[$name]); @@ -262,11 +110,10 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter } $new_decls[$selector] = $style; } - $new_css[$k] = $new_decls; + $this->_tidy->css[$k] = $new_decls; } // remove stuff that shouldn't be used, could be reenabled // after security risks are analyzed - $this->_tidy->css = $new_css; $this->_tidy->import = array(); $this->_tidy->charset = null; $this->_tidy->namespace = null; diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLDefinition.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLDefinition.php index b079d44c..33bb38ac 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/HTMLDefinition.php +++ b/lib/htmlpurifier/library/HTMLPurifier/HTMLDefinition.php @@ -147,7 +147,7 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition return $this->_anonModule; } - private $_anonModule = null; + private $_anonModule; // PUBLIC BUT INTERNAL VARIABLES -------------------------------------- diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Forms.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Forms.php index b963529a..44c22f6f 100644 --- 
a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Forms.php +++ b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Forms.php @@ -35,7 +35,7 @@ class HTMLPurifier_HTMLModule_Forms extends HTMLPurifier_HTMLModule 'name' => 'CDATA', 'readonly' => 'Bool#readonly', 'size' => 'Number', - 'src' => 'URI#embedded', + 'src' => 'URI#embeds', 'tabindex' => 'Number', 'type' => 'Enum#text,password,checkbox,button,radio,submit,reset,file,hidden,image', 'value' => 'CDATA', @@ -84,8 +84,7 @@ class HTMLPurifier_HTMLModule_Forms extends HTMLPurifier_HTMLModule $button->excludes = $this->makeLookup( 'form', 'fieldset', // Form 'input', 'select', 'textarea', 'label', 'button', // Formctrl - 'a', // as per HTML 4.01 spec, this is omitted by modularization - 'isindex', 'iframe' // legacy items + 'a' // as per HTML 4.01 spec, this is omitted by modularization ); // Extra exclusion: img usemap="" is not permitted within this element. diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Iframe.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Iframe.php deleted file mode 100644 index 287071ed..00000000 --- a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Iframe.php +++ /dev/null @@ -1,38 +0,0 @@ -get('HTML.SafeIframe')) { - $this->safe = true; - } - $this->addElement( - 'iframe', 'Inline', 'Flow', 'Common', - array( - 'src' => 'URI#embedded', - 'width' => 'Length', - 'height' => 'Length', - 'name' => 'ID', - 'scrolling' => 'Enum#yes,no,auto', - 'frameborder' => 'Enum#0,1', - 'longdesc' => 'URI', - 'marginheight' => 'Pixels', - 'marginwidth' => 'Pixels', - ) - ); - } - -} - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Legacy.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Legacy.php index f278eece..df33927b 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Legacy.php +++ b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Legacy.php @@ -89,7 +89,7 @@ class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule 
$hr->attr['width'] = 'Length'; $img = $this->addBlankElement('img'); - $img->attr['align'] = 'IAlign'; + $img->attr['align'] = 'Enum#top,middle,bottom,left,right'; $img->attr['border'] = 'Pixels'; $img->attr['hspace'] = 'Pixels'; $img->attr['vspace'] = 'Pixels'; @@ -136,22 +136,6 @@ class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule $ul->attr['compact'] = 'Bool#compact'; $ul->attr['type'] = 'Enum#square,disc,circle'; - // "safe" modifications to "unsafe" elements - // WARNING: If you want to add support for an unsafe, legacy - // attribute, make a new TrustedLegacy module with the trusted - // bit set appropriately - - $form = $this->addBlankElement('form'); - $form->content_model = 'Flow | #PCDATA'; - $form->content_model_type = 'optional'; - $form->attr['target'] = 'FrameTarget'; - - $input = $this->addBlankElement('input'); - $input->attr['align'] = 'IAlign'; - - $legend = $this->addBlankElement('legend'); - $legend->attr['align'] = 'LAlign'; - } } diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/List.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/List.php index 79ccefaf..74d4522f 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/List.php +++ b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/List.php @@ -20,16 +20,10 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule public $content_sets = array('Flow' => 'List'); public function setup($config) { - $ol = $this->addElement('ol', 'List', new HTMLPurifier_ChildDef_List(), 'Common'); - $ul = $this->addElement('ul', 'List', new HTMLPurifier_ChildDef_List(), 'Common'); - // XXX The wrap attribute is handled by MakeWellFormed. This is all - // quite unsatisfactory, because we generated this - // *specifically* for lists, and now a big chunk of the handling - // is done properly by the List ChildDef. So actually, we just - // want enough information to make autoclosing work properly, - // and then hand off the tricky stuff to the ChildDef. 
- $ol->wrap = 'li'; - $ul->wrap = 'li'; + $ol = $this->addElement('ol', 'List', 'Required: li', 'Common'); + $ol->wrap = "li"; + $ul = $this->addElement('ul', 'List', 'Required: li', 'Common'); + $ul->wrap = "li"; $this->addElement('dl', 'List', 'Required: dt | dd', 'Common'); $this->addElement('li', false, 'Flow', 'Common'); diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tables.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tables.php index 45c42bb3..f314ced3 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tables.php +++ b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tables.php @@ -37,9 +37,6 @@ class HTMLPurifier_HTMLModule_Tables extends HTMLPurifier_HTMLModule 'abbr' => 'Text', 'colspan' => 'Number', 'rowspan' => 'Number', - // Apparently, as of HTML5 this attribute only applies - // to 'th' elements. - 'scope' => 'Enum#row,col,rowgroup,colgroup', ), $cell_align ); diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/TargetBlank.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/TargetBlank.php deleted file mode 100644 index e1305ec5..00000000 --- a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/TargetBlank.php +++ /dev/null @@ -1,19 +0,0 @@ -addBlankElement('a'); - $a->attr_transform_post[] = new HTMLPurifier_AttrTransform_TargetBlank(); - } - -} - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModuleManager.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModuleManager.php index 7a06fc02..362e3b78 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/HTMLModuleManager.php +++ b/lib/htmlpurifier/library/HTMLPurifier/HTMLModuleManager.php @@ -65,11 +65,11 @@ class HTMLPurifier_HTMLModuleManager 'Presentation', 'Edit', 'Bdo', 'Tables', 'Image', 'StyleAttribute', // Unsafe: - 'Scripting', 'Object', 'Forms', + 'Scripting', 'Object', 'Forms', // Sorta legacy, but present in strict: 'Name', ); - $transitional = array('Legacy', 'Target', 'Iframe'); + $transitional = array('Legacy', 
'Target'); $xml = array('XMLCommonAttributes'); $non_xml = array('NonXMLCommonAttributes'); @@ -112,9 +112,7 @@ class HTMLPurifier_HTMLModuleManager $this->doctypes->register( 'XHTML 1.1', true, - // Iframe is a real XHTML 1.1 module, despite being - // "transitional"! - array_merge($common, $xml, array('Ruby', 'Iframe')), + array_merge($common, $xml, array('Ruby')), array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1 array(), '-//W3C//DTD XHTML 1.1//EN', @@ -231,9 +229,6 @@ class HTMLPurifier_HTMLModuleManager if ($config->get('HTML.Nofollow')) { $modules[] = 'Nofollow'; } - if ($config->get('HTML.TargetBlank')) { - $modules[] = 'TargetBlank'; - } // merge in custom modules $modules = array_merge($modules, $this->userModules); @@ -369,13 +364,6 @@ class HTMLPurifier_HTMLModuleManager // :TODO: // non-standalone definitions that don't have a standalone // to merge into could be deferred to the end - // HOWEVER, it is perfectly valid for a non-standalone - // definition to lack a standalone definition, even - // after all processing: this allows us to safely - // specify extra attributes for elements that may not be - // enabled all in one place. In particular, this might - // be the case for trusted elements. WARNING: care must - // be taken that the /extra/ definitions are all safe. 
continue; } diff --git a/lib/htmlpurifier/library/HTMLPurifier/Lexer/PEARSax3.php b/lib/htmlpurifier/library/HTMLPurifier/Lexer/PEARSax3.php new file mode 100644 index 00000000..1d358c7b --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/Lexer/PEARSax3.php @@ -0,0 +1,139 @@ +tokens = array(); + $this->last_token_was_empty = false; + + $string = $this->normalize($string, $config, $context); + + $this->parent_handler = set_error_handler(array($this, 'muteStrictErrorHandler')); + + $parser = new XML_HTMLSax3(); + $parser->set_object($this); + $parser->set_element_handler('openHandler','closeHandler'); + $parser->set_data_handler('dataHandler'); + $parser->set_escape_handler('escapeHandler'); + + // doesn't seem to work correctly for attributes + $parser->set_option('XML_OPTION_ENTITIES_PARSED', 1); + + $parser->parse($string); + + restore_error_handler(); + + return $this->tokens; + + } + + /** + * Open tag event handler, interface is defined by PEAR package. + */ + public function openHandler(&$parser, $name, $attrs, $closed) { + // entities are not resolved in attrs + foreach ($attrs as $key => $attr) { + $attrs[$key] = $this->parseData($attr); + } + if ($closed) { + $this->tokens[] = new HTMLPurifier_Token_Empty($name, $attrs); + $this->last_token_was_empty = true; + } else { + $this->tokens[] = new HTMLPurifier_Token_Start($name, $attrs); + } + $this->stack[] = $name; + return true; + } + + /** + * Close tag event handler, interface is defined by PEAR package. + */ + public function closeHandler(&$parser, $name) { + // HTMLSax3 seems to always send empty tags an extra close tag + // check and ignore if you see it: + // [TESTME] to make sure it doesn't overreach + if ($this->last_token_was_empty) { + $this->last_token_was_empty = false; + return true; + } + $this->tokens[] = new HTMLPurifier_Token_End($name); + if (!empty($this->stack)) array_pop($this->stack); + return true; + } + + /** + * Data event handler, interface is defined by PEAR package. 
+ */ + public function dataHandler(&$parser, $data) { + $this->last_token_was_empty = false; + $this->tokens[] = new HTMLPurifier_Token_Text($data); + return true; + } + + /** + * Escaped text handler, interface is defined by PEAR package. + */ + public function escapeHandler(&$parser, $data) { + if (strpos($data, '--') === 0) { + // remove trailing and leading double-dashes + $data = substr($data, 2); + if (strlen($data) >= 2 && substr($data, -2) == "--") { + $data = substr($data, 0, -2); + } + if (isset($this->stack[sizeof($this->stack) - 1]) && + $this->stack[sizeof($this->stack) - 1] == "style") { + $this->tokens[] = new HTMLPurifier_Token_Text($data); + } else { + $this->tokens[] = new HTMLPurifier_Token_Comment($data); + } + $this->last_token_was_empty = false; + } + // CDATA is handled elsewhere, but if it was handled here: + //if (strpos($data, '[CDATA[') === 0) { + // $this->tokens[] = new HTMLPurifier_Token_Text( + // substr($data, 7, strlen($data) - 9) ); + //} + return true; + } + + /** + * An error handler that mutes strict errors + */ + public function muteStrictErrorHandler($errno, $errstr, $errfile=null, $errline=null, $errcontext=null) { + if ($errno == E_STRICT) return; + return call_user_func($this->parent_handler, $errno, $errstr, $errfile, $errline, $errcontext); + } + +} + +// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Strategy/Composite.php b/lib/htmlpurifier/library/HTMLPurifier/Strategy/Composite.php index 92aefd33..816490b7 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/Strategy/Composite.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Strategy/Composite.php @@ -11,6 +11,8 @@ abstract class HTMLPurifier_Strategy_Composite extends HTMLPurifier_Strategy */ protected $strategies = array(); + abstract public function __construct(); + public function execute($tokens, $config, $context) { foreach ($this->strategies as $strategy) { $tokens = $strategy->execute($tokens, $config, $context); diff --git 
a/lib/htmlpurifier/library/HTMLPurifier/Strategy/RemoveForeignElements.php b/lib/htmlpurifier/library/HTMLPurifier/Strategy/RemoveForeignElements.php index bccaf14d..cf3a33e4 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/Strategy/RemoveForeignElements.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Strategy/RemoveForeignElements.php @@ -21,9 +21,6 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy // currently only used to determine if comments should be kept $trusted = $config->get('HTML.Trusted'); - $comment_lookup = $config->get('HTML.AllowedComments'); - $comment_regexp = $config->get('HTML.AllowedCommentsRegexp'); - $check_comments = $comment_lookup !== array() || $comment_regexp !== null; $remove_script_contents = $config->get('Core.RemoveScriptContents'); $hidden_elements = $config->get('Core.HiddenElements'); @@ -131,36 +128,22 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy if ($textify_comments !== false) { $data = $token->data; $token = new HTMLPurifier_Token_Text($data); - } elseif ($trusted || $check_comments) { - // always cleanup comments - $trailing_hyphen = false; + } elseif ($trusted) { + // keep, but perform comment cleaning if ($e) { // perform check whether or not there's a trailing hyphen if (substr($token->data, -1) == '-') { - $trailing_hyphen = true; + $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed'); } } $token->data = rtrim($token->data, '-'); $found_double_hyphen = false; while (strpos($token->data, '--') !== false) { - $found_double_hyphen = true; - $token->data = str_replace('--', '-', $token->data); - } - if ($trusted || !empty($comment_lookup[trim($token->data)]) || ($comment_regexp !== NULL && preg_match($comment_regexp, trim($token->data)))) { - // OK good - if ($e) { - if ($trailing_hyphen) { - $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed'); - } - if ($found_double_hyphen) { - 
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed'); - } + if ($e && !$found_double_hyphen) { + $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed'); } - } else { - if ($e) { - $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed'); - } - continue; + $found_double_hyphen = true; // prevent double-erroring + $token->data = str_replace('--', '-', $token->data); } } else { // strip comments diff --git a/lib/htmlpurifier/library/HTMLPurifier/URI.php b/lib/htmlpurifier/library/HTMLPurifier/URI.php index f158ef5e..efdfb2c6 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/URI.php +++ b/lib/htmlpurifier/library/HTMLPurifier/URI.php @@ -40,7 +40,7 @@ class HTMLPurifier_URI } else { // no scheme: retrieve the default one $def = $config->getDefinition('URI'); - $scheme_obj = $def->getDefaultScheme($config, $context); + $scheme_obj = $registry->getScheme($def->defaultScheme, $config, $context); if (!$scheme_obj) { // something funky happened to the default scheme object trigger_error( @@ -199,44 +199,6 @@ class HTMLPurifier_URI return $result; } - /** - * Returns true if this URL might be considered a 'local' URL given - * the current context. This is true when the host is null, or - * when it matches the host supplied to the configuration. - * - * Note that this does not do any scheme checking, so it is mostly - * only appropriate for metadata that doesn't care about protocol - * security. isBenign is probably what you actually want. 
- */ - public function isLocal($config, $context) { - if ($this->host === null) return true; - $uri_def = $config->getDefinition('URI'); - if ($uri_def->host === $this->host) return true; - return false; - } - - /** - * Returns true if this URL should be considered a 'benign' URL, - * that is: - * - * - It is a local URL (isLocal), and - * - It has a equal or better level of security - */ - public function isBenign($config, $context) { - if (!$this->isLocal($config, $context)) return false; - - $scheme_obj = $this->getSchemeObj($config, $context); - if (!$scheme_obj) return false; // conservative approach - - $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context); - if ($current_scheme_obj->secure) { - if (!$scheme_obj->secure) { - return false; - } - } - return true; - } - } // vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIDefinition.php b/lib/htmlpurifier/library/HTMLPurifier/URIDefinition.php index 40e57bb7..ea2b8fe2 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/URIDefinition.php +++ b/lib/htmlpurifier/library/HTMLPurifier/URIDefinition.php @@ -27,7 +27,6 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternal()); $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternalResources()); $this->registerFilter(new HTMLPurifier_URIFilter_HostBlacklist()); - $this->registerFilter(new HTMLPurifier_URIFilter_SafeIframe()); $this->registerFilter(new HTMLPurifier_URIFilter_MakeAbsolute()); $this->registerFilter(new HTMLPurifier_URIFilter_Munge()); } @@ -53,13 +52,9 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition protected function setupFilters($config) { foreach ($this->registeredFilters as $name => $filter) { - if ($filter->always_load) { + $conf = $config->get('URI.' . $name); + if ($conf !== false && $conf !== null) { $this->addFilter($filter, $config); - } else { - $conf = $config->get('URI.' . 
$name); - if ($conf !== false && $conf !== null) { - $this->addFilter($filter, $config); - } } } unset($this->registeredFilters); @@ -77,10 +72,6 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition if (is_null($this->defaultScheme)) $this->defaultScheme = $config->get('URI.DefaultScheme'); } - public function getDefaultScheme($config, $context) { - return HTMLPurifier_URISchemeRegistry::instance()->getScheme($this->defaultScheme, $config, $context); - } - public function filter(&$uri, $config, $context) { foreach ($this->filters as $name => $f) { $result = $f->filter($uri, $config, $context); diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIFilter.php b/lib/htmlpurifier/library/HTMLPurifier/URIFilter.php index 6a1b0b08..c116f93d 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/URIFilter.php +++ b/lib/htmlpurifier/library/HTMLPurifier/URIFilter.php @@ -4,21 +4,7 @@ * Chainable filters for custom URI processing. * * These filters can perform custom actions on a URI filter object, - * including transformation or blacklisting. A filter named Foo - * must have a corresponding configuration directive %URI.Foo, - * unless always_load is specified to be true. - * - * The following contexts may be available while URIFilters are being - * processed: - * - * - EmbeddedURI: true if URI is an embedded resource that will - * be loaded automatically on page load - * - CurrentToken: a reference to the token that is currently - * being processed - * - CurrentAttr: the name of the attribute that is currently being - * processed - * - CurrentCSSProperty: the name of the CSS property that is - * currently being processed (if applicable) + * including transformation or blacklisting. * * @warning This filter is called before scheme object validation occurs. 
* Make sure, if you require a specific scheme object, you @@ -39,15 +25,7 @@ abstract class HTMLPurifier_URIFilter public $post = false; /** - * True if this filter should always be loaded (this permits - * a filter to be named Foo without the corresponding %URI.Foo - * directive existing.) - */ - public $always_load = false; - - /** - * Performs initialization for the filter. If the filter returns - * false, this means that it shouldn't be considered active. + * Performs initialization for the filter */ public function prepare($config) {return true;} diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/HostBlacklist.php b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/HostBlacklist.php index 55fde3bf..045aa099 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/HostBlacklist.php +++ b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/HostBlacklist.php @@ -1,9 +1,5 @@ getSchemeObj($config, $context); if (!$scheme_obj) return true; // ignore unknown schemes, maybe another postfilter did it - if (!$scheme_obj->browsable) return true; // ignore non-browseable schemes, since we can't munge those in a reasonable way - if ($uri->isBenign($config, $context)) return true; // don't redirect if a benign URL + if (is_null($uri->host) || empty($scheme_obj->browsable)) { + return true; + } + // don't redirect if target host is our host + if ($uri->host === $config->getDefinition('URI')->host) { + return true; + } $this->makeReplace($uri, $config, $context); $this->replace = array_map('rawurlencode', $this->replace); diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/SafeIframe.php b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/SafeIframe.php deleted file mode 100644 index 284bb13d..00000000 --- a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/SafeIframe.php +++ /dev/null @@ -1,35 +0,0 @@ -regexp = $config->get('URI.SafeIframeRegexp'); - return true; - } - public function filter(&$uri, $config, $context) { - // check if filter not applicable - if 
(!$config->get('HTML.SafeIframe')) return true; - // check if the filter should actually trigger - if (!$context->get('EmbeddedURI', true)) return true; - $token = $context->get('CurrentToken', true); - if (!($token && $token->name == 'iframe')) return true; - // check if we actually have some whitelists enabled - if ($this->regexp === null) return false; - // actually check the whitelists - return preg_match($this->regexp, $uri->toString()); - } -} - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIScheme.php b/lib/htmlpurifier/library/HTMLPurifier/URIScheme.php index 7be95814..25eb8410 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/URIScheme.php +++ b/lib/htmlpurifier/library/HTMLPurifier/URIScheme.php @@ -19,12 +19,6 @@ abstract class HTMLPurifier_URIScheme */ public $browsable = false; - /** - * Whether or not data transmitted over this scheme is encrypted. - * https is secure, http is not. - */ - public $secure = false; - /** * Whether or not the URI always uses , resolves edge cases * with making relative URIs absolute diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/https.php b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/https.php index 159c2874..29e38091 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/https.php +++ b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/https.php @@ -6,7 +6,6 @@ class HTMLPurifier_URIScheme_https extends HTMLPurifier_URIScheme_http { public $default_port = 443; - public $secure = true; }