Revert "Update HTML Purifier to version 4.4.0."

author Andrew Dolgov <fox@madoka.volgo-balt.ru>

Tue, 5 Jun 2012 17:52:37 +0000 (21:52 +0400)

committer Andrew Dolgov <fox@madoka.volgo-balt.ru>

Tue, 5 Jun 2012 17:52:37 +0000 (21:52 +0400)
author Andrew Dolgov <fox@madoka.volgo-balt.ru>
Tue, 5 Jun 2012 17:52:37 +0000 (21:52 +0400)
committer Andrew Dolgov <fox@madoka.volgo-balt.ru>
Tue, 5 Jun 2012 17:52:37 +0000 (21:52 +0400)
diff --git a/lib/htmlpurifier/library/HTMLPurifier.includes.php b/lib/htmlpurifier/library/HTMLPurifier.includes.php

index 0ceff6a9267a70890d4e35c4e495068eca043f27..b9baf8f0a201b5bf3e77fe430087dbfa194e6d5e 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier.includes.php
+++ b/lib/htmlpurifier/library/HTMLPurifier.includes.php
@@ -7,7 +7,7 @@
   * primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
   * FILE, changes will be overwritten the next time the script is run.
   *
- * @version 4.4.0
+ * @version 4.3.0
   *
   * @warning
   *      You must *not* include any other HTML Purifier files before this file,
@@ -73,7 +73,6 @@ require 'HTMLPurifier/UnitConverter.php';
  require 'HTMLPurifier/VarParser.php';
  require 'HTMLPurifier/VarParserException.php';
  require 'HTMLPurifier/AttrDef/CSS.php';
-require 'HTMLPurifier/AttrDef/Clone.php';
  require 'HTMLPurifier/AttrDef/Enum.php';
  require 'HTMLPurifier/AttrDef/Integer.php';
  require 'HTMLPurifier/AttrDef/Lang.php';
@@ -91,7 +90,6 @@ require 'HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
  require 'HTMLPurifier/AttrDef/CSS/Filter.php';
  require 'HTMLPurifier/AttrDef/CSS/Font.php';
  require 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
-require 'HTMLPurifier/AttrDef/CSS/Ident.php';
  require 'HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
  require 'HTMLPurifier/AttrDef/CSS/Length.php';
  require 'HTMLPurifier/AttrDef/CSS/ListStyle.php';
@@ -132,12 +130,10 @@ require 'HTMLPurifier/AttrTransform/SafeEmbed.php';
  require 'HTMLPurifier/AttrTransform/SafeObject.php';
  require 'HTMLPurifier/AttrTransform/SafeParam.php';
  require 'HTMLPurifier/AttrTransform/ScriptRequired.php';
-require 'HTMLPurifier/AttrTransform/TargetBlank.php';
  require 'HTMLPurifier/AttrTransform/Textarea.php';
  require 'HTMLPurifier/ChildDef/Chameleon.php';
  require 'HTMLPurifier/ChildDef/Custom.php';
  require 'HTMLPurifier/ChildDef/Empty.php';
-require 'HTMLPurifier/ChildDef/List.php';
  require 'HTMLPurifier/ChildDef/Required.php';
  require 'HTMLPurifier/ChildDef/Optional.php';
  require 'HTMLPurifier/ChildDef/StrictBlockquote.php';
@@ -152,7 +148,6 @@ require 'HTMLPurifier/HTMLModule/CommonAttributes.php';
  require 'HTMLPurifier/HTMLModule/Edit.php';
  require 'HTMLPurifier/HTMLModule/Forms.php';
  require 'HTMLPurifier/HTMLModule/Hypertext.php';
-require 'HTMLPurifier/HTMLModule/Iframe.php';
  require 'HTMLPurifier/HTMLModule/Image.php';
  require 'HTMLPurifier/HTMLModule/Legacy.php';
  require 'HTMLPurifier/HTMLModule/List.php';
@@ -169,7 +164,6 @@ require 'HTMLPurifier/HTMLModule/Scripting.php';
  require 'HTMLPurifier/HTMLModule/StyleAttribute.php';
  require 'HTMLPurifier/HTMLModule/Tables.php';
  require 'HTMLPurifier/HTMLModule/Target.php';
-require 'HTMLPurifier/HTMLModule/TargetBlank.php';
  require 'HTMLPurifier/HTMLModule/Text.php';
  require 'HTMLPurifier/HTMLModule/Tidy.php';
  require 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
@@ -208,7 +202,6 @@ require 'HTMLPurifier/URIFilter/DisableResources.php';
  require 'HTMLPurifier/URIFilter/HostBlacklist.php';
  require 'HTMLPurifier/URIFilter/MakeAbsolute.php';
  require 'HTMLPurifier/URIFilter/Munge.php';
-require 'HTMLPurifier/URIFilter/SafeIframe.php';
  require 'HTMLPurifier/URIScheme/data.php';
  require 'HTMLPurifier/URIScheme/file.php';
  require 'HTMLPurifier/URIScheme/ftp.php';
diff --git a/lib/htmlpurifier/library/HTMLPurifier.php b/lib/htmlpurifier/library/HTMLPurifier.php

index e599e1c0c9b56710f7f988b2269557eaf9e26d0d..914ba25ae6d6318223e39468e87d7bea0c1f816d 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier.php
+++ b/lib/htmlpurifier/library/HTMLPurifier.php
@@ -19,7 +19,7 @@
   */
  
  /*
-    HTML Purifier 4.4.0 - Standards Compliant HTML Filtering
+    HTML Purifier 4.3.0 - Standards Compliant HTML Filtering
      Copyright (C) 2006-2008 Edward Z. Yang
  
      This library is free software; you can redistribute it and/or
@@ -55,10 +55,10 @@ class HTMLPurifier
  {
  
      /** Version of HTML Purifier */
-    public $version = '4.4.0';
+    public $version = '4.3.0';
  
      /** Constant with version of HTML Purifier */
-    const VERSION = '4.4.0';
+    const VERSION = '4.3.0';
  
      /** Global configuration object */
      public $config;
diff --git a/lib/htmlpurifier/library/HTMLPurifier.safe-includes.php b/lib/htmlpurifier/library/HTMLPurifier.safe-includes.php

index d49b196c429ea75d4bd2a1237fc2d3adb0a9fea3..a5c0d5bb80cd73a1a74909d89142ca451b684a90 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier.safe-includes.php
+++ b/lib/htmlpurifier/library/HTMLPurifier.safe-includes.php
@@ -67,7 +67,6 @@ require_once $__dir . '/HTMLPurifier/UnitConverter.php';
  require_once $__dir . '/HTMLPurifier/VarParser.php';
  require_once $__dir . '/HTMLPurifier/VarParserException.php';
  require_once $__dir . '/HTMLPurifier/AttrDef/CSS.php';
-require_once $__dir . '/HTMLPurifier/AttrDef/Clone.php';
  require_once $__dir . '/HTMLPurifier/AttrDef/Enum.php';
  require_once $__dir . '/HTMLPurifier/AttrDef/Integer.php';
  require_once $__dir . '/HTMLPurifier/AttrDef/Lang.php';
@@ -85,7 +84,6 @@ require_once $__dir . '/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
  require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Filter.php';
  require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Font.php';
  require_once $__dir . '/HTMLPurifier/AttrDef/CSS/FontFamily.php';
-require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Ident.php';
  require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
  require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Length.php';
  require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ListStyle.php';
@@ -126,12 +124,10 @@ require_once $__dir . '/HTMLPurifier/AttrTransform/SafeEmbed.php';
  require_once $__dir . '/HTMLPurifier/AttrTransform/SafeObject.php';
  require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php';
  require_once $__dir . '/HTMLPurifier/AttrTransform/ScriptRequired.php';
-require_once $__dir . '/HTMLPurifier/AttrTransform/TargetBlank.php';
  require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php';
  require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php';
  require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php';
  require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php';
-require_once $__dir . '/HTMLPurifier/ChildDef/List.php';
  require_once $__dir . '/HTMLPurifier/ChildDef/Required.php';
  require_once $__dir . '/HTMLPurifier/ChildDef/Optional.php';
  require_once $__dir . '/HTMLPurifier/ChildDef/StrictBlockquote.php';
@@ -146,7 +142,6 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/CommonAttributes.php';
  require_once $__dir . '/HTMLPurifier/HTMLModule/Edit.php';
  require_once $__dir . '/HTMLPurifier/HTMLModule/Forms.php';
  require_once $__dir . '/HTMLPurifier/HTMLModule/Hypertext.php';
-require_once $__dir . '/HTMLPurifier/HTMLModule/Iframe.php';
  require_once $__dir . '/HTMLPurifier/HTMLModule/Image.php';
  require_once $__dir . '/HTMLPurifier/HTMLModule/Legacy.php';
  require_once $__dir . '/HTMLPurifier/HTMLModule/List.php';
@@ -163,7 +158,6 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/Scripting.php';
  require_once $__dir . '/HTMLPurifier/HTMLModule/StyleAttribute.php';
  require_once $__dir . '/HTMLPurifier/HTMLModule/Tables.php';
  require_once $__dir . '/HTMLPurifier/HTMLModule/Target.php';
-require_once $__dir . '/HTMLPurifier/HTMLModule/TargetBlank.php';
  require_once $__dir . '/HTMLPurifier/HTMLModule/Text.php';
  require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy.php';
  require_once $__dir . '/HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
@@ -202,7 +196,6 @@ require_once $__dir . '/HTMLPurifier/URIFilter/DisableResources.php';
  require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php';
  require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php';
  require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php';
-require_once $__dir . '/HTMLPurifier/URIFilter/SafeIframe.php';
  require_once $__dir . '/HTMLPurifier/URIScheme/data.php';
  require_once $__dir . '/HTMLPurifier/URIScheme/file.php';
  require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php';
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Ident.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Ident.php

deleted file mode 100644 (file)

index 779794a..0000000
--- a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Ident.php
+++ /dev/null
@@ -1,24 +0,0 @@
-<?php
-
-/**
- * Validates based on {ident} CSS grammar production
- */
-class HTMLPurifier_AttrDef_CSS_Ident extends HTMLPurifier_AttrDef
-{
-
-    public function validate($string, $config, $context) {
-
-        $string = trim($string);
-
-        // early abort: '' and '0' (strings that convert to false) are invalid
-        if (!$string) return false;
-
-        $pattern = '/^(-?[A-Za-z_][A-Za-z_\-0-9]*)$/';
-        if (!preg_match($pattern, $string)) return false;
-        return $string;
-
-    }
-
-}
-
-// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Clone.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Clone.php

deleted file mode 100644 (file)

index ce68dbd..0000000
--- a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Clone.php
+++ /dev/null
@@ -1,28 +0,0 @@
-<?php
-
-/**
- * Dummy AttrDef that mimics another AttrDef, BUT it generates clones
- * with make.
- */
-class HTMLPurifier_AttrDef_Clone extends HTMLPurifier_AttrDef
-{
-    /**
-     * What we're cloning
-     */
-    protected $clone;
-
-    public function __construct($clone) {
-        $this->clone = $clone;
-    }
-
-    public function validate($v, $config, $context) {
-        return $this->clone->validate($v, $config, $context);
-    }
-
-    public function make($string) {
-        return clone $this->clone;
-    }
-
-}
-
-// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Color.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Color.php

index 00d865723b39905ba41008a4d22a7c1ac0a31d0c..d01e20454ea9f8385160b3acf7c95c7e5ca2b267 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Color.php
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Color.php
@@ -14,7 +14,7 @@ class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
          $string = trim($string);
  
          if (empty($string)) return false;
-        if (isset($colors[strtolower($string)])) return $colors[$string];
+        if (isset($colors[$string])) return $colors[$string];
          if ($string[0] === '#') $hex = substr($string, 1);
          else $hex = $string;
  
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php

index 0015fa1ebb641ced02f23ac154cda994a5e5d557..81d03762dea58956773a19786ebf61b70b7b6b69 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php
@@ -12,22 +12,12 @@
  class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
  {
  
-    // selector is NOT a valid thing to use for IDREFs, because IDREFs
-    // *must* target IDs that exist, whereas selector #ids do not.
-
-    /**
-     * Determines whether or not we're validating an ID in a CSS
-     * selector context.
-     */
-    protected $selector;
-
-    public function __construct($selector = false) {
-        $this->selector = $selector;
-    }
+    // ref functionality disabled, since we also have to verify
+    // whether or not the ID it refers to exists
  
      public function validate($id, $config, $context) {
  
-        if (!$this->selector && !$config->get('Attr.EnableID')) return false;
+        if (!$config->get('Attr.EnableID')) return false;
  
          $id = trim($id); // trim it first
  
@@ -43,10 +33,10 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
                  '%Attr.IDPrefix is set', E_USER_WARNING);
          }
  
-        if (!$this->selector) {
+        //if (!$this->ref) {
              $id_accumulator =& $context->get('IDAccumulator');
              if (isset($id_accumulator->ids[$id])) return false;
-        }
+        //}
  
          // we purposely avoid using regex, hopefully this is faster
  
@@ -66,7 +56,7 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
              return false;
          }
  
-        if (!$this->selector && $result) $id_accumulator->add($id);
+        if (/*!$this->ref && */$result) $id_accumulator->add($id);
  
          // if no change was made to the ID, return the result
          // else, return the new id if stripping whitespace made it
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI.php

index c2b6846712ba38a9abe3efc2e3aae0ab852e5877..01a6d83e9526f81e3983863f04f8505d39ae2b36 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI.php
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI.php
@@ -19,7 +19,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
      }
  
      public function make($string) {
-        $embeds = ($string === 'embedded');
+        $embeds = (bool) $string;
          return new HTMLPurifier_AttrDef_URI($embeds);
      }
  
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php

index 125decb2df4b1a25b2b068339225e85a58618e7c..feca469d7030d16c55e97ed75d119c42b8bb33c8 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php
@@ -44,8 +44,9 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
  
          // A regular domain name.
  
-        // This doesn't match I18N domain names, but we don't have proper IRI support,
-        // so force users to insert Punycode.
+        // This breaks I18N domain names, but we don't have proper IRI support,
+        // so force users to insert Punycode. If there's complaining we'll
+        // try to fix things into an international friendly form.
  
          // The productions describing this are:
          $a   = '[a-z]';     // alpha
@@ -56,44 +57,10 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
          // toplabel    = alpha | alpha *( alphanum | "-" ) alphanum
          $toplabel      = "$a($and*$an)?";
          // hostname    = *( domainlabel "." ) toplabel [ "." ]
-        if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
-            return $string;
-        }
-
-        // If we have Net_IDNA2 support, we can support IRIs by
-        // punycoding them. (This is the most portable thing to do,
-        // since otherwise we have to assume browsers support
-
-        if ($config->get('Core.EnableIDNA')) {
-            $idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true));
-            // we need to encode each period separately
-            $parts = explode('.', $string);
-            try {
-                $new_parts = array();
-                foreach ($parts as $part) {
-                    $encodable = false;
-                    for ($i = 0, $c = strlen($part); $i < $c; $i++) {
-                        if (ord($part[$i]) > 0x7a) {
-                            $encodable = true;
-                            break;
-                        }
-                    }
-                    if (!$encodable) {
-                        $new_parts[] = $part;
-                    } else {
-                        $new_parts[] = $idna->encode($part);
-                    }
-                }
-                $string = implode('.', $new_parts);
-                if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
-                    return $string;
-                }
-            } catch (Exception $e) {
-                // XXX error reporting
-            }
-        }
+        $match = preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string);
+        if (!$match) return false;
  
-        return false;
+        return $string;
      }
  
  }
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Nofollow.php b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Nofollow.php

index f7fb1209b3be74ddbd3e03f7bcec85a35abe0646..573b42c9c5a1b73abc09caef1e0bbf79f7e1daa6 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Nofollow.php
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Nofollow.php
@@ -24,13 +24,9 @@ class HTMLPurifier_AttrTransform_Nofollow extends HTMLPurifier_AttrTransform
          $url = $this->parser->parse($attr['href']);
          $scheme = $url->getSchemeObj($config, $context);
  
-        if ($scheme->browsable && !$url->isLocal($config, $context)) {
+        if (!is_null($url->host) && $scheme !== false && $scheme->browsable) {
              if (isset($attr['rel'])) {
-                $rels = explode(' ', $attr);
-                if (!in_array('nofollow', $rels)) {
-                    $rels[] = 'nofollow';
-                }
-                $attr['rel'] = implode(' ', $rels);
+                $attr['rel'] .= ' nofollow';
              } else {
                  $attr['rel'] = 'nofollow';
              }
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/TargetBlank.php b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/TargetBlank.php

deleted file mode 100644 (file)

index a6502c7..0000000
--- a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/TargetBlank.php
+++ /dev/null
@@ -1,38 +0,0 @@
-<?php
-
-// must be called POST validation
-
-/**
- * Adds target="blank" to all outbound links.  This transform is
- * only attached if Attr.TargetBlank is TRUE.  This works regardless
- * of whether or not Attr.AllowedFrameTargets
- */
-class HTMLPurifier_AttrTransform_TargetBlank extends HTMLPurifier_AttrTransform
-{
-    private $parser;
-
-    public function __construct() {
-        $this->parser = new HTMLPurifier_URIParser();
-    }
-
-    public function transform($attr, $config, $context) {
-
-        if (!isset($attr['href'])) {
-            return $attr;
-        }
-
-        // XXX Kind of inefficient
-        $url = $this->parser->parse($attr['href']);
-        $scheme = $url->getSchemeObj($config, $context);
-
-        if ($scheme->browsable && !$url->isBenign($config, $context)) {
-            $attr['target'] = 'blank';
-        }
-
-        return $attr;
-
-    }
-
-}
-
-// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrTypes.php b/lib/htmlpurifier/library/HTMLPurifier/AttrTypes.php

index 6f985ff934750f0c5d068e991228748e8bf3ce93..fc2ea4e5881bfc301db5fc7fcd9cd1aae9082932 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier/AttrTypes.php
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrTypes.php
@@ -15,13 +15,6 @@ class HTMLPurifier_AttrTypes
       * types.
       */
      public function __construct() {
-        // XXX This is kind of poor, since we don't actually /clone/
-        // instances; instead, we use the supplied make() attribute. So,
-        // the underlying class must know how to deal with arguments.
-        // With the old implementation of Enum, that ignored its
-        // arguments when handling a make dispatch, the IAlign
-        // definition wouldn't work.
-
          // pseudo-types, must be instantiated via shorthand
          $this->info['Enum']    = new HTMLPurifier_AttrDef_Enum();
          $this->info['Bool']    = new HTMLPurifier_AttrDef_HTML_Bool();
@@ -36,9 +29,6 @@ class HTMLPurifier_AttrTypes
          $this->info['URI']      = new HTMLPurifier_AttrDef_URI();
          $this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang();
          $this->info['Color']    = new HTMLPurifier_AttrDef_HTML_Color();
-        $this->info['IAlign']   = self::makeEnum('top,middle,bottom,left,right');
-        $this->info['LAlign']   = self::makeEnum('top,bottom,left,right');
-        $this->info['FrameTarget'] = new HTMLPurifier_AttrDef_HTML_FrameTarget();
  
          // unimplemented aliases
          $this->info['ContentType'] = new HTMLPurifier_AttrDef_Text();
@@ -54,10 +44,6 @@ class HTMLPurifier_AttrTypes
          $this->info['Number']   = new HTMLPurifier_AttrDef_Integer(false, false, true);
      }
  
-    private static function makeEnum($in) {
-        return new HTMLPurifier_AttrDef_Clone(new HTMLPurifier_AttrDef_Enum(explode(',', $in)));
-    }
-
      /**
       * Retrieves a type
       * @param $type String type name
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/List.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/List.php

deleted file mode 100644 (file)

index cdaa289..0000000
--- a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/List.php
+++ /dev/null
@@ -1,120 +0,0 @@
-<?php
-
-/**
- * Definition for list containers ul and ol.
- */
-class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
-{
-    public $type = 'list';
-    // lying a little bit, so that we can handle ul and ol ourselves
-    // XXX: This whole business with 'wrap' is all a bit unsatisfactory
-    public $elements = array('li' => true, 'ul' => true, 'ol' => true);
-    public function validateChildren($tokens_of_children, $config, $context) {
-        // Flag for subclasses
-        $this->whitespace = false;
-
-        // if there are no tokens, delete parent node
-        if (empty($tokens_of_children)) return false;
-
-        // the new set of children
-        $result = array();
-
-        // current depth into the nest
-        $nesting = 0;
-
-        // a little sanity check to make sure it's not ALL whitespace
-        $all_whitespace = true;
-
-        $seen_li = false;
-        $need_close_li = false;
-
-        foreach ($tokens_of_children as $token) {
-            if (!empty($token->is_whitespace)) {
-                $result[] = $token;
-                continue;
-            }
-            $all_whitespace = false; // phew, we're not talking about whitespace
-
-            if ($nesting == 1 && $need_close_li) {
-                $result[] = new HTMLPurifier_Token_End('li');
-                $nesting--;
-                $need_close_li = false;
-            }
-
-            $is_child = ($nesting == 0);
-
-            if ($token instanceof HTMLPurifier_Token_Start) {
-                $nesting++;
-            } elseif ($token instanceof HTMLPurifier_Token_End) {
-                $nesting--;
-            }
-
-            if ($is_child) {
-                if ($token->name === 'li') {
-                    // good
-                    $seen_li = true;
-                } elseif ($token->name === 'ul' || $token->name === 'ol') {
-                    // we want to tuck this into the previous li
-                    $need_close_li = true;
-                    $nesting++;
-                    if (!$seen_li) {
-                        // create a new li element
-                        $result[] = new HTMLPurifier_Token_Start('li');
-                    } else {
-                        // backtrack until </li> found
-                        while(true) {
-                            $t = array_pop($result);
-                            if ($t instanceof HTMLPurifier_Token_End) {
-                                // XXX actually, these invariants could very plausibly be violated
-                                // if we are doing silly things with modifying the set of allowed elements.
-                                // FORTUNATELY, it doesn't make a difference, since the allowed
-                                // elements are hard-coded here!
-                                if ($t->name !== 'li') {
-                                    trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
-                                    return false;
-                                }
-                                break;
-                            } elseif ($t instanceof HTMLPurifier_Token_Empty) { // bleagh
-                                if ($t->name !== 'li') {
-                                    trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
-                                    return false;
-                                }
-                                // XXX this should have a helper for it...
-                                $result[] = new HTMLPurifier_Token_Start('li', $t->attr, $t->line, $t->col, $t->armor);
-                                break;
-                            } else {
-                                if (!$t->is_whitespace) {
-                                    trigger_error("Only whitespace present invariant violated in List ChildDef", E_USER_ERROR);
-                                    return false;
-                                }
-                            }
-                        }
-                    }
-                } else {
-                    // start wrapping (this doesn't precisely mimic
-                    // browser behavior, but what browsers do is kind of
-                    // hard to mimic in a standards compliant way
-                    // XXX Actually, this has no impact in practice,
-                    // because this gets handled earlier. Arguably,
-                    // we should rip out all of that processing
-                    $result[] = new HTMLPurifier_Token_Start('li');
-                    $nesting++;
-                    $seen_li = true;
-                    $need_close_li = true;
-                }
-            }
-            $result[] = $token;
-        }
-        if ($need_close_li) {
-            $result[] = new HTMLPurifier_Token_End('li');
-        }
-        if (empty($result)) return false;
-        if ($all_whitespace) {
-            return false;
-        }
-        if ($tokens_of_children == $result) return true;
-        return $result;
-    }
-}
-
-// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php

index 9a93421a1a3e2dbdcb4720ecc5a7482e5032205c..34f0227dd2cc549f4dacef4efd9d6ae8b43680d9 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php
+++ b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php
@@ -1,33 +1,7 @@
  <?php
  
  /**
- * Definition for tables.  The general idea is to extract out all of the
- * essential bits, and then reconstruct it later.
- *
- * This is a bit confusing, because the DTDs and the W3C
- * validators seem to disagree on the appropriate definition. The
- * DTD claims:
- *
- *      (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)
- *
- * But actually, the HTML4 spec then has this to say:
- *
- *      The TBODY start tag is always required except when the table
- *      contains only one table body and no table head or foot sections.
- *      The TBODY end tag may always be safely omitted.
- *
- * So the DTD is kind of wrong.  The validator is, unfortunately, kind
- * of on crack.
- *
- * The definition changed again in XHTML1.1; and in my opinion, this
- * formulation makes the most sense.
- *
- *      caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))
- *
- * Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode.
- * If we encounter a thead, tfoot or tbody, we are placed in the former
- * mode, and we *must* wrap any stray tr segments with a tbody. But if
- * we don't run into any of them, just have tr tags is OK.
+ * Definition for tables
   */
  class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
  {
@@ -59,8 +33,6 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
          $collection = array(); // collected nodes
          $tag_index = 0; // the first node might be whitespace,
                              // so this tells us where the start tag is
-        $tbody_mode = false; // if true, then we need to wrap any stray
-                             // <tr>s with a <tbody>.
  
          foreach ($tokens_of_children as $token) {
              $is_child = ($nesting == 0);
@@ -79,9 +51,8 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
                      // okay, let's stash the tokens away
                      // first token tells us the type of the collection
                      switch ($collection[$tag_index]->name) {
-                        case 'tbody':
-                            $tbody_mode = true;
                          case 'tr':
+                        case 'tbody':
                              $content[] = $collection;
                              break;
                          case 'caption':
@@ -90,28 +61,13 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
                              break;
                          case 'thead':
                          case 'tfoot':
-                            $tbody_mode = true;
-                            // XXX This breaks rendering properties with
-                            // Firefox, which never floats a <thead> to
-                            // the top. Ever. (Our scheme will float the
-                            // first <thead> to the top.)  So maybe
-                            // <thead>s that are not first should be
-                            // turned into <tbody>? Very tricky, indeed.
-
                              // access the appropriate variable, $thead or $tfoot
                              $var = $collection[$tag_index]->name;
                              if ($$var === false) {
                                  $$var = $collection;
                              } else {
-                                // Oops, there's a second one! What
-                                // should we do?  Current behavior is to
-                                // transmutate the first and last entries into
-                                // tbody tags, and then put into content.
-                                // Maybe a better idea is to *attach
-                                // it* to the existing thead or tfoot?
-                                // We don't do this, because Firefox
-                                // doesn't float an extra tfoot to the
-                                // bottom like it does for the first one.
+                                // transmutate the first and less entries into
+                                // tbody tags, and then put into content
                                  $collection[$tag_index]->name = 'tbody';
                                  $collection[count($collection)-1]->name = 'tbody';
                                  $content[] = $collection;
@@ -170,48 +126,7 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
          if ($cols !== false)    foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
          if ($thead !== false)   $ret = array_merge($ret, $thead);
          if ($tfoot !== false)   $ret = array_merge($ret, $tfoot);
-
-        if ($tbody_mode) {
-            // a little tricky, since the start of the collection may be
-            // whitespace
-            $inside_tbody = false;
-            foreach ($content as $token_array) {
-                // find the starting token
-                foreach ($token_array as $t) {
-                    if ($t->name === 'tr' || $t->name === 'tbody') {
-                        break;
-                    }
-                } // iterator variable carries over
-                if ($t->name === 'tr') {
-                    if ($inside_tbody) {
-                        $ret = array_merge($ret, $token_array);
-                    } else {
-                        $ret[] = new HTMLPurifier_Token_Start('tbody');
-                        $ret = array_merge($ret, $token_array);
-                        $inside_tbody = true;
-                    }
-                } elseif ($t->name === 'tbody') {
-                    if ($inside_tbody) {
-                        $ret[] = new HTMLPurifier_Token_End('tbody');
-                        $inside_tbody = false;
-                        $ret = array_merge($ret, $token_array);
-                    } else {
-                        $ret = array_merge($ret, $token_array);
-                    }
-                } else {
-                    trigger_error("tr/tbody in content invariant failed in Table ChildDef", E_USER_ERROR);
-                }
-            }
-            if ($inside_tbody) {
-                $ret[] = new HTMLPurifier_Token_End('tbody');
-            }
-        } else {
-            foreach ($content as $token_array) {
-                // invariant: everything in here is <tr>s
-                $ret = array_merge($ret, $token_array);
-            }
-        }
-
+        foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
          if (!empty($collection) && $is_collecting == false){
              // grab the trailing space
              $ret = array_merge($ret, $collection);
diff --git a/lib/htmlpurifier/library/HTMLPurifier/Config.php b/lib/htmlpurifier/library/HTMLPurifier/Config.php

index 554980f2225a0384e632853bf33f15590340a74f..b6551398f8440f7b399f3409c0f889b87087e543 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier/Config.php
+++ b/lib/htmlpurifier/library/HTMLPurifier/Config.php
@@ -20,7 +20,7 @@ class HTMLPurifier_Config
      /**
       * HTML Purifier's version
       */
-    public $version = '4.4.0';
+    public $version = '4.3.0';
  
      /**
       * Bool indicator whether or not to automatically finalize
@@ -44,7 +44,7 @@ class HTMLPurifier_Config
      /**
       * Parser for variables
       */
-    protected $parser = null;
+    protected $parser;
  
      /**
       * Reference HTMLPurifier_ConfigSchema for value checking
@@ -668,7 +668,7 @@ class HTMLPurifier_Config
       */
      public function finalize() {
          $this->finalized = true;
-        $this->parser = null;
+        unset($this->parser);
      }
  
      /**
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange/Namespace.php b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange/Namespace.php

new file mode 100755 (executable)

index 0000000..3ffac0a
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange/Namespace.php
@@ -0,0 +1,21 @@
+<?php
+
+/**
+ * Interchange component class describing namespaces.
+ */
+class HTMLPurifier_ConfigSchema_Interchange_Namespace
+{
+
+    /**
+     * Name of namespace defined.
+     */
+    public $namespace;
+
+    /**
+     * HTML description.
+     */
+    public $description;
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema.ser b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema.ser

index b106bcf798a893df8b98c41068b5fc014d4af770..245ba5d2d09c71529c7e1357160a3393ff280a4d 100644 (file)

Binary files a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema.ser and b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema.ser differ
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.txt

new file mode 100755 (executable)

index 0000000..2d72049
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.txt
@@ -0,0 +1,3 @@
+Attr
+DESCRIPTION: Features regarding attribute validation.
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.txt

new file mode 100755 (executable)

index 0000000..161a52e
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.txt
@@ -0,0 +1,3 @@
+AutoFormat
+DESCRIPTION: Configuration for activating auto-formatting functionality (also known as <code>Injector</code>s)
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormatParam.PurifierLinkifyDocURL.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormatParam.PurifierLinkifyDocURL.txt

new file mode 100755 (executable)

index 0000000..3e8309e
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormatParam.PurifierLinkifyDocURL.txt
@@ -0,0 +1,12 @@
+AutoFormatParam.PurifierLinkifyDocURL
+TYPE: string
+VERSION: 2.0.1
+DEFAULT: '#%s'
+--DESCRIPTION--
+
+<p>
+  Location of configuration documentation to link to, let %s substitute
+  into the configuration's namespace and directive names sans the percent
+  sign.
+</p>
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormatParam.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormatParam.txt

new file mode 100755 (executable)

index 0000000..6097a55
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormatParam.txt
@@ -0,0 +1,3 @@
+AutoFormatParam
+DESCRIPTION: Configuration for customizing auto-formatting functionality
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.txt

new file mode 100755 (executable)

index 0000000..d14b490
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.txt
@@ -0,0 +1,3 @@
+CSS
+DESCRIPTION: Configuration regarding allowed CSS.
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Cache.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Cache.txt

new file mode 100755 (executable)

index 0000000..57f3023
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Cache.txt
@@ -0,0 +1,3 @@
+Cache
+DESCRIPTION: Configuration for DefinitionCache and related subclasses.
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.ColorKeywords.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.ColorKeywords.txt

index c572c14ec177952da596776b05042ed6391f72fb..08b381d34c1aeb49f55293decbe18588826d95e9 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.ColorKeywords.txt
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.ColorKeywords.txt
@@ -24,6 +24,5 @@ array (
  --DESCRIPTION--
  
  Lookup array of color names to six digit hexadecimal number corresponding
-to color, with preceding hash mark. Used when parsing colors.  The lookup
-is done in a case-insensitive manner.
+to color, with preceding hash mark. Used when parsing colors.
  --# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.EnableIDNA.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.EnableIDNA.txt

deleted file mode 100644 (file)

index ce243c3..0000000
--- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.EnableIDNA.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-Core.EnableIDNA
-TYPE: bool
-DEFAULT: false
-VERSION: 4.4.0
---DESCRIPTION--
-Allows international domain names in URLs.  This configuration option
-requires the PEAR Net_IDNA2 module to be installed.  It operates by
-punycoding any internationalized host names for maximum portability.
---# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.txt

new file mode 100755 (executable)

index 0000000..5edfe07
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.txt
@@ -0,0 +1,3 @@
+Core
+DESCRIPTION: Core features that are always available.
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Filter.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Filter.txt

new file mode 100755 (executable)

index 0000000..f2d25a1
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Filter.txt
@@ -0,0 +1,3 @@
+Filter
+DESCRIPTION: Directives for turning filters on and off, or specifying custom filters.
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.ExtractStyleBlocksEscaping.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.ExtractStyleBlocksEscaping.txt

new file mode 100755 (executable)

index 0000000..d436ed0
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.ExtractStyleBlocksEscaping.txt
@@ -0,0 +1,14 @@
+FilterParam.ExtractStyleBlocksEscaping
+TYPE: bool
+VERSION: 3.0.0
+DEFAULT: true
+ALIASES: Filter.ExtractStyleBlocksEscaping
+--DESCRIPTION--
+
+<p>
+  Whether or not to escape the dangerous characters &lt;, &gt; and &amp;
+  as \3C, \3E and \26, respectively. This can be safely set to false
+  if the contents of StyleBlocks will be placed in an external stylesheet,
+  where there is no risk of it being interpreted as HTML.
+</p>
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.ExtractStyleBlocksScope.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.ExtractStyleBlocksScope.txt

new file mode 100755 (executable)

index 0000000..3943529
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.ExtractStyleBlocksScope.txt
@@ -0,0 +1,29 @@
+FilterParam.ExtractStyleBlocksScope
+TYPE: string/null
+VERSION: 3.0.0
+DEFAULT: NULL
+ALIASES: Filter.ExtractStyleBlocksScope
+--DESCRIPTION--
+
+<p>
+  If you would like users to be able to define external stylesheets, but
+  only allow them to specify CSS declarations for a specific node and
+  prevent them from fiddling with other elements, use this directive.
+  It accepts any valid CSS selector, and will prepend this to any
+  CSS declaration extracted from the document. For example, if this
+  directive is set to <code>#user-content</code> and a user uses the
+  selector <code>a:hover</code>, the final selector will be
+  <code>#user-content a:hover</code>.
+</p>
+<p>
+  The comma shorthand may be used; consider the above example, with
+  <code>#user-content, #user-content2</code>, the final selector will
+  be <code>#user-content a:hover, #user-content2 a:hover</code>.
+</p>
+<p>
+  <strong>Warning:</strong> It is possible for users to bypass this measure
+  using a naughty + selector. This is a bug in CSS Tidy 1.3, not HTML
+  Purifier, and I am working to get it fixed. Until then, HTML Purifier
+  performs a basic check to prevent this.
+</p>
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.ExtractStyleBlocksTidyImpl.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.ExtractStyleBlocksTidyImpl.txt

new file mode 100755 (executable)

index 0000000..cafccf8
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.ExtractStyleBlocksTidyImpl.txt
@@ -0,0 +1,15 @@
+FilterParam.ExtractStyleBlocksTidyImpl
+TYPE: mixed/null
+VERSION: 3.1.0
+DEFAULT: NULL
+--DESCRIPTION--
+<p>
+  If left NULL, HTML Purifier will attempt to instantiate a <code>csstidy</code>
+  class to use for internal cleaning. This will usually be good enough.
+</p>
+<p>
+  However, for trusted user input, you can set this to <code>false</code> to
+  disable cleaning. In addition, you can supply your own concrete implementation
+  of Tidy's interface to use, although I don't know why you'd want to do that.
+</p>
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.txt

new file mode 100755 (executable)

index 0000000..dff9784
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.txt
@@ -0,0 +1,3 @@
+FilterParam
+DESCRIPTION: Configuration for filters.
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedComments.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedComments.txt

deleted file mode 100644 (file)

index 140e214..0000000
--- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedComments.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-HTML.AllowedComments
-TYPE: lookup
-VERSION: 4.4.0
-DEFAULT: array()
---DESCRIPTION--
-A whitelist which indicates what explicit comment bodies should be
-allowed, modulo leading and trailing whitespace.  See also %HTML.AllowedCommentsRegexp
-(these directives are union'ed together, so a comment is considered
-valid if any directive deems it valid.)
---# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedCommentsRegexp.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedCommentsRegexp.txt

deleted file mode 100644 (file)

index f22e977..0000000
--- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedCommentsRegexp.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-HTML.AllowedCommentsRegexp
-TYPE: string/null
-VERSION: 4.4.0
-DEFAULT: NULL
---DESCRIPTION--
-A regexp, which if it matches the body of a comment, indicates that
-it should be allowed. Trailing and leading spaces are removed prior
-to running this regular expression.
-<strong>Warning:</strong> Make sure you specify
-correct anchor metacharacters <code>^regex$</code>, otherwise you may accept
-comments that you did not mean to! In particular, the regex <code>/foo|bar/</code>
-is probably not sufficiently strict, since it also allows <code>foobar</code>.
-See also %HTML.AllowedComments (these directives are union'ed together,
-so a comment is considered valid if any directive deems it valid.)
---# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.SafeIframe.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.SafeIframe.txt

deleted file mode 100644 (file)

index 5eb6ec2..0000000
--- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.SafeIframe.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-HTML.SafeIframe
-TYPE: bool
-VERSION: 4.4.0
-DEFAULT: false
---DESCRIPTION--
-<p>
-    Whether or not to permit iframe tags in untrusted documents.  This
-    directive must be accompanied by a whitelist of permitted iframes,
-    such as %URI.SafeIframeRegexp, otherwise it will fatally error.
-    This directive has no effect on strict doctypes, as iframes are not
-    valid.
-</p>
---# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.TargetBlank.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.TargetBlank.txt

deleted file mode 100644 (file)

index 587a167..0000000
--- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.TargetBlank.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-HTML.TargetBlank
-TYPE: bool
-VERSION: 4.4.0
-DEFAULT: FALSE
---DESCRIPTION--
-If enabled, <code>target=blank</code> attributes are added to all outgoing links.
-(This includes links from an HTTPS version of a page to an HTTP version.)
---# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.txt

new file mode 100755 (executable)

index 0000000..f32ceb5
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.txt
@@ -0,0 +1,3 @@
+HTML
+DESCRIPTION: Configuration regarding allowed HTML.
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.txt

new file mode 100755 (executable)

index 0000000..7849d60
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.txt
@@ -0,0 +1,3 @@
+Output
+DESCRIPTION: Configuration relating to the generation of (X)HTML.
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Test.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Test.txt

new file mode 100755 (executable)

index 0000000..5025f9d
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Test.txt
@@ -0,0 +1,3 @@
+Test
+DESCRIPTION: Developer testing configuration for our unit tests.
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.SafeIframeRegexp.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.SafeIframeRegexp.txt

deleted file mode 100644 (file)

index 7908483..0000000
--- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.SafeIframeRegexp.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-URI.SafeIframeRegexp
-TYPE: string/null
-VERSION: 4.4.0
-DEFAULT: NULL
---DESCRIPTION--
-<p>
-    A PCRE regular expression that will be matched against an iframe URI.  This is
-    a relatively inflexible scheme, but works well enough for the most common
-    use-case of iframes: embedded video.  This directive only has an effect if
-    %HTML.SafeIframe is enabled.  Here are some example values:
-</p>
-<ul>
-    <li><code>%^http://www.youtube.com/embed/%</code> - Allow YouTube videos</li>
-    <li><code>%^http://player.vimeo.com/video/%</code> - Allow Vimeo videos</li>
-    <li><code>%^http://(www.youtube.com/embed/|player.vimeo.com/video/)%</code> - Allow both</li>
-</ul>
-<p>
-    Note that this directive does not give you enough granularity to, say, disable
-    all <code>autoplay</code> videos.  Pipe up on the HTML Purifier forums if this
-    is a capability you want.
-</p>
---# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.txt

new file mode 100755 (executable)

index 0000000..a13060f
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.txt
@@ -0,0 +1,3 @@
+URI
+DESCRIPTION: Features regarding Uniform Resource Identifiers.
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/Encoder.php b/lib/htmlpurifier/library/HTMLPurifier/Encoder.php

index 9fa76bd18c84bd133c5799556360efb5e8900cf5..2b3140caaf575a15c22550b864839c50c0d6b439 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier/Encoder.php
+++ b/lib/htmlpurifier/library/HTMLPurifier/Encoder.php
@@ -19,68 +19,6 @@ class HTMLPurifier_Encoder
       */
      public static function muteErrorHandler() {}
  
-    /**
-     * iconv wrapper which mutes errors, but doesn't work around bugs.
-     */
-    public static function unsafeIconv($in, $out, $text) {
-        set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
-        $r = iconv($in, $out, $text);
-        restore_error_handler();
-        return $r;
-    }
-
-    /**
-     * iconv wrapper which mutes errors and works around bugs.
-     */
-    public static function iconv($in, $out, $text, $max_chunk_size = 8000) {
-        $code = self::testIconvTruncateBug();
-        if ($code == self::ICONV_OK) {
-            return self::unsafeIconv($in, $out, $text);
-        } elseif ($code == self::ICONV_TRUNCATES) {
-            // we can only work around this if the input character set
-            // is utf-8
-            if ($in == 'utf-8') {
-                if ($max_chunk_size < 4) {
-                    trigger_error('max_chunk_size is too small', E_USER_WARNING);
-                    return false;
-                }
-                // split into 8000 byte chunks, but be careful to handle
-                // multibyte boundaries properly
-                if (($c = strlen($text)) <= $max_chunk_size) {
-                    return self::unsafeIconv($in, $out, $text);
-                }
-                $r = '';
-                $i = 0;
-                while (true) {
-                    if ($i + $max_chunk_size >= $c) {
-                        $r .= self::unsafeIconv($in, $out, substr($text, $i));
-                        break;
-                    }
-                    // wibble the boundary
-                    if (0x80 != (0xC0 & ord($text[$i + $max_chunk_size]))) {
-                        $chunk_size = $max_chunk_size;
-                    } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 1]))) {
-                        $chunk_size = $max_chunk_size - 1;
-                    } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 2]))) {
-                        $chunk_size = $max_chunk_size - 2;
-                    } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 3]))) {
-                        $chunk_size = $max_chunk_size - 3;
-                    } else {
-                        return false; // rather confusing UTF-8...
-                    }
-                    $chunk = substr($text, $i, $chunk_size); // substr doesn't mind overlong lengths
-                    $r .= self::unsafeIconv($in, $out, $chunk);
-                    $i += $chunk_size;
-                }
-                return $r;
-            } else {
-                return false;
-            }
-        } else {
-            return false;
-        }
-    }
-
      /**
       * Cleans a UTF-8 string for well-formedness and SGML validity
       *
@@ -322,14 +260,6 @@ class HTMLPurifier_Encoder
          return $ret;
      }
  
-    public static function iconvAvailable() {
-        static $iconv = null;
-        if ($iconv === null) {
-            $iconv = function_exists('iconv') && self::testIconvTruncateBug() != self::ICONV_UNUSABLE;
-        }
-        return $iconv;
-    }
-
      /**
       * Converts a string to UTF-8 based on configuration.
       */
@@ -337,22 +267,25 @@ class HTMLPurifier_Encoder
          $encoding = $config->get('Core.Encoding');
          if ($encoding === 'utf-8') return $str;
          static $iconv = null;
-        if ($iconv === null) $iconv = self::iconvAvailable();
+        if ($iconv === null) $iconv = function_exists('iconv');
+        set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
          if ($iconv && !$config->get('Test.ForceNoIconv')) {
-            // unaffected by bugs, since UTF-8 support all characters
-            $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
+            $str = iconv($encoding, 'utf-8//IGNORE', $str);
              if ($str === false) {
                  // $encoding is not a valid encoding
+                restore_error_handler();
                  trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR);
                  return '';
              }
              // If the string is bjorked by Shift_JIS or a similar encoding
              // that doesn't support all of ASCII, convert the naughty
              // characters to their true byte-wise ASCII/UTF-8 equivalents.
-            $str = strtr($str, self::testEncodingSupportsASCII($encoding));
+            $str = strtr($str, HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding));
+            restore_error_handler();
              return $str;
          } elseif ($encoding === 'iso-8859-1') {
              $str = utf8_encode($str);
+            restore_error_handler();
              return $str;
          }
          trigger_error('Encoding not supported, please install iconv', E_USER_ERROR);
@@ -365,15 +298,16 @@ class HTMLPurifier_Encoder
       */
      public static function convertFromUTF8($str, $config, $context) {
          $encoding = $config->get('Core.Encoding');
-        if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
-            $str = self::convertToASCIIDumbLossless($str);
-        }
          if ($encoding === 'utf-8') return $str;
          static $iconv = null;
-        if ($iconv === null) $iconv = self::iconvAvailable();
+        if ($iconv === null) $iconv = function_exists('iconv');
+        if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
+            $str = HTMLPurifier_Encoder::convertToASCIIDumbLossless($str);
+        }
+        set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
          if ($iconv && !$config->get('Test.ForceNoIconv')) {
              // Undo our previous fix in convertToUTF8, otherwise iconv will barf
-            $ascii_fix = self::testEncodingSupportsASCII($encoding);
+            $ascii_fix = HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding);
              if (!$escape && !empty($ascii_fix)) {
                  $clear_fix = array();
                  foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = '';
@@ -381,17 +315,15 @@ class HTMLPurifier_Encoder
              }
              $str = strtr($str, array_flip($ascii_fix));
              // Normal stuff
-            $str = self::iconv('utf-8', $encoding . '//IGNORE', $str);
+            $str = iconv('utf-8', $encoding . '//IGNORE', $str);
+            restore_error_handler();
              return $str;
          } elseif ($encoding === 'iso-8859-1') {
              $str = utf8_decode($str);
+            restore_error_handler();
              return $str;
          }
          trigger_error('Encoding not supported', E_USER_ERROR);
-        // You might be tempted to assume that the ASCII representation
-        // might be OK, however, this is *not* universally true over all
-        // encodings.  So we take the conservative route here, rather
-        // than forcibly turn on %Core.EscapeNonASCIICharacters
      }
  
      /**
@@ -441,49 +373,6 @@ class HTMLPurifier_Encoder
          return $result;
      }
  
-    /** No bugs detected in iconv. */
-    const ICONV_OK = 0;
-
-    /** Iconv truncates output if converting from UTF-8 to another
-     *  character set with //IGNORE, and a non-encodable character is found */
-    const ICONV_TRUNCATES = 1;
-
-    /** Iconv does not support //IGNORE, making it unusable for
-     *  transcoding purposes */
-    const ICONV_UNUSABLE = 2;
-
-    /**
-     * glibc iconv has a known bug where it doesn't handle the magic
-     * //IGNORE stanza correctly.  In particular, rather than ignore
-     * characters, it will return an EILSEQ after consuming some number
-     * of characters, and expect you to restart iconv as if it were
-     * an E2BIG.  Old versions of PHP did not respect the errno, and
-     * returned the fragment, so as a result you would see iconv
-     * mysteriously truncating output. We can work around this by
-     * manually chopping our input into segments of about 8000
-     * characters, as long as PHP ignores the error code.  If PHP starts
-     * paying attention to the error code, iconv becomes unusable.
-     *
-     * @returns Error code indicating severity of bug.
-     */
-    public static function testIconvTruncateBug() {
-        static $code = null;
-        if ($code === null) {
-            // better not use iconv, otherwise infinite loop!
-            $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000));
-            if ($r === false) {
-                $code = self::ICONV_UNUSABLE;
-            } elseif (($c = strlen($r)) < 9000) {
-                $code = self::ICONV_TRUNCATES;
-            } elseif ($c > 9000) {
-                trigger_error('Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: include your iconv version as per phpversion()', E_USER_ERROR);
-            } else {
-                $code = self::ICONV_OK;
-            }
-        }
-        return $code;
-    }
-
      /**
       * This expensive function tests whether or not a given character
       * encoding supports ASCII. 7/8-bit encodings like Shift_JIS will
@@ -496,11 +385,6 @@ class HTMLPurifier_Encoder
       *      which can be used to "undo" any overzealous iconv action.
       */
      public static function testEncodingSupportsASCII($encoding, $bypass = false) {
-        // All calls to iconv here are unsafe, proof by case analysis:
-        // If ICONV_OK, no difference.
-        // If ICONV_TRUNCATE, all calls involve one character inputs,
-        // so bug is not triggered.
-        // If ICONV_UNUSABLE, this call is irrelevant
          static $encodings = array();
          if (!$bypass) {
              if (isset($encodings[$encoding])) return $encodings[$encoding];
@@ -514,22 +398,24 @@ class HTMLPurifier_Encoder
              if (strpos($lenc, 'iso-8859-') === 0) return array();
          }
          $ret = array();
-        if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) return false;
+        set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
+        if (iconv('UTF-8', $encoding, 'a') === false) return false;
          for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
              $c = chr($i); // UTF-8 char
-            $r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
+            $r = iconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
              if (
                  $r === '' ||
                  // This line is needed for iconv implementations that do not
                  // omit characters that do not exist in the target character set
-                ($r === $c && self::unsafeIconv($encoding, 'UTF-8//IGNORE', $r) !== $c)
+                ($r === $c && iconv($encoding, 'UTF-8//IGNORE', $r) !== $c)
              ) {
                  // Reverse engineer: what's the UTF-8 equiv of this byte
                  // sequence? This assumes that there's no variable width
                  // encoding that doesn't support ASCII.
-                $ret[self::unsafeIconv($encoding, 'UTF-8//IGNORE', $c)] = $c;
+                $ret[iconv($encoding, 'UTF-8//IGNORE', $c)] = $c;
              }
          }
+        restore_error_handler();
          $encodings[$encoding] = $ret;
          return $ret;
      }
diff --git a/lib/htmlpurifier/library/HTMLPurifier/Filter/ExtractStyleBlocks.php b/lib/htmlpurifier/library/HTMLPurifier/Filter/ExtractStyleBlocks.php

index 320aa4f16e0afb845687b33303d0af9d650e4ac0..bbf78a6630a6b02c8495b1433695336b5e0c9853 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier/Filter/ExtractStyleBlocks.php
+++ b/lib/htmlpurifier/library/HTMLPurifier/Filter/ExtractStyleBlocks.php
@@ -1,11 +1,5 @@
  <?php
  
-// why is this a top level function? Because PHP 5.2.0 doesn't seem to
-// understand how to interpret this filter if it's a static method.
-// It's all really silly, but if we go this route it might be reasonable
-// to coalesce all of these methods into one.
-function htmlpurifier_filter_extractstyleblocks_muteerrorhandler() {}
-
  /**
   * This filter extracts <style> blocks from input HTML, cleans them up
   * using CSSTidy, and then places them in $purifier->context->get('StyleBlocks')
@@ -27,15 +21,8 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
      private $_styleMatches = array();
      private $_tidy;
  
-    private $_id_attrdef;
-    private $_class_attrdef;
-    private $_enum_attrdef;
-
      public function __construct() {
          $this->_tidy = new csstidy();
-        $this->_id_attrdef = new HTMLPurifier_AttrDef_HTML_ID(true);
-        $this->_class_attrdef = new HTMLPurifier_AttrDef_CSS_Ident();
-        $this->_enum_attrdef = new HTMLPurifier_AttrDef_Enum(array('first-child', 'link', 'visited', 'active', 'hover', 'focus'));
      }
  
      /**
@@ -90,166 +77,27 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
              $css = substr($css, 0, -3);
          }
          $css = trim($css);
-        set_error_handler('htmlpurifier_filter_extractstyleblocks_muteerrorhandler');
          $this->_tidy->parse($css);
-        restore_error_handler();
          $css_definition = $config->getDefinition('CSS');
-        $html_definition = $config->getDefinition('HTML');
-        $new_css = array();
          foreach ($this->_tidy->css as $k => $decls) {
              // $decls are all CSS declarations inside an @ selector
              $new_decls = array();
              foreach ($decls as $selector => $style) {
                  $selector = trim($selector);
                  if ($selector === '') continue; // should not happen
-                // Parse the selector
-                // Here is the relevant part of the CSS grammar:
-                //
-                // ruleset
-                //   : selector [ ',' S* selector ]* '{' ...
-                // selector
-                //   : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]?
-                // combinator
-                //   : '+' S*
-                //   : '>' S*
-                // simple_selector
-                //   : element_name [ HASH | class | attrib | pseudo ]*
-                //   | [ HASH | class | attrib | pseudo ]+
-                // element_name
-                //   : IDENT | '*'
-                //   ;
-                // class
-                //   : '.' IDENT
-                //   ;
-                // attrib
-                //   : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S*
-                //     [ IDENT | STRING ] S* ]? ']'
-                //   ;
-                // pseudo
-                //   : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ]
-                //   ;
-                //
-                // For reference, here are the relevant tokens:
-                //
-                // HASH         #{name}
-                // IDENT        {ident}
-                // INCLUDES     ==
-                // DASHMATCH    |=
-                // STRING       {string}
-                // FUNCTION     {ident}\(
-                //
-                // And the lexical scanner tokens
-                //
-                // name         {nmchar}+
-                // nmchar       [_a-z0-9-]|{nonascii}|{escape}
-                // nonascii     [\240-\377]
-                // escape       {unicode}|\\[^\r\n\f0-9a-f]
-                // unicode      \\{h}}{1,6}(\r\n|[ \t\r\n\f])?
-                // ident        -?{nmstart}{nmchar*}
-                // nmstart      [_a-z]|{nonascii}|{escape}
-                // string       {string1}|{string2}
-                // string1      \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
-                // string2      \'([^\n\r\f\\"]|\\{nl}|{escape})*\'
-                //
-                // We'll implement a subset (in order to reduce attack
-                // surface); in particular:
-                //
-                //      - No Unicode support
-                //      - No escapes support
-                //      - No string support (by proxy no attrib support)
-                //      - element_name is matched against allowed
-                //        elements (some people might find this
-                //        annoying...)
-                //      - Pseudo-elements one of :first-child, :link,
-                //        :visited, :active, :hover, :focus
-
-                // handle ruleset
-                $selectors = array_map('trim', explode(',', $selector));
-                $new_selectors = array();
-                foreach ($selectors as $sel) {
-                    // split on +, > and spaces
-                    $basic_selectors = preg_split('/\s*([+> ])\s*/', $sel, -1, PREG_SPLIT_DELIM_CAPTURE);
-                    // even indices are chunks, odd indices are
-                    // delimiters
-                    $nsel = null;
-                    $delim = null; // guaranteed to be non-null after
-                                   // two loop iterations
-                    for ($i = 0, $c = count($basic_selectors); $i < $c; $i++) {
-                        $x = $basic_selectors[$i];
-                        if ($i % 2) {
-                            // delimiter
-                            if ($x === ' ') {
-                                $delim = ' ';
-                            } else {
-                                $delim = ' ' . $x . ' ';
-                            }
-                        } else {
-                            // simple selector
-                            $components = preg_split('/([#.:])/', $x, -1, PREG_SPLIT_DELIM_CAPTURE);
-                            $sdelim = null;
-                            $nx = null;
-                            for ($j = 0, $cc = count($components); $j < $cc; $j ++) {
-                                $y = $components[$j];
-                                if ($j === 0) {
-                                    if ($y === '*' || isset($html_definition->info[$y = strtolower($y)])) {
-                                        $nx = $y;
-                                    } else {
-                                        // $nx stays null; this matters
-                                        // if we don't manage to find
-                                        // any valid selector content,
-                                        // in which case we ignore the
-                                        // outer $delim
-                                    }
-                                } elseif ($j % 2) {
-                                    // set delimiter
-                                    $sdelim = $y;
-                                } else {
-                                    $attrdef = null;
-                                    if ($sdelim === '#') {
-                                        $attrdef = $this->_id_attrdef;
-                                    } elseif ($sdelim === '.') {
-                                        $attrdef = $this->_class_attrdef;
-                                    } elseif ($sdelim === ':') {
-                                        $attrdef = $this->_enum_attrdef;
-                                    } else {
-                                        throw new HTMLPurifier_Exception('broken invariant sdelim and preg_split');
-                                    }
-                                    $r = $attrdef->validate($y, $config, $context);
-                                    if ($r !== false) {
-                                        if ($r !== true) {
-                                            $y = $r;
-                                        }
-                                        if ($nx === null) {
-                                            $nx = '';
-                                        }
-                                        $nx .= $sdelim . $y;
-                                    }
-                                }
-                            }
-                            if ($nx !== null) {
-                                if ($nsel === null) {
-                                    $nsel = $nx;
-                                } else {
-                                    $nsel .= $delim . $nx;
-                                }
-                            } else {
-                                // delimiters to the left of invalid
-                                // basic selector ignored
-                            }
-                        }
-                    }
-                    if ($nsel !== null) {
-                        if (!empty($scopes)) {
-                            foreach ($scopes as $s) {
-                                $new_selectors[] = "$s $nsel";
-                            }
-                        } else {
-                            $new_selectors[] = $nsel;
+                if ($selector[0] === '+') {
+                    if ($selector !== '' && $selector[0] === '+') continue;
+                }
+                if (!empty($scopes)) {
+                    $new_selector = array(); // because multiple ones are possible
+                    $selectors = array_map('trim', explode(',', $selector));
+                    foreach ($scopes as $s1) {
+                        foreach ($selectors as $s2) {
+                            $new_selector[] = "$s1 $s2";
                          }
                      }
+                    $selector = implode(', ', $new_selector); // now it's a string
                  }
-                if (empty($new_selectors)) continue;
-                $selector = implode(', ', $new_selectors);
                  foreach ($style as $name => $value) {
                      if (!isset($css_definition->info[$name])) {
                          unset($style[$name]);
@@ -262,11 +110,10 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
                  }
                  $new_decls[$selector] = $style;
              }
-            $new_css[$k] = $new_decls;
+            $this->_tidy->css[$k] = $new_decls;
          }
          // remove stuff that shouldn't be used, could be reenabled
          // after security risks are analyzed
-        $this->_tidy->css = $new_css;
          $this->_tidy->import = array();
          $this->_tidy->charset = null;
          $this->_tidy->namespace = null;
diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLDefinition.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLDefinition.php

index b079d44c1349be7184d51a92e7195646c47ef89f..33bb38ac5f43cad1fb4c03408ed56e207f66691c 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier/HTMLDefinition.php
+++ b/lib/htmlpurifier/library/HTMLPurifier/HTMLDefinition.php
@@ -147,7 +147,7 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
          return $this->_anonModule;
      }
  
-    private $_anonModule = null;
+    private $_anonModule;
  
  
      // PUBLIC BUT INTERNAL VARIABLES --------------------------------------
diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Forms.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Forms.php

index b963529a77a423797397f1581aec5f79a077104e..44c22f6f8b53283d5bc2a28608d5b997d9cb2fb1 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Forms.php
+++ b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Forms.php
@@ -35,7 +35,7 @@ class HTMLPurifier_HTMLModule_Forms extends HTMLPurifier_HTMLModule
              'name' => 'CDATA',
              'readonly' => 'Bool#readonly',
              'size' => 'Number',
-            'src' => 'URI#embedded',
+            'src' => 'URI#embeds',
              'tabindex' => 'Number',
              'type' => 'Enum#text,password,checkbox,button,radio,submit,reset,file,hidden,image',
              'value' => 'CDATA',
@@ -84,8 +84,7 @@ class HTMLPurifier_HTMLModule_Forms extends HTMLPurifier_HTMLModule
          $button->excludes = $this->makeLookup(
              'form', 'fieldset', // Form
              'input', 'select', 'textarea', 'label', 'button', // Formctrl
-            'a', // as per HTML 4.01 spec, this is omitted by modularization
-            'isindex', 'iframe' // legacy items
+            'a' // as per HTML 4.01 spec, this is omitted by modularization
          );
  
          // Extra exclusion: img usemap="" is not permitted within this element.
diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Iframe.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Iframe.php

deleted file mode 100644 (file)

index 287071e..0000000
--- a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Iframe.php
+++ /dev/null
@@ -1,38 +0,0 @@
-<?php
-
-/**
- * XHTML 1.1 Iframe Module provides inline frames.
- *
- * @note This module is not considered safe unless an Iframe
- * whitelisting mechanism is specified.  Currently, the only
- * such mechanism is %URL.SafeIframeRegexp
- */
-class HTMLPurifier_HTMLModule_Iframe extends HTMLPurifier_HTMLModule
-{
-
-    public $name = 'Iframe';
-    public $safe = false;
-
-    public function setup($config) {
-        if ($config->get('HTML.SafeIframe')) {
-            $this->safe = true;
-        }
-        $this->addElement(
-            'iframe', 'Inline', 'Flow', 'Common',
-            array(
-                'src' => 'URI#embedded',
-                'width' => 'Length',
-                'height' => 'Length',
-                'name' => 'ID',
-                'scrolling' => 'Enum#yes,no,auto',
-                'frameborder' => 'Enum#0,1',
-                'longdesc' => 'URI',
-                'marginheight' => 'Pixels',
-                'marginwidth' => 'Pixels',
-            )
-        );
-    }
-
-}
-
-// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Legacy.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Legacy.php

index f278eeced29adaf34600e084ced8568df8db18f7..df33927ba6b27d6e7f0151545bd3a9cda929eb60 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Legacy.php
+++ b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Legacy.php
@@ -89,7 +89,7 @@ class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
          $hr->attr['width'] = 'Length';
  
          $img = $this->addBlankElement('img');
-        $img->attr['align'] = 'IAlign';
+        $img->attr['align'] = 'Enum#top,middle,bottom,left,right';
          $img->attr['border'] = 'Pixels';
          $img->attr['hspace'] = 'Pixels';
          $img->attr['vspace'] = 'Pixels';
@@ -136,22 +136,6 @@ class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
          $ul->attr['compact'] = 'Bool#compact';
          $ul->attr['type'] = 'Enum#square,disc,circle';
  
-        // "safe" modifications to "unsafe" elements
-        // WARNING: If you want to add support for an unsafe, legacy
-        // attribute, make a new TrustedLegacy module with the trusted
-        // bit set appropriately
-
-        $form = $this->addBlankElement('form');
-        $form->content_model = 'Flow | #PCDATA';
-        $form->content_model_type = 'optional';
-        $form->attr['target'] = 'FrameTarget';
-
-        $input = $this->addBlankElement('input');
-        $input->attr['align'] = 'IAlign';
-
-        $legend = $this->addBlankElement('legend');
-        $legend->attr['align'] = 'LAlign';
-
      }
  
  }
diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/List.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/List.php

index 79ccefafd963d474bfd796f8faac755e8a8a74eb..74d4522f4e24ffdd9bc62926f7d0474117af0b62 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/List.php
+++ b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/List.php
@@ -20,16 +20,10 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
      public $content_sets = array('Flow' => 'List');
  
      public function setup($config) {
-        $ol = $this->addElement('ol', 'List', new HTMLPurifier_ChildDef_List(), 'Common');
-        $ul = $this->addElement('ul', 'List', new HTMLPurifier_ChildDef_List(), 'Common');
-        // XXX The wrap attribute is handled by MakeWellFormed.  This is all
-        // quite unsatisfactory, because we generated this
-        // *specifically* for lists, and now a big chunk of the handling
-        // is done properly by the List ChildDef.  So actually, we just
-        // want enough information to make autoclosing work properly,
-        // and then hand off the tricky stuff to the ChildDef.
-        $ol->wrap = 'li';
-        $ul->wrap = 'li';
+        $ol = $this->addElement('ol', 'List', 'Required: li', 'Common');
+        $ol->wrap = "li";
+        $ul = $this->addElement('ul', 'List', 'Required: li', 'Common');
+        $ul->wrap = "li";
          $this->addElement('dl', 'List', 'Required: dt | dd', 'Common');
  
          $this->addElement('li', false, 'Flow', 'Common');
diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tables.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tables.php

index 45c42bb3e44b9bdc825e1dbe3f445aaf306f1879..f314ced3f82ea6e36346d057f6404252b78c74f1 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tables.php
+++ b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tables.php
@@ -37,9 +37,6 @@ class HTMLPurifier_HTMLModule_Tables extends HTMLPurifier_HTMLModule
                  'abbr'    => 'Text',
                  'colspan' => 'Number',
                  'rowspan' => 'Number',
-                // Apparently, as of HTML5 this attribute only applies
-                // to 'th' elements.
-                'scope'   => 'Enum#row,col,rowgroup,colgroup',
              ),
              $cell_align
          );
diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/TargetBlank.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/TargetBlank.php

deleted file mode 100644 (file)

index e1305ec..0000000
--- a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/TargetBlank.php
+++ /dev/null
@@ -1,19 +0,0 @@
-<?php
-
-/**
- * Module adds the target=blank attribute transformation to a tags.  It
- * is enabled by HTML.TargetBlank
- */
-class HTMLPurifier_HTMLModule_TargetBlank extends HTMLPurifier_HTMLModule
-{
-
-    public $name = 'TargetBlank';
-
-    public function setup($config) {
-        $a = $this->addBlankElement('a');
-        $a->attr_transform_post[] = new HTMLPurifier_AttrTransform_TargetBlank();
-    }
-
-}
-
-// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModuleManager.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModuleManager.php

index 7a06fc02290afe81c0376665c458ee84729299ec..362e3b78db595615af3da22eb641f0b915c14ca2 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier/HTMLModuleManager.php
+++ b/lib/htmlpurifier/library/HTMLPurifier/HTMLModuleManager.php
@@ -65,11 +65,11 @@ class HTMLPurifier_HTMLModuleManager
              'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
              'StyleAttribute',
              // Unsafe:
-            'Scripting', 'Object', 'Forms',
+            'Scripting', 'Object',  'Forms',
              // Sorta legacy, but present in strict:
              'Name',
          );
-        $transitional = array('Legacy', 'Target', 'Iframe');
+        $transitional = array('Legacy', 'Target');
          $xml = array('XMLCommonAttributes');
          $non_xml = array('NonXMLCommonAttributes');
  
@@ -112,9 +112,7 @@ class HTMLPurifier_HTMLModuleManager
  
          $this->doctypes->register(
              'XHTML 1.1', true,
-            // Iframe is a real XHTML 1.1 module, despite being
-            // "transitional"!
-            array_merge($common, $xml, array('Ruby', 'Iframe')),
+            array_merge($common, $xml, array('Ruby')),
              array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
              array(),
              '-//W3C//DTD XHTML 1.1//EN',
@@ -231,9 +229,6 @@ class HTMLPurifier_HTMLModuleManager
          if ($config->get('HTML.Nofollow')) {
              $modules[] = 'Nofollow';
          }
-        if ($config->get('HTML.TargetBlank')) {
-            $modules[] = 'TargetBlank';
-        }
  
          // merge in custom modules
          $modules = array_merge($modules, $this->userModules);
@@ -369,13 +364,6 @@ class HTMLPurifier_HTMLModuleManager
                  // :TODO:
                  // non-standalone definitions that don't have a standalone
                  // to merge into could be deferred to the end
-                // HOWEVER, it is perfectly valid for a non-standalone
-                // definition to lack a standalone definition, even
-                // after all processing: this allows us to safely
-                // specify extra attributes for elements that may not be
-                // enabled all in one place.  In particular, this might
-                // be the case for trusted elements.  WARNING: care must
-                // be taken that the /extra/ definitions are all safe.
                  continue;
              }
  
diff --git a/lib/htmlpurifier/library/HTMLPurifier/Lexer/PEARSax3.php b/lib/htmlpurifier/library/HTMLPurifier/Lexer/PEARSax3.php

new file mode 100644 (file)

index 0000000..1d358c7
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/Lexer/PEARSax3.php
@@ -0,0 +1,139 @@
+<?php
+
+/**
+ * Proof-of-concept lexer that uses the PEAR package XML_HTMLSax3 to parse HTML.
+ *
+ * PEAR, not surprisingly, also has a SAX parser for HTML.  I don't know
+ * very much about implementation, but it's fairly well written.  However, that
+ * abstraction comes at a price: performance. You need to have it installed,
+ * and if the API changes, it might break our adapter. Not sure whether or not
+ * it's UTF-8 aware, but it has some entity parsing trouble (in all areas,
+ * text and attributes).
+ *
+ * Quite personally, I don't recommend using the PEAR class, and the defaults
+ * don't use it. The unit tests do perform the tests on the SAX parser too, but
+ * whatever it does for poorly formed HTML is up to it.
+ *
+ * @todo Generalize so that XML_HTMLSax is also supported.
+ *
+ * @warning Entity-resolution inside attributes is broken.
+ */
+
+class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
+{
+
+    /**
+     * Internal accumulator array for SAX parsers.
+     */
+    protected $tokens = array();
+    protected $last_token_was_empty;
+
+    private $parent_handler;
+    private $stack = array();
+
+    public function tokenizeHTML($string, $config, $context) {
+
+        $this->tokens = array();
+        $this->last_token_was_empty = false;
+
+        $string = $this->normalize($string, $config, $context);
+
+        $this->parent_handler = set_error_handler(array($this, 'muteStrictErrorHandler'));
+
+        $parser = new XML_HTMLSax3();
+        $parser->set_object($this);
+        $parser->set_element_handler('openHandler','closeHandler');
+        $parser->set_data_handler('dataHandler');
+        $parser->set_escape_handler('escapeHandler');
+
+        // doesn't seem to work correctly for attributes
+        $parser->set_option('XML_OPTION_ENTITIES_PARSED', 1);
+
+        $parser->parse($string);
+
+        restore_error_handler();
+
+        return $this->tokens;
+
+    }
+
+    /**
+     * Open tag event handler, interface is defined by PEAR package.
+     */
+    public function openHandler(&$parser, $name, $attrs, $closed) {
+        // entities are not resolved in attrs
+        foreach ($attrs as $key => $attr) {
+            $attrs[$key] = $this->parseData($attr);
+        }
+        if ($closed) {
+            $this->tokens[] = new HTMLPurifier_Token_Empty($name, $attrs);
+            $this->last_token_was_empty = true;
+        } else {
+            $this->tokens[] = new HTMLPurifier_Token_Start($name, $attrs);
+        }
+        $this->stack[] = $name;
+        return true;
+    }
+
+    /**
+     * Close tag event handler, interface is defined by PEAR package.
+     */
+    public function closeHandler(&$parser, $name) {
+        // HTMLSax3 seems to always send empty tags an extra close tag
+        // check and ignore if you see it:
+        // [TESTME] to make sure it doesn't overreach
+        if ($this->last_token_was_empty) {
+            $this->last_token_was_empty = false;
+            return true;
+        }
+        $this->tokens[] = new HTMLPurifier_Token_End($name);
+        if (!empty($this->stack)) array_pop($this->stack);
+        return true;
+    }
+
+    /**
+     * Data event handler, interface is defined by PEAR package.
+     */
+    public function dataHandler(&$parser, $data) {
+        $this->last_token_was_empty = false;
+        $this->tokens[] = new HTMLPurifier_Token_Text($data);
+        return true;
+    }
+
+    /**
+     * Escaped text handler, interface is defined by PEAR package.
+     */
+    public function escapeHandler(&$parser, $data) {
+        if (strpos($data, '--') === 0) {
+            // remove trailing and leading double-dashes
+            $data = substr($data, 2);
+            if (strlen($data) >= 2 && substr($data, -2) == "--") {
+                $data = substr($data, 0, -2);
+            }
+            if (isset($this->stack[sizeof($this->stack) - 1]) &&
+                $this->stack[sizeof($this->stack) - 1] == "style") {
+                $this->tokens[] = new HTMLPurifier_Token_Text($data);
+            } else {
+                $this->tokens[] = new HTMLPurifier_Token_Comment($data);
+            }
+            $this->last_token_was_empty = false;
+        }
+        // CDATA is handled elsewhere, but if it was handled here:
+        //if (strpos($data, '[CDATA[') === 0) {
+        //    $this->tokens[] = new HTMLPurifier_Token_Text(
+        //        substr($data, 7, strlen($data) - 9) );
+        //}
+        return true;
+    }
+
+    /**
+     * An error handler that mutes strict errors
+     */
+    public function muteStrictErrorHandler($errno, $errstr, $errfile=null, $errline=null, $errcontext=null) {
+        if ($errno == E_STRICT) return;
+        return call_user_func($this->parent_handler, $errno, $errstr, $errfile, $errline, $errcontext);
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/Strategy/Composite.php b/lib/htmlpurifier/library/HTMLPurifier/Strategy/Composite.php

index 92aefd33e274283935168caf91297ec18f07ee90..816490b7996ade532876d42861524db931c7b1b9 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier/Strategy/Composite.php
+++ b/lib/htmlpurifier/library/HTMLPurifier/Strategy/Composite.php
@@ -11,6 +11,8 @@ abstract class HTMLPurifier_Strategy_Composite extends HTMLPurifier_Strategy
       */
      protected $strategies = array();
  
+    abstract public function __construct();
+
      public function execute($tokens, $config, $context) {
          foreach ($this->strategies as $strategy) {
              $tokens = $strategy->execute($tokens, $config, $context);
diff --git a/lib/htmlpurifier/library/HTMLPurifier/Strategy/RemoveForeignElements.php b/lib/htmlpurifier/library/HTMLPurifier/Strategy/RemoveForeignElements.php

index bccaf14d3c53e653e8ad5bae64ba226bcfb711d1..cf3a33e406eacd22f71502f93d7a2a4484aba3b2 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier/Strategy/RemoveForeignElements.php
+++ b/lib/htmlpurifier/library/HTMLPurifier/Strategy/RemoveForeignElements.php
@@ -21,9 +21,6 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
  
          // currently only used to determine if comments should be kept
          $trusted = $config->get('HTML.Trusted');
-        $comment_lookup = $config->get('HTML.AllowedComments');
-        $comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
-        $check_comments = $comment_lookup !== array() || $comment_regexp !== null;
  
          $remove_script_contents = $config->get('Core.RemoveScriptContents');
          $hidden_elements     = $config->get('Core.HiddenElements');
@@ -131,36 +128,22 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
                  if ($textify_comments !== false) {
                      $data = $token->data;
                      $token = new HTMLPurifier_Token_Text($data);
-                } elseif ($trusted || $check_comments) {
-                    // always cleanup comments
-                    $trailing_hyphen = false;
+                } elseif ($trusted) {
+                    // keep, but perform comment cleaning
                      if ($e) {
                          // perform check whether or not there's a trailing hyphen
                          if (substr($token->data, -1) == '-') {
-                            $trailing_hyphen = true;
+                            $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
                          }
                      }
                      $token->data = rtrim($token->data, '-');
                      $found_double_hyphen = false;
                      while (strpos($token->data, '--') !== false) {
-                        $found_double_hyphen = true;
-                        $token->data = str_replace('--', '-', $token->data);
-                    }
-                    if ($trusted || !empty($comment_lookup[trim($token->data)]) || ($comment_regexp !== NULL && preg_match($comment_regexp, trim($token->data)))) {
-                        // OK good
-                        if ($e) {
-                            if ($trailing_hyphen) {
-                                $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
-                            }
-                            if ($found_double_hyphen) {
-                                $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
-                            }
+                        if ($e && !$found_double_hyphen) {
+                            $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
                          }
-                    } else {
-                        if ($e) {
-                            $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
-                        }
-                        continue;
+                        $found_double_hyphen = true; // prevent double-erroring
+                        $token->data = str_replace('--', '-', $token->data);
                      }
                  } else {
                      // strip comments
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URI.php b/lib/htmlpurifier/library/HTMLPurifier/URI.php

index f158ef5e304c5b2576cfe71f977481d9fddd592d..efdfb2c680bee8a7076bb0d8c224c30c4e4a005e 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier/URI.php
+++ b/lib/htmlpurifier/library/HTMLPurifier/URI.php
@@ -40,7 +40,7 @@ class HTMLPurifier_URI
          } else {
              // no scheme: retrieve the default one
              $def = $config->getDefinition('URI');
-            $scheme_obj = $def->getDefaultScheme($config, $context);
+            $scheme_obj = $registry->getScheme($def->defaultScheme, $config, $context);
              if (!$scheme_obj) {
                  // something funky happened to the default scheme object
                  trigger_error(
@@ -199,44 +199,6 @@ class HTMLPurifier_URI
          return $result;
      }
  
-    /**
-     * Returns true if this URL might be considered a 'local' URL given
-     * the current context.  This is true when the host is null, or
-     * when it matches the host supplied to the configuration.
-     *
-     * Note that this does not do any scheme checking, so it is mostly
-     * only appropriate for metadata that doesn't care about protocol
-     * security.  isBenign is probably what you actually want.
-     */
-    public function isLocal($config, $context) {
-        if ($this->host === null) return true;
-        $uri_def = $config->getDefinition('URI');
-        if ($uri_def->host === $this->host) return true;
-        return false;
-    }
-
-    /**
-     * Returns true if this URL should be considered a 'benign' URL,
-     * that is:
-     *
-     *      - It is a local URL (isLocal), and
-     *      - It has a equal or better level of security
-     */
-    public function isBenign($config, $context) {
-        if (!$this->isLocal($config, $context)) return false;
-
-        $scheme_obj = $this->getSchemeObj($config, $context);
-        if (!$scheme_obj) return false; // conservative approach
-
-        $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
-        if ($current_scheme_obj->secure) {
-            if (!$scheme_obj->secure) {
-                return false;
-            }
-        }
-        return true;
-    }
-
  }
  
  // vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIDefinition.php b/lib/htmlpurifier/library/HTMLPurifier/URIDefinition.php

index 40e57bb7d81629f8828c18b14540db1530a7eb77..ea2b8fe245877781bde3248cd2eadfdda31747d8 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier/URIDefinition.php
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIDefinition.php
@@ -27,7 +27,6 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
          $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternal());
          $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternalResources());
          $this->registerFilter(new HTMLPurifier_URIFilter_HostBlacklist());
-        $this->registerFilter(new HTMLPurifier_URIFilter_SafeIframe());
          $this->registerFilter(new HTMLPurifier_URIFilter_MakeAbsolute());
          $this->registerFilter(new HTMLPurifier_URIFilter_Munge());
      }
@@ -53,13 +52,9 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
  
      protected function setupFilters($config) {
          foreach ($this->registeredFilters as $name => $filter) {
-            if ($filter->always_load) {
+            $conf = $config->get('URI.' . $name);
+            if ($conf !== false && $conf !== null) {
                  $this->addFilter($filter, $config);
-            } else {
-                $conf = $config->get('URI.' . $name);
-                if ($conf !== false && $conf !== null) {
-                    $this->addFilter($filter, $config);
-                }
              }
          }
          unset($this->registeredFilters);
@@ -77,10 +72,6 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
          if (is_null($this->defaultScheme)) $this->defaultScheme = $config->get('URI.DefaultScheme');
      }
  
-    public function getDefaultScheme($config, $context) {
-        return HTMLPurifier_URISchemeRegistry::instance()->getScheme($this->defaultScheme, $config, $context);
-    }
-
      public function filter(&$uri, $config, $context) {
          foreach ($this->filters as $name => $f) {
              $result = $f->filter($uri, $config, $context);
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIFilter.php b/lib/htmlpurifier/library/HTMLPurifier/URIFilter.php

index 6a1b0b08e4b260cd4e3a85732d2064bacbdeee0c..c116f93dffc02a198a97ffcc3454147d3e5e9c1c 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier/URIFilter.php
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIFilter.php
@@ -4,21 +4,7 @@
   * Chainable filters for custom URI processing.
   *
   * These filters can perform custom actions on a URI filter object,
- * including transformation or blacklisting.  A filter named Foo
- * must have a corresponding configuration directive %URI.Foo,
- * unless always_load is specified to be true.
- *
- * The following contexts may be available while URIFilters are being
- * processed:
- *
- *      - EmbeddedURI: true if URI is an embedded resource that will
- *        be loaded automatically on page load
- *      - CurrentToken: a reference to the token that is currently
- *        being processed
- *      - CurrentAttr: the name of the attribute that is currently being
- *        processed
- *      - CurrentCSSProperty: the name of the CSS property that is
- *        currently being processed (if applicable)
+ * including transformation or blacklisting.
   *
   * @warning This filter is called before scheme object validation occurs.
   *          Make sure, if you require a specific scheme object, you
@@ -39,15 +25,7 @@ abstract class HTMLPurifier_URIFilter
      public $post = false;
  
      /**
-     * True if this filter should always be loaded (this permits
-     * a filter to be named Foo without the corresponding %URI.Foo
-     * directive existing.)
-     */
-    public $always_load = false;
-
-    /**
-     * Performs initialization for the filter.  If the filter returns
-     * false, this means that it shouldn't be considered active.
+     * Performs initialization for the filter
       */
      public function prepare($config) {return true;}
  
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/HostBlacklist.php b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/HostBlacklist.php

index 55fde3bf4d37c72e3ec9f72e5860718543bc82b7..045aa0992c662a88ba51cb556e67663467ceae78 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/HostBlacklist.php
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/HostBlacklist.php
@@ -1,9 +1,5 @@
  <?php
  
-// It's not clear to me whether or not Punycode means that hostnames
-// do not have canonical forms anymore. As far as I can tell, it's
-// not a problem (punycoding should be identity when no Unicode
-// points are involved), but I'm not 100% sure
  class HTMLPurifier_URIFilter_HostBlacklist extends HTMLPurifier_URIFilter
  {
      public $name = 'HostBlacklist';
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/Munge.php b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/Munge.php

index de695df14beebb1911b4bd7fb173fcc3753f7ffb..efa10a6458ae6e1b171f4a94baeda41488f85a8e 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/Munge.php
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/Munge.php
@@ -20,8 +20,13 @@ class HTMLPurifier_URIFilter_Munge extends HTMLPurifier_URIFilter
  
          $scheme_obj = $uri->getSchemeObj($config, $context);
          if (!$scheme_obj) return true; // ignore unknown schemes, maybe another postfilter did it
-        if (!$scheme_obj->browsable) return true; // ignore non-browseable schemes, since we can't munge those in a reasonable way
-        if ($uri->isBenign($config, $context)) return true; // don't redirect if a benign URL
+        if (is_null($uri->host) || empty($scheme_obj->browsable)) {
+            return true;
+        }
+        // don't redirect if target host is our host
+        if ($uri->host === $config->getDefinition('URI')->host) {
+            return true;
+        }
  
          $this->makeReplace($uri, $config, $context);
          $this->replace = array_map('rawurlencode', $this->replace);
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/SafeIframe.php b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/SafeIframe.php

deleted file mode 100644 (file)

index 284bb13..0000000
--- a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/SafeIframe.php
+++ /dev/null
@@ -1,35 +0,0 @@
-<?php
-
-/**
- * Implements safety checks for safe iframes.
- *
- * @warning This filter is *critical* for ensuring that %HTML.SafeIframe
- * works safely.
- */
-class HTMLPurifier_URIFilter_SafeIframe extends HTMLPurifier_URIFilter
-{
-    public $name = 'SafeIframe';
-    public $always_load = true;
-    protected $regexp = NULL;
-    // XXX: The not so good bit about how this is all setup now is we
-    // can't check HTML.SafeIframe in the 'prepare' step: we have to
-    // defer till the actual filtering.
-    public function prepare($config) {
-        $this->regexp = $config->get('URI.SafeIframeRegexp');
-        return true;
-    }
-    public function filter(&$uri, $config, $context) {
-        // check if filter not applicable
-        if (!$config->get('HTML.SafeIframe')) return true;
-        // check if the filter should actually trigger
-        if (!$context->get('EmbeddedURI', true)) return true;
-        $token = $context->get('CurrentToken', true);
-        if (!($token && $token->name == 'iframe')) return true;
-        // check if we actually have some whitelists enabled
-        if ($this->regexp === null) return false;
-        // actually check the whitelists
-        return preg_match($this->regexp, $uri->toString());
-    }
-}
-
-// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIScheme.php b/lib/htmlpurifier/library/HTMLPurifier/URIScheme.php

index 7be958143ae37569b5d345391ba2a7081e912d9a..25eb8410b4f41ea5541a01e7ecacac349f19d536 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier/URIScheme.php
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIScheme.php
@@ -19,12 +19,6 @@ abstract class HTMLPurifier_URIScheme
       */
      public $browsable = false;
  
-    /**
-     * Whether or not data transmitted over this scheme is encrypted.
-     * https is secure, http is not.
-     */
-    public $secure = false;
-
      /**
       * Whether or not the URI always uses <hier_part>, resolves edge cases
       * with making relative URIs absolute
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/https.php b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/https.php

index 159c2874eaf435a791363d699a304e98ba9aefae..29e380919f0d746e2e7d41c27002f2582b9a4826 100644 (file)
--- a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/https.php
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/https.php
@@ -6,7 +6,6 @@
  class HTMLPurifier_URIScheme_https extends HTMLPurifier_URIScheme_http {
  
      public $default_port = 443;
-    public $secure = true;
  
  }
author	Andrew Dolgov <fox@madoka.volgo-balt.ru>
	Tue, 5 Jun 2012 17:52:37 +0000 (21:52 +0400)
committer	Andrew Dolgov <fox@madoka.volgo-balt.ru>
	Tue, 5 Jun 2012 17:52:37 +0000 (21:52 +0400)
lib/htmlpurifier/library/HTMLPurifier.includes.php		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier.php		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier.safe-includes.php		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Ident.php	[deleted file]	patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/AttrDef/Clone.php	[deleted file]	patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Color.php		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI.php		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Nofollow.php		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/AttrTransform/TargetBlank.php	[deleted file]	patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/AttrTypes.php		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/ChildDef/List.php	[deleted file]	patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/Config.php		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange/Namespace.php	[new file with mode: 0755]	patch \| blob
lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema.ser		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.txt	[new file with mode: 0755]	patch \| blob
lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.txt	[new file with mode: 0755]	patch \| blob
lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormatParam.PurifierLinkifyDocURL.txt	[new file with mode: 0755]	patch \| blob
lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormatParam.txt	[new file with mode: 0755]	patch \| blob
lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.txt	[new file with mode: 0755]	patch \| blob
lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Cache.txt	[new file with mode: 0755]	patch \| blob
lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.ColorKeywords.txt		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.EnableIDNA.txt	[deleted file]	patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.txt	[new file with mode: 0755]	patch \| blob
lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Filter.txt	[new file with mode: 0755]	patch \| blob
lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.ExtractStyleBlocksEscaping.txt	[new file with mode: 0755]	patch \| blob
lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.ExtractStyleBlocksScope.txt	[new file with mode: 0755]	patch \| blob
lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.ExtractStyleBlocksTidyImpl.txt	[new file with mode: 0755]	patch \| blob
lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/FilterParam.txt	[new file with mode: 0755]	patch \| blob
lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedComments.txt	[deleted file]	patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedCommentsRegexp.txt	[deleted file]	patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.SafeIframe.txt	[deleted file]	patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.TargetBlank.txt	[deleted file]	patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.txt	[new file with mode: 0755]	patch \| blob
lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.txt	[new file with mode: 0755]	patch \| blob
lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Test.txt	[new file with mode: 0755]	patch \| blob
lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.SafeIframeRegexp.txt	[deleted file]	patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.txt	[new file with mode: 0755]	patch \| blob
lib/htmlpurifier/library/HTMLPurifier/Encoder.php		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/Filter/ExtractStyleBlocks.php		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/HTMLDefinition.php		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Forms.php		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Iframe.php	[deleted file]	patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Legacy.php		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/HTMLModule/List.php		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tables.php		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/HTMLModule/TargetBlank.php	[deleted file]	patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/HTMLModuleManager.php		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/Lexer/PEARSax3.php	[new file with mode: 0644]	patch \| blob
lib/htmlpurifier/library/HTMLPurifier/Strategy/Composite.php		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/Strategy/RemoveForeignElements.php		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/URI.php		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/URIDefinition.php		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/URIFilter.php		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/URIFilter/HostBlacklist.php		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/URIFilter/Munge.php		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/URIFilter/SafeIframe.php	[deleted file]	patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/URIScheme.php		patch \| blob \| blame \| history
lib/htmlpurifier/library/HTMLPurifier/URIScheme/https.php		patch \| blob \| blame \| history