lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php

   1 <?php
   2
   3 /**
   4  * Validates contents based on NMTOKENS attribute type.
   5  * @note The only current use for this is the class attribute in HTML
   6  * @note Could have some functionality factored out into Nmtoken class
   7  * @warning We cannot assume this class will be used only for 'class'
   8  *          attributes. Not sure how to hook in magic behavior, then.
   9  */
  10 class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
  11 {
  12
  13     public function validate($string, $config, $context) {
  14
  15         $string = trim($string);
  16
  17         // early abort: '' and '0' (strings that convert to false) are invalid
  18         if (!$string) return false;
  19
  20         // OPTIMIZABLE!
  21         // do the preg_match, capture all subpatterns for reformulation
  22
  23         // we don't support U+00A1 and up codepoints or
  24         // escaping because I don't know how to do that with regexps
  25         // and plus it would complicate optimization efforts (you never
  26         // see that anyway).
  27         $matches = array();
  28         $pattern = '/(?:(?<=\s)|\A)'. // look behind for space or string start
  29                    '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'.
  30                    '(?:(?=\s)|\z)/'; // look ahead for space or string end
  31         preg_match_all($pattern, $string, $matches);
  32
  33         if (empty($matches[1])) return false;
  34
  35         // reconstruct string
  36         $new_string = '';
  37         foreach ($matches[1] as $token) {
  38             $new_string .= $token . ' ';
  39         }
  40         $new_string = rtrim($new_string);
  41
  42         return $new_string;
  43
  44     }
  45
  46 }
  47
  48 // vim: et sw=4 sts=4