]> git.wh0rd.org - tt-rss.git/blob - vendor/JShrink/Minifier.php
move JShrink Minifier to vendor/
[tt-rss.git] / vendor / JShrink / Minifier.php
1 <?php
2 /*
3 * This file is part of the JShrink package.
4 *
5 * (c) Robert Hafner <tedivm@tedivm.com>
6 *
7 * For the full copyright and license information, please view the LICENSE
8 * file that was distributed with this source code.
9 */
10
11 /**
12 * JShrink
13 *
14 *
15 * @package JShrink
16 * @author Robert Hafner <tedivm@tedivm.com>
17 */
18
19 namespace JShrink;
20
21 /**
22 * Minifier
23 *
24 * Usage - Minifier::minify($js);
25 * Usage - Minifier::minify($js, $options);
26 * Usage - Minifier::minify($js, array('flaggedComments' => false));
27 *
28 * @package JShrink
29 * @author Robert Hafner <tedivm@tedivm.com>
30 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
31 */
class Minifier
{
    /**
     * The input javascript to be minified.
     *
     * @var string
     */
    protected $input;

    /**
     * The location of the character (in the input string) that is next to be
     * processed.
     *
     * @var int
     */
    protected $index = 0;

    /**
     * The first of the characters currently being looked at.
     *
     * @var string|false
     */
    protected $a = '';

    /**
     * The next character being looked at (after a).
     *
     * @var string|false
     */
    protected $b = '';

    /**
     * One character look-ahead buffer. It is only set while getReal() is
     * deciding whether a "/" starts a comment; getChar() drains it first.
     *
     * @var string|false
     */
    protected $c;

    /**
     * Contains the options for the current minification process.
     *
     * @var array
     */
    protected $options;

    /**
     * Contains the default options for minification. This array is merged with
     * the one passed in by the user to create the request specific set of
     * options (stored in the $options attribute).
     *
     * @var array
     */
    protected static $defaultOptions = array('flaggedComments' => true);

    /**
     * Maps each lock placeholder (a quoted pseudo string literal) to the
     * original whitespace it stands in for. Filled by lock(), read by unlock().
     *
     * @var array
     */
    protected $locks = array();

    /**
     * Takes a string containing javascript and removes unneeded characters in
     * order to shrink the code without altering its functionality.
     *
     * The minifier echoes its result, so the work is wrapped in an output
     * buffer that is captured on success and discarded on failure.
     *
     * @param  string     $js      The raw javascript to be minified
     * @param  array      $options Various runtime options in an associative array
     * @throws \Exception Any exception raised while minifying is rethrown
     *                    after the output buffer has been discarded.
     * @return bool|string
     */
    public static function minify($js, $options = array())
    {
        try {
            ob_start();

            $jshrink = new Minifier();
            $js = $jshrink->lock($js);
            $jshrink->minifyDirectToOutput($js, $options);

            // Sometimes there's a leading new line, so we trim that out here.
            $js = ltrim(ob_get_clean());
            $js = $jshrink->unlock($js);
            unset($jshrink);

            return $js;
        } catch (\Exception $e) {
            if (isset($jshrink)) {
                // Since the minification probably wasn't finished we clean
                // the instance out before discarding it.
                $jshrink->clean();
                unset($jshrink);
            }

            // Without this call partially minified javascript would leak into
            // whatever output buffer (or response) surrounds this call.
            ob_end_clean();
            throw $e;
        }
    }

    /**
     * Processes a javascript string and outputs only the required characters,
     * stripping out all unneeded characters.
     *
     * @param string $js      The raw javascript to be minified
     * @param array  $options Various runtime options in an associative array
     */
    protected function minifyDirectToOutput($js, $options)
    {
        $this->initialize($js, $options);
        $this->loop();
        $this->clean();
    }

    /**
     * Initializes internal variables and normalizes new lines.
     *
     * "\r\n" and lone "\r" are both turned into "\n", and empty block comments
     * are dropped before scanning starts.
     *
     * @param string $js      The raw javascript to be minified
     * @param array  $options Various runtime options in an associative array
     */
    protected function initialize($js, $options)
    {
        $this->options = array_merge(static::$defaultOptions, $options);
        $js = str_replace("\r\n", "\n", $js);
        $js = str_replace('/**/', '', $js);
        $this->input = str_replace("\r", "\n", $js);

        // We add a newline to the end of the script to make it easier to deal
        // with comments at the bottom of the script- this prevents the unclosed
        // comment error that can otherwise occur.
        $this->input .= PHP_EOL;

        // Populate "a" with a new line, "b" with the first character, before
        // entering the loop
        $this->a = "\n";
        $this->b = $this->getReal();
    }

    /**
     * The primary action occurs here. This function loops through the input
     * string, outputting anything that's relevant and discarding anything that
     * is not.
     *
     * Each pass looks at the pair (a, b): "a" is the character that may be
     * written out, "b" is the one following it. The loop ends once "a" runs
     * past the end of the input.
     */
    protected function loop()
    {
        while ($this->a !== false && !is_null($this->a) && $this->a !== '') {

            switch ($this->a) {
                // new lines
                case "\n":
                    // If the next line is something that can't stand alone
                    // preserve the newline. "b" is false at the end of the
                    // input; passing that to strpos() is an empty needle,
                    // which matches at offset 0 on PHP 8, so guard it.
                    if (is_string($this->b) && $this->b !== ''
                        && strpos('(-+{[@', $this->b) !== false) {
                        echo $this->a;
                        $this->saveString();
                        break;
                    }

                    // if B is a space we skip the rest of the switch block and
                    // go down to the string/regex check below, resetting
                    // $this->b with getReal
                    if ($this->b === ' ') {
                        break;
                    }

                    // otherwise we treat the newline like a space

                    // no break
                case ' ':
                    if (static::isAlphaNumeric($this->b)) {
                        echo $this->a;
                    }

                    $this->saveString();
                    break;

                default:
                    switch ($this->b) {
                        case "\n":
                            if (strpos('}])+-"\'', $this->a) !== false) {
                                echo $this->a;
                                $this->saveString();
                                break;
                            } else {
                                if (static::isAlphaNumeric($this->a)) {
                                    echo $this->a;
                                    $this->saveString();
                                }
                            }
                            break;

                        case ' ':
                            if (!static::isAlphaNumeric($this->a)) {
                                break;
                            }

                            // no break
                        default:
                            // check for some regex that breaks stuff
                            if ($this->a === '/' && ($this->b === '\'' || $this->b === '"')) {
                                $this->saveRegex();

                                // saveRegex() already refilled "b". Restart the
                                // while loop (inner switch, outer switch, while
                                // = 3 levels). A bare "continue" acts like
                                // "break" inside a switch and would fall into
                                // the getReal() call below, dropping a character.
                                continue 3;
                            }

                            echo $this->a;
                            $this->saveString();
                            break;
                    }
            }

            // do reg check of doom
            $this->b = $this->getReal();

            if ($this->b === '/' && strpos('(,=:[!&|?', $this->a) !== false) {
                $this->saveRegex();
            }
        }
    }

    /**
     * Resets attributes that do not need to be stored between requests so that
     * the next request is ready to go. Another reason for this is to make sure
     * the variables are cleared and are not taking up memory.
     */
    protected function clean()
    {
        unset($this->input);
        $this->index = 0;
        $this->a = $this->b = '';
        unset($this->c);
        unset($this->options);
    }

    /**
     * Returns the next character for processing based off of the current index.
     *
     * The look-ahead buffer ($this->c) is consumed first when it is set.
     * Control characters other than the newline are returned as a space.
     *
     * @return string|false The next character, or false once the input is
     *                      exhausted.
     */
    protected function getChar()
    {
        // Check to see if we had anything in the look ahead buffer and use that.
        if (isset($this->c)) {
            $char = $this->c;
            unset($this->c);
        } else {
            // Otherwise we start pulling from the input. The bounds check is
            // explicit because substr() returns "" instead of false past the
            // end of the string since PHP 8.0 (and at exactly the end since
            // PHP 7.0), which would otherwise never signal the end of input.
            if ($this->index >= strlen($this->input)) {
                return false;
            }

            $char = $this->input[$this->index];
            $this->index++;
        }

        // Normalize all whitespace except for the newline character into a
        // standard space.
        if ($char !== "\n" && ord($char) < 32) {
            return ' ';
        }

        return $char;
    }

    /**
     * This function gets the next "real" character. It is essentially a wrapper
     * around the getChar function that skips comments. This has significant
     * performance benefits as the skipping is done using native functions (ie,
     * c code) rather than in script php.
     *
     * When the character is a "/" that does not start a comment, the character
     * after it stays parked in the look-ahead buffer for the next getChar().
     *
     * @return string|false      Next 'real' character to be processed.
     * @throws \RuntimeException
     */
    protected function getReal()
    {
        $startIndex = $this->index;
        $char = $this->getChar();

        // Check to see if we're potentially in a comment
        if ($char !== '/') {
            return $char;
        }

        $this->c = $this->getChar();

        if ($this->c === '/') {
            return $this->processOneLineComments($startIndex);
        } elseif ($this->c === '*') {
            return $this->processMultiLineComments($startIndex);
        }

        return $char;
    }

    /**
     * Removes one line comments, with the exception of some very specific types
     * of conditional comments (those starting with "//@"), which are returned
     * whole, prefixed with a newline.
     *
     * @param  int          $startIndex The index point where "getReal" function started
     * @return string|false
     */
    protected function processOneLineComments($startIndex)
    {
        $thirdCommentString = substr($this->input, $this->index, 1);

        // kill rest of line
        $this->getNext("\n");

        if ($thirdCommentString === '@') {
            $endPoint = $this->index - $startIndex;
            unset($this->c);
            $char = "\n" . substr($this->input, $startIndex, $endPoint);
        } else {
            // first one is contents of $this->c
            $this->getChar();
            $char = $this->getChar();
        }

        return $char;
    }

    /**
     * Skips multiline comments where appropriate, and includes them where needed.
     * Conditional comments ("/*@") are always preserved; "license" style
     * blocks ("/*!") are preserved when the flaggedComments option is on.
     *
     * @param  int               $startIndex The index point where "getReal" function started
     * @return bool|string       False if there's no character
     * @throws \RuntimeException Unclosed comments will throw an error
     */
    protected function processMultiLineComments($startIndex)
    {
        $this->getChar(); // current C
        $thirdCommentString = $this->getChar();

        // kill everything up to the next */ if it's there
        if ($this->getNext('*/')) {
            $this->getChar(); // get *
            $this->getChar(); // get /
            $char = $this->getChar(); // get next real character

            // Now we reinsert conditional comments and YUI-style licensing comments
            if (($this->options['flaggedComments'] && $thirdCommentString === '!')
                || ($thirdCommentString === '@')) {

                // If conditional comments or flagged comments are not the first
                // thing in the script we need to echo a and fill it with a
                // space before moving on.
                if ($startIndex > 0) {
                    echo $this->a;
                    $this->a = " ";

                    // If the comment started on a new line we let it stay on the new line
                    if ($this->input[($startIndex - 1)] === "\n") {
                        echo "\n";
                    }
                }

                // index is one past the character read after the comment, so
                // (index - 1) is the first position after the closing "*/".
                $endPoint = ($this->index - 1) - $startIndex;
                echo substr($this->input, $startIndex, $endPoint);

                return $char;
            }
        } else {
            $char = false;
        }

        if ($char === false) {
            throw new \RuntimeException('Unclosed multiline comment at position: ' . ($this->index - 2));
        }

        // if we're here c is part of the comment and therefore tossed
        if (isset($this->c)) {
            unset($this->c);
        }

        return $char;
    }

    /**
     * Pushes the index ahead to the next instance of the supplied string. If it
     * is found the first character of the string is returned and the index is
     * set to its position. If it is not found the index is left untouched.
     *
     * @param  string       $string
     * @return string|false Returns the first character of the string or false.
     */
    protected function getNext($string)
    {
        // Find the next occurrence of "string" after the current position.
        $pos = strpos($this->input, $string, $this->index);

        // If it's not there return false.
        if ($pos === false) {
            return false;
        }

        // Adjust position of index to jump ahead to the asked for string
        $this->index = $pos;

        // Return the first character of that string.
        return substr($this->input, $this->index, 1);
    }

    /**
     * Moves "b" into "a" and, when that character opens a javascript string,
     * crawls for the end of it and outputs the whole string.
     *
     * The opening quote and the string body are echoed here. The closing quote
     * is left in "a" so that loop() outputs it on its next pass.
     *
     * @throws \RuntimeException Unclosed strings will throw an error
     */
    protected function saveString()
    {
        $startpos = $this->index;

        // saveString is always called after a gets cleared, so we push b into
        // that spot.
        $this->a = $this->b;

        // If this isn't a string we don't need to do anything.
        if ($this->a !== "'" && $this->a !== '"') {
            return;
        }

        // String type is the quote used, " or '
        $stringType = $this->a;

        // Echo out that starting quote
        echo $this->a;

        // Loop until the string is done
        while (true) {
            // Grab the very next character and load it into a
            $this->a = $this->getChar();

            // Running out of input inside a string (possible when the last
            // line ends in a backslash continuation) means it was never
            // closed; without this check the loop would never terminate.
            if ($this->a === false) {
                throw new \RuntimeException('Unclosed string at position: ' . $startpos);
            }

            // The string opener (single or double quote) closes the string.
            if ($this->a === $stringType) {
                break;
            }

            // New lines in strings without line delimiters are bad- actual
            // new lines will be represented by the string \n and not the
            // actual character, so those are handled by the escape branch.
            if ($this->a === "\n") {
                throw new \RuntimeException('Unclosed string at position: ' . $startpos);
            }

            // Escaped characters get picked up here.
            if ($this->a === '\\') {
                // a is a slash. We want to keep it, and the next character,
                // unless it's a new line. New lines as actual strings will be
                // preserved, but escaped new lines should be reduced.
                $this->b = $this->getChar();

                // If b is a new line we discard a and b; otherwise the escape
                // sequence is written out untouched.
                if ($this->b !== "\n") {
                    echo $this->a . $this->b;
                }

                continue;
            }

            // Since we're not dealing with any special cases we simply
            // output the character and continue our loop.
            echo $this->a;
        }
    }

    /**
     * When a regular expression is detected this function crawls for the end of
     * it and outputs the whole regex.
     *
     * The closing "/" is left in "a" (not echoed) and "b" is refilled with the
     * next real character, so loop() can carry on from there.
     *
     * @throws \RuntimeException Unclosed regex will throw an error
     */
    protected function saveRegex()
    {
        echo $this->a . $this->b;

        while (($this->a = $this->getChar()) !== false) {
            if ($this->a === '/') {
                break;
            }

            // Keep the backslash and treat the escaped character as literal,
            // so an escaped "/" does not end the pattern.
            if ($this->a === '\\') {
                echo $this->a;
                $this->a = $this->getChar();
            }

            if ($this->a === "\n") {
                throw new \RuntimeException('Unclosed regex pattern at position: ' . $this->index);
            }

            echo $this->a;
        }

        $this->b = $this->getReal();
    }

    /**
     * Checks to see if a character is alphanumeric. Besides word characters
     * this also accepts "$", letters matched by \pL and, deliberately, "/".
     *
     * @param  string $char Just one character
     * @return bool
     */
    protected static function isAlphaNumeric($char)
    {
        return preg_match('/^[\w\$\pL]$/', $char) === 1 || $char === '/';
    }

    /**
     * Replaces whitespace between two sign characters (as in "asd" + ++x) with
     * a placeholder string literal, so the minifier cannot merge the signs
     * into a different operator. The placeholder is recorded in $this->locks.
     *
     * Only the whitespace of the first match is recorded; every occurrence is
     * later restored with that same whitespace.
     *
     * @param  string $js The string to lock
     * @return string The javascript with placeholders inserted, or the input
     *                unchanged when the pattern does not occur.
     */
    protected function lock($js)
    {
        /* lock things like <code>"asd" + ++x;</code> */
        $lock = '"LOCK---' . crc32((string) time()) . '"';

        $matches = array();
        preg_match('/([+-])(\s+)([+-])/S', $js, $matches);
        if (empty($matches)) {
            return $js;
        }

        $this->locks[$lock] = $matches[2];

        $js = preg_replace('/([+-])\s+([+-])/S', "$1{$lock}$2", $js);
        /* -- */

        return $js;
    }

    /**
     * Replaces "locks" with the original characters.
     *
     * @param  string $js The string to unlock
     * @return string The javascript with every placeholder swapped back.
     */
    protected function unlock($js)
    {
        if (empty($this->locks)) {
            return $js;
        }

        foreach ($this->locks as $lock => $replacement) {
            $js = str_replace($lock, $replacement, $js);
        }

        return $js;
    }
}