]>
Commit | Line | Data |
---|---|---|
107997e6 | 1 | <?php |
22f5fdf8 AK |
2 | /* |
3 | * This file is part of the JShrink package. | |
107997e6 | 4 | * |
22f5fdf8 | 5 | * (c) Robert Hafner <tedivm@tedivm.com> |
107997e6 | 6 | * |
22f5fdf8 AK |
7 | * For the full copyright and license information, please view the LICENSE |
8 | * file that was distributed with this source code. | |
9 | */ | |
10 | ||
11 | /** | |
12 | * JShrink | |
107997e6 | 13 | * |
107997e6 AD |
14 | * |
15 | * @package JShrink | |
16 | * @author Robert Hafner <tedivm@tedivm.com> | |
107997e6 AD |
17 | */ |
18 | ||
22f5fdf8 | 19 | namespace JShrink; |
107997e6 AD |
20 | |
21 | /** | |
22 | * Minifier | |
23 | * | |
24 | * Usage - Minifier::minify($js); | |
25 | * Usage - Minifier::minify($js, $options); | |
26 | * Usage - Minifier::minify($js, array('flaggedComments' => false)); | |
27 | * | |
22f5fdf8 AK |
28 | * @package JShrink |
29 | * @author Robert Hafner <tedivm@tedivm.com> | |
30 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | |
107997e6 AD |
31 | */ |
class Minifier
{
    /**
     * The input javascript to be minified.
     *
     * @var string
     */
    protected $input;

    /**
     * The location of the character (in the input string) that is next to be
     * processed.
     *
     * @var int
     */
    protected $index = 0;

    /**
     * The first of the characters currently being looked at.
     *
     * @var string|false
     */
    protected $a = '';

    /**
     * The next character being looked at (after a). For preserved "//@"
     * conditional comments this temporarily holds the whole comment line.
     *
     * @var string|false
     */
    protected $b = '';

    /**
     * This character is only active when certain look ahead actions take place.
     *
     * @var string|false|null
     */
    protected $c;

    /**
     * Contains the options for the current minification process.
     *
     * @var array
     */
    protected $options;

    /**
     * Contains the default options for minification. This array is merged with
     * the one passed in by the user to create the request specific set of
     * options (stored in the $options attribute).
     *
     * @var array
     */
    protected static $defaultOptions = array('flaggedComments' => true);

    /**
     * Contains lock ids which are used to replace certain code patterns and
     * prevent them from being minified. Keys are the lock tokens, values are
     * the original text each token stands in for.
     *
     * @var array
     */
    protected $locks = array();

    /**
     * Takes a string containing javascript and removes unneeded characters in
     * order to shrink the code without altering its functionality.
     *
     * The minifier writes to the output stream, so the work is wrapped in an
     * output buffer that is always closed again, even when minification fails.
     *
     * @param  string      $js      The raw javascript to be minified
     * @param  array       $options Various runtime options in an associative array
     * @throws \Exception  Any exception raised while minifying is rethrown
     * @return bool|string
     */
    public static function minify($js, $options = array())
    {
        try {
            ob_start();

            $jshrink = new Minifier();
            $js = $jshrink->lock($js);
            $jshrink->minifyDirectToOutput($js, $options);

            // Sometimes there's a leading new line, so we trim that out here.
            $js = ltrim(ob_get_clean());
            $js = $jshrink->unlock($js);
            unset($jshrink);

            return $js;

        } catch (\Exception $e) {

            if (isset($jshrink)) {
                // The minification run was probably interrupted half way, so
                // reset the instance state before discarding it.
                $jshrink->clean();
                unset($jshrink);
            }

            // Without this call the partially generated javascript would leak
            // into the real output.
            ob_end_clean();
            throw $e;
        }
    }

    /**
     * Processes a javascript string and outputs only the required characters,
     * stripping out all unneeded characters.
     *
     * @param string $js      The raw javascript to be minified
     * @param array  $options Various runtime options in an associative array
     */
    protected function minifyDirectToOutput($js, $options)
    {
        $this->initialize($js, $options);
        $this->loop();
        $this->clean();
    }

    /**
     * Initializes internal variables and normalizes new lines.
     *
     * @param string $js      The raw javascript to be minified
     * @param array  $options Various runtime options in an associative array
     */
    protected function initialize($js, $options)
    {
        $this->options = array_merge(static::$defaultOptions, $options);

        // str_replace applies the pairs in order: CRLF first, then empty
        // comments, then any remaining lone CR.
        $this->input = str_replace(
            array("\r\n", '/**/', "\r"),
            array("\n", '', "\n"),
            $js
        );

        // We add a newline to the end of the script to make it easier to deal
        // with comments at the bottom of the script- this prevents the unclosed
        // comment error that can otherwise occur. A literal "\n" is used rather
        // than PHP_EOL so the line ending normalization above is not undone on
        // platforms where PHP_EOL is "\r\n".
        $this->input .= "\n";

        // Populate "a" with a new line, "b" with the first character, before
        // entering the loop
        $this->a = "\n";
        $this->b = $this->getReal();
    }

    /**
     * The primary action occurs here. This function loops through the input string,
     * outputting anything that's relevant and discarding anything that is not.
     */
    protected function loop()
    {
        while ($this->a !== false && !is_null($this->a) && $this->a !== '') {

            switch ($this->a) {
                // new lines
                case "\n":
                    // if the next line is something that can't stand alone preserve the newline
                    if (static::isCharIn($this->b, '(-+{[@')) {
                        echo $this->a;
                        $this->saveString();
                        break;
                    }

                    // if B is a space we skip the rest of the switch block and go down to the
                    // string/regex check below, resetting $this->b with getReal
                    if ($this->b === ' ') {
                        break;
                    }

                    // otherwise we treat the newline like a space (intentional fall through)

                case ' ':
                    if (static::isAlphaNumeric($this->b)) {
                        echo $this->a;
                    }

                    $this->saveString();
                    break;

                default:
                    switch ($this->b) {
                        case "\n":
                            // Keep "a" when a statement may legitimately end on it.
                            if (static::isCharIn($this->a, '}])+-"\'')
                                || static::isAlphaNumeric($this->a)
                            ) {
                                echo $this->a;
                                $this->saveString();
                            }
                            break;

                        case ' ':
                            if (!static::isAlphaNumeric($this->a)) {
                                break;
                            }

                            // intentional fall through

                        default:
                            // check for some regex that breaks stuff
                            if ($this->a === '/' && ($this->b === '\'' || $this->b === '"')) {
                                $this->saveRegex();

                                // saveRegex() already loaded the next "b". A
                                // bare "continue" inside a switch behaves like
                                // "break", which would run the getReal() call
                                // below and silently drop that character, so
                                // the while loop is targeted explicitly.
                                continue 3;
                            }

                            echo $this->a;
                            $this->saveString();
                            break;
                    }
            }

            // do reg check of doom
            $this->b = $this->getReal();

            if ($this->b === '/' && static::isCharIn($this->a, '(,=:[!&|?')) {
                $this->saveRegex();
            }
        }
    }

    /**
     * Resets attributes that do not need to be stored between requests so that
     * the next request is ready to go. Another reason for this is to make sure
     * the variables are cleared and are not taking up memory.
     */
    protected function clean()
    {
        unset($this->input);
        $this->index = 0;
        $this->a = $this->b = '';
        unset($this->c);
        unset($this->options);
    }

    /**
     * Returns the next character for processing based off of the current index.
     *
     * @return string|false The next character, or false at the end of the input
     */
    protected function getChar()
    {
        // Check to see if we had anything in the look ahead buffer and use that.
        if (isset($this->c)) {
            $char = $this->c;
            unset($this->c);

            // The look ahead itself may have hit the end of the input.
            if ($char === false) {
                return false;
            }

        // Otherwise we start pulling from the input.
        } else {
            // If the next character doesn't exist return false. An explicit
            // bounds check is used because substr() returns an empty string
            // (not false) past the end of the string on PHP 8, which would
            // otherwise be normalized to a space forever.
            if (!isset($this->input[$this->index])) {
                return false;
            }

            // Otherwise use this char and increment the pointer.
            $char = $this->input[$this->index];
            $this->index++;
        }

        // Normalize all whitespace except for the newline character into a
        // standard space.
        if ($char !== "\n" && ord($char) < 32) {
            return ' ';
        }

        return $char;
    }

    /**
     * This function gets the next "real" character. It is essentially a wrapper
     * around the getChar function that skips comments. This has significant
     * performance benefits as the skipping is done using native functions (ie,
     * c code) rather than in script php.
     *
     * @return string|false      Next 'real' character to be processed.
     * @throws \RuntimeException
     */
    protected function getReal()
    {
        $startIndex = $this->index;
        $char = $this->getChar();

        // Check to see if we're potentially in a comment
        if ($char !== '/') {
            return $char;
        }

        // Peek at the following character; it stays in the look ahead buffer
        // unless one of the comment handlers consumes it.
        $this->c = $this->getChar();

        if ($this->c === '/') {
            return $this->processOneLineComments($startIndex);

        } elseif ($this->c === '*') {
            return $this->processMultiLineComments($startIndex);
        }

        return $char;
    }

    /**
     * Removes one line comments, with the exception of some very specific types of
     * conditional comments (those starting with "//@").
     *
     * @param  int          $startIndex The index point where "getReal" function started
     * @return string|false
     */
    protected function processOneLineComments($startIndex)
    {
        // The look ahead buffer holds the second slash, so the index already
        // points at the character following "//".
        $thirdCommentString = substr($this->input, $this->index, 1);

        // kill rest of line
        $this->getNext("\n");

        if ($thirdCommentString == '@') {
            // Conditional comment: hand the whole line back on its own line.
            $endPoint = $this->index - $startIndex;
            unset($this->c);
            $char = "\n" . substr($this->input, $startIndex, $endPoint);
        } else {
            // first one is contents of $this->c
            $this->getChar();
            $char = $this->getChar();
        }

        return $char;
    }

    /**
     * Skips multiline comments where appropriate, and includes them where needed.
     * Conditional comments and "license" style blocks are preserved.
     *
     * @param  int               $startIndex The index point where "getReal" function started
     * @return bool|string       False if there's no character
     * @throws \RuntimeException Unclosed comments will throw an error
     */
    protected function processMultiLineComments($startIndex)
    {
        $this->getChar(); // current C
        $thirdCommentString = $this->getChar();

        // kill everything up to the next */ if it's there
        if ($this->getNext('*/') === false) {
            throw new \RuntimeException('Unclosed multiline comment at position: ' . ($this->index - 2));
        }

        $this->getChar(); // get *
        $this->getChar(); // get /
        $char = $this->getChar(); // get next real character

        // Now we reinsert conditional comments and YUI-style licensing comments
        if (($this->options['flaggedComments'] && $thirdCommentString === '!')
            || ($thirdCommentString === '@')
        ) {
            // If conditional comments or flagged comments are not the first thing in the script
            // we need to echo a and fill it with a space before moving on.
            if ($startIndex > 0) {
                echo $this->a;
                $this->a = " ";

                // If the comment started on a new line we let it stay on the new line
                if ($this->input[($startIndex - 1)] === "\n") {
                    echo "\n";
                }
            }

            // The index is one past the character after the comment, hence
            // the "- 1".
            $endPoint = ($this->index - 1) - $startIndex;
            echo substr($this->input, $startIndex, $endPoint);

            return $char;
        }

        if ($char === false) {
            throw new \RuntimeException('Unclosed multiline comment at position: ' . ($this->index - 2));
        }

        // if we're here c is part of the comment and therefore tossed
        if (isset($this->c)) {
            unset($this->c);
        }

        return $char;
    }

    /**
     * Pushes the index ahead to the next instance of the supplied string. If it
     * is found the first character of the string is returned and the index is set
     * to its position.
     *
     * @param  string       $string
     * @return string|false Returns the first character of the string or false.
     */
    protected function getNext($string)
    {
        // Find the next occurrence of "string" after the current position.
        $pos = strpos($this->input, $string, $this->index);

        // If it's not there return false and leave the index untouched.
        if ($pos === false) {
            return false;
        }

        // Adjust position of index to jump ahead to the asked for string
        $this->index = $pos;

        // Return the first character of that string.
        return substr($this->input, $this->index, 1);
    }

    /**
     * When a javascript string is detected this function crawls for the end of
     * it and saves the whole string.
     *
     * On return "a" holds the closing quote, which has not been echoed yet.
     *
     * @throws \RuntimeException Unclosed strings will throw an error
     */
    protected function saveString()
    {
        $startpos = $this->index;

        // saveString is always called after a gets cleared, so we push b into
        // that spot.
        $this->a = $this->b;

        // If this isn't a string we don't need to do anything.
        if ($this->a !== "'" && $this->a !== '"') {
            return;
        }

        // String type is the quote used, " or '
        $stringType = $this->a;

        // Echo out that starting quote
        echo $this->a;

        // Loop until the string is done
        while (true) {

            // Grab the very next character and load it into a
            $this->a = $this->getChar();

            // Running out of input inside a string (for example after an
            // escaped new line on the last line) can never terminate, so it is
            // reported instead of looping forever.
            if ($this->a === false) {
                throw new \RuntimeException('Unclosed string at position: ' . $startpos );
            }

            switch ($this->a) {

                // If the string opener (single or double quote) is used
                // break out of the while loop- the string is finished!
                case $stringType:
                    break 2;

                // New lines in strings without line delimiters are bad- actual
                // new lines will be represented by the string \n and not the actual
                // character, so those will be treated just fine using the switch
                // block below.
                case "\n":
                    throw new \RuntimeException('Unclosed string at position: ' . $startpos );

                // Escaped characters get picked up here. If it's an escaped new line it's not really needed
                case '\\':

                    // a is a slash. We want to keep it, and the next character,
                    // unless it's a new line. New lines as actual strings will be
                    // preserved, but escaped new lines should be reduced.
                    $this->b = $this->getChar();

                    // If b is a new line we discard a and b and restart the loop.
                    if ($this->b === "\n") {
                        break;
                    }

                    // echo out the escaped character and restart the loop.
                    echo $this->a . $this->b;
                    break;

                // Since we're not dealing with any special cases we simply
                // output the character and continue our loop.
                default:
                    echo $this->a;
            }
        }
    }

    /**
     * When a regular expression is detected this function crawls for the end of
     * it and saves the whole regex.
     *
     * On return "a" holds the closing slash (not yet echoed) and "b" the next
     * real character.
     *
     * @throws \RuntimeException Unclosed regex will throw an error
     */
    protected function saveRegex()
    {
        echo $this->a . $this->b;

        while (($this->a = $this->getChar()) !== false) {
            if ($this->a === '/') {
                break;
            }

            // Output the backslash and treat the escaped character literally.
            if ($this->a === '\\') {
                echo $this->a;
                $this->a = $this->getChar();
            }

            if ($this->a === "\n") {
                throw new \RuntimeException('Unclosed regex pattern at position: ' . $this->index);
            }

            echo $this->a;
        }
        $this->b = $this->getReal();
    }

    /**
     * Checks to see if a character is alphanumeric. Word characters, "$",
     * letters and the forward slash all count.
     *
     * @param  string $char Just one character
     * @return bool
     */
    protected static function isAlphaNumeric($char)
    {
        return preg_match('/^[\w\$\pL]$/', $char) === 1 || $char == '/';
    }

    /**
     * Checks whether a value occurs in a set of characters.
     *
     * Anything that is not a non-empty string (notably the false returned at
     * the end of the input) is never a member. Without this guard strpos()
     * on PHP 8 treats false as an empty needle and reports a match.
     *
     * @param  mixed  $char The value to look for
     * @param  string $set  The characters to search in
     * @return bool
     */
    protected static function isCharIn($char, $set)
    {
        return is_string($char) && $char !== '' && strpos($set, $char) !== false;
    }

    /**
     * Replace patterns in the given string and store the replacement
     *
     * Whitespace between two sign characters (as in <code>"asd" + ++x;</code>)
     * is swapped for a quoted placeholder so the minifier cannot merge the
     * operators. Every occurrence is replaced by the same placeholder, which
     * is later restored to the whitespace of the first occurrence.
     *
     * @param  string $js The string to lock
     * @return string The string with placeholders inserted
     */
    protected function lock($js)
    {
        $lock = '"LOCK---' . crc32((string) time()) . '"';

        $matches = array();
        preg_match('/([+-])(\s+)([+-])/S', $js, $matches);
        if (empty($matches)) {
            return $js;
        }

        $this->locks[$lock] = $matches[2];

        return preg_replace('/([+-])\s+([+-])/S', "$1{$lock}$2", $js);
    }

    /**
     * Replace "locks" with the original characters
     *
     * @param  string $js The string to unlock
     * @return string The string with every placeholder restored
     */
    protected function unlock($js)
    {
        if (empty($this->locks)) {
            return $js;
        }

        return str_replace(array_keys($this->locks), array_values($this->locks), $js);
    }

}
587 | } |