root/trunk/midcom/build/class.JavaScriptPacker.php

Revision 17211, 26.3 kB (checked in by piotras, 2 months ago)

Bumped midcom version, ported changes from midcom_2_8 branch

  • Property svn:executable set to *
Line 
1 <?php
2 /* 7 December 2006. version 1.0
3  *
4  * This is the php version of the Dean Edwards JavaScript 's Packer,
5  * Based on :
6  *
7  * ParseMaster, version 1.0.2 (2005-08-19) Copyright 2005, Dean Edwards
8  * a multi-pattern parser.
9  * KNOWN BUG: erroneous behavior when using escapeChar with a replacement
10  * value that is a function
11  *
12  * packer, version 2.0.2 (2005-08-19) Copyright 2004-2005, Dean Edwards
13  *
14  * License: http://creativecommons.org/licenses/LGPL/2.1/
15  *
16  * Ported to PHP by Nicolas Martin.
17  *
18  * ----------------------------------------------------------------------
19  *
20  * examples of usage :
21  * $myPacker = new JavaScriptPacker($script, 62, true, false);
22  * $packed = $myPacker->pack();
23  *
24  * or
25  *
26  * $myPacker = new JavaScriptPacker($script, 'Normal', true, false);
27  * $packed = $myPacker->pack();
28  *
29  * or (default values)
30  *
31  * $myPacker = new JavaScriptPacker($script);
32  * $packed = $myPacker->pack();
33  *
34  *
35  * params of the constructor :
36  * $script:       the JavaScript to pack, string.
37  * $encoding:     level of encoding, int or string :
38  *                0,10,62,95 or 'None', 'Numeric', 'Normal', 'High ASCII'.
39  *                default: 62.
40  * $fastDecode:   include the fast decoder in the packed result, boolean.
41  *                default : true.
42  * $specialChars: set to true if you have flagged the private and local
43  *                variables in the script, boolean.
44  *                default: false.
45  *
46  * The pack() method returns the compressed JavaScript, as a string.
47  *
48  * see http://dean.edwards.name/packer/usage/ for more information.
49  *
50  * Notes :
51  * # need PHP 5 . Tested with PHP 5.1.2
52  *
53  * # The packed result may differ from the one produced by the Dean
54  *   Edwards version, but it has the same length. The reason is that
55  *   the PHP function usort does not necessarily preserve the original
56  *   order of two equal members. The JavaScript sort function does in
57  *   fact preserve this order (but that is not required by the
58  *   ECMAScript standard). So the order of the encoded keywords can be
59  *   different in the two results.
60  *
61  * # Be careful with the 'High ASCII' Level encoding if you use
62  *   UTF-8 in your files...
63  */
64
65
66 class JavaScriptPacker {
67     // Replacement value passed to ParseMaster::add() for patterns whose
        // matches are to be protected rather than rewritten (strings, regular
        // expressions and "(;;)" in _basicCompression()).
68     const IGNORE = '$1';
69
70     // Packing options. The values below are only defaults: the constructor
        // overwrites all four of them.
71     private $_script = '';
72     private $_encoding = 62;
73     private $_fastDecode = true;
74     private $_specialChars = false;
75     
        // Maps the symbolic encoding names accepted by the constructor to the
        // numeric encoding level they stand for.
76     private $LITERAL_ENCODING = array(
77         'None' => 0,
78         'Numeric' => 10,
79         'Normal' => 62,
80         'High ASCII' => 95
81     );
82     
83     public function __construct($_script, $_encoding = 62, $_fastDecode = true, $_specialChars = false)
84     {
85         $this->_script = $_script . "\n";
86         if (array_key_exists($_encoding, $this->LITERAL_ENCODING))
87             $_encoding = $this->LITERAL_ENCODING[$_encoding];
88         $this->_encoding = min((int)$_encoding, 95);
89         $this->_fastDecode = $_fastDecode;   
90         $this->_specialChars = $_specialChars;
91     }
92     
93     public function pack() {
94         $this->_addParser('_basicCompression');
95         if ($this->_specialChars)
96             $this->_addParser('_encodeSpecialChars');
97         if ($this->_encoding)
98             $this->_addParser('_encodeKeywords');
99         
100         // go!
101         return $this->_pack($this->_script);
102     }
103     
104     // apply all parsing routines
105     private function _pack($script) {
106         for ($i = 0; isset($this->_parsers[$i]); $i++) {
107             $script = call_user_func(array(&$this,$this->_parsers[$i]), $script);
108         }
109         return $script;
110     }
111     
112     // keep a list of parsing functions, they'll be executed all at once
113     private $_parsers = array();
114     private function _addParser($parser) {
115         $this->_parsers[] = $parser;
116     }
117     
118     // zero encoding - just removal of white space and comments
119     private function _basicCompression($script) {
120         $parser = new ParseMaster();
121         // make safe
122         $parser->escapeChar = '\\';
123         // protect strings
124         $parser->add('/\'[^\'\\n\\r]*\'/', self::IGNORE);
125         $parser->add('/"[^"\\n\\r]*"/', self::IGNORE);
126         // remove comments
127         $parser->add('/\\/\\/[^\\n\\r]*[\\n\\r]/', ' ');
128         $parser->add('/\\/\\*[^*]*\\*+([^\\/][^*]*\\*+)*\\//', ' ');
129         // protect regular expressions
130         $parser->add('/\\s+(\\/[^\\/\\n\\r\\*][^\\/\\n\\r]*\\/g?i?)/', '$2'); // IGNORE
131         $parser->add('/[^\\w\\x24\\/\'"*)\\?:]\\/[^\\/\\n\\r\\*][^\\/\\n\\r]*\\/g?i?/', self::IGNORE);
132         // remove: ;;; doSomething();
133         if ($this->_specialChars) $parser->add('/;;;[^\\n\\r]+[\\n\\r]/');
134         // remove redundant semi-colons
135         $parser->add('/\\(;;\\)/', self::IGNORE); // protect for (;;) loops
136         $parser->add('/;+\\s*([};])/', '$2');
137         // apply the above
138         $script = $parser->exec($script);
139
140         // remove white-space
141         $parser->add('/(\\b|\\x24)\\s+(\\b|\\x24)/', '$2 $3');
142         $parser->add('/([+\\-])\\s+([+\\-])/', '$2 $3');
143         $parser->add('/\\s+/', '');
144         // done
145         return $parser->exec($script);
146     }
147     
148     private function _encodeSpecialChars($script) {
149         $parser = new ParseMaster();
150         // replace: $name -> n, $$name -> na
151         $parser->add('/((\\x24+)([a-zA-Z$_]+))(\\d*)/',
152                      array('fn' => '_replace_name')
153         );
154         // replace: _name -> _0, double-underscore (__name) is ignored
155         $regexp = '/\\b_[A-Za-z\\d]\\w*/';
156         // build the word list
157         $keywords = $this->_analyze($script, $regexp, '_encodePrivate');
158         // quick ref
159         $encoded = $keywords['encoded'];
160         
161         $parser->add($regexp,
162             array(
163                 'fn' => '_replace_encoded',
164                 'data' => $encoded
165             )
166         );
167         return $parser->exec($script);
168     }
169     
170     private function _encodeKeywords($script) {
171         // escape high-ascii values already in the script (i.e. in strings)
172         if ($this->_encoding > 62)
173             $script = $this->_escape95($script);
174         // create the parser
175         $parser = new ParseMaster();
176         $encode = $this->_getEncoder($this->_encoding);
177         // for high-ascii, don't encode single character low-ascii
178         $regexp = ($this->_encoding > 62) ? '/\\w\\w+/' : '/\\w+/';
179         // build the word list
180         $keywords = $this->_analyze($script, $regexp, $encode);
181         $encoded = $keywords['encoded'];
182         
183         // encode
184         $parser->add($regexp,
185             array(
186                 'fn' => '_replace_encoded',
187                 'data' => $encoded
188             )
189         );
190         if (empty($script)) return $script;
191         else {
192             //$res = $parser->exec($script);
193             //$res = $this->_bootStrap($res, $keywords);
194             //return $res;
195             return $this->_bootStrap($parser->exec($script), $keywords);
196         }
197     }
198     
199     private function _analyze($script, $regexp, $encode) {
200         // analyse
201         // retrieve all words in the script
202         $all = array();
203         preg_match_all($regexp, $script, $all);
204         $_sorted = array(); // list of words sorted by frequency
205         $_encoded = array(); // dictionary of word->encoding
206         $_protected = array(); // instances of "protected" words
207         $all = $all[0]; // simulate the javascript comportement of global match
208         if (!empty($all)) {
209             $unsorted = array(); // same list, not sorted
210             $protected = array(); // "protected" words (dictionary of word->"word")
211             $value = array(); // dictionary of charCode->encoding (eg. 256->ff)
212             $this->_count = array(); // word->count
213             $i = count($all); $j = 0; //$word = null;
214             // count the occurrences - used for sorting later
215             do {
216                 --$i;
217                 $word = '$' . $all[$i];
218                 if (!isset($this->_count[$word])) {
219                     $this->_count[$word] = 0;
220                     $unsorted[$j] = $word;
221                     // make a dictionary of all of the protected words in this script
222                     //  these are words that might be mistaken for encoding
223                     //if (is_string($encode) && method_exists($this, $encode))
224                     $values[$j] = call_user_func(array(&$this, $encode), $j);
225                     $protected['$' . $values[$j]] = $j++;
226                 }
227                 // increment the word counter
228                 $this->_count[$word]++;
229             } while ($i > 0);
230             // prepare to sort the word list, first we must protect
231             //  words that are also used as codes. we assign them a code
232             //  equivalent to the word itself.
233             // e.g. if "do" falls within our encoding range
234             //      then we store keywords["do"] = "do";
235             // this avoids problems when decoding
236             $i = count($unsorted);
237             do {
238                 $word = $unsorted[--$i];
239                 if (isset($protected[$word]) /*!= null*/) {
240                     $_sorted[$protected[$word]] = substr($word, 1);
241                     $_protected[$protected[$word]] = true;
242                     $this->_count[$word] = 0;
243                 }
244             } while ($i);
245             
246             // sort the words by frequency
247             // Note: the javascript and php version of sort can be different :
248             // in php manual, usort :
249             // " If two members compare as equal,
250             // their order in the sorted array is undefined."
251             // so the final packed script is different of the Dean's javascript version
252             // but equivalent.
253             // the ECMAscript standard does not guarantee this behaviour,
254             // and thus not all browsers (e.g. Mozilla versions dating back to at
255             // least 2003) respect this.
256             usort($unsorted, array(&$this, '_sortWords'));
257             $j = 0;
258             // because there are "protected" words in the list
259             //  we must add the sorted words around them
260             do {
261                 if (!isset($_sorted[$i]))
262                     $_sorted[$i] = substr($unsorted[$j++], 1);
263                 $_encoded[$_sorted[$i]] = $values[$i];
264             } while (++$i < count($unsorted));
265         }
266         return array(
267             'sorted'  => $_sorted,
268             'encoded' => $_encoded,
269             'protected' => $_protected);
270     }
271     
272     private $_count = array();
273     private function _sortWords($match1, $match2) {
274         return $this->_count[$match2] - $this->_count[$match1];
275     }
276     
277     // build the boot function used for loading and decoding
        // Wraps the packed script in the JavaScript boot function.
        //   $packed:   the script with its words already replaced by codes
        //   $keywords: the array returned by _analyze()
        //              ('sorted', 'encoded', 'protected')
        // Returns the string "eval(<boot function>(<arguments>))\n".
278     private function _bootStrap($packed, $keywords) {
            // pattern matching the literal text "$encode($count)" in the JS templates
279         $ENCODE = $this->_safeRegExp('$encode\\($count\\)');
280
281         // $packed: the packed script
282         $packed = "'" . $this->_escape($packed) . "'";
283
284         // $ascii: base for encoding
285         $ascii = min(count($keywords['sorted']), $this->_encoding);
286         if ($ascii == 0) $ascii = 1;
287
288         // $count: number of words contained in the script
289         $count = count($keywords['sorted']);
290
291         // $keywords: list of words contained in the script
            // (protected words are emitted as empty entries)
292         foreach ($keywords['protected'] as $i=>$value) {
293             $keywords['sorted'][$i] = '';
294         }
295         // convert from a string to an array
296         ksort($keywords['sorted']);
297         $keywords = "'" . implode('|',$keywords['sorted']) . "'.split('|')";
298
            // fetch the JS source of the encoder and specialise it: "_encoding"
            // becomes "$ascii" and "arguments.callee" becomes "$encode"
299         $encode = ($this->_encoding > 62) ? '_encode95' : $this->_getEncoder($ascii);
300         $encode = $this->_getJSFunction($encode);
301         $encode = preg_replace('/_encoding/','$ascii', $encode);
302         $encode = preg_replace('/arguments\\.callee/','$encode', $encode);
            // expression substituted for "$encode($count)" when encoding inline
303         $inline = '\\$count' . ($ascii > 10 ? '.toString(\\$ascii)' : '');
304
305         // $decode: code snippet to speed up decoding
306         if ($this->_fastDecode) {
307             // create the decoder
308             $decode = $this->_getJSFunction('_decodeBody');
309             if ($this->_encoding > 62)
310                 $decode = preg_replace('/\\\\w/', '[\\xa1-\\xff]', $decode);
311             // perform the encoding inline for lower ascii values
312             elseif ($ascii < 36)
313                 $decode = preg_replace($ENCODE, $inline, $decode);
314             // special case: when $count==0 there are no keywords. I want to keep
315             //  the basic shape of the unpacking funcion so i'll frig the code...
316             if ($count == 0)
317                 $decode = preg_replace($this->_safeRegExp('($count)\\s*=\\s*1'), '$1=0', $decode, 1);
318         }
319
320         // boot function
321         $unpack = $this->_getJSFunction('_unpack');
322         if ($this->_fastDecode) {
323             // insert the decoder
                // (right after the first "{"; the snippet is handed to the
                // callback through $this->buffer)
324             $this->buffer = $decode;
325             $unpack = preg_replace_callback('/\\{/', array(&$this, '_insertFastDecode'), $unpack, 1);
326         }
327         $unpack = preg_replace('/"/', "'", $unpack);
328         if ($this->_encoding > 62) { // high-ascii
329             // get rid of the word-boundaries for regexp matches
330             $unpack = preg_replace('/\'\\\\\\\\b\'\s*\\+|\\+\s*\'\\\\\\\\b\'/', '', $unpack);
331         }
332         if ($ascii > 36 || $this->_encoding > 62 || $this->_fastDecode) {
333             // insert the encode function
334             $this->buffer = $encode;
335             $unpack = preg_replace_callback('/\\{/', array(&$this, '_insertFastEncode'), $unpack, 1);
336         } else {
337             // perform the encoding inline
338             $unpack = preg_replace($ENCODE, $inline, $unpack);
339         }
340         // pack the boot function too
            // (encoding 0, no fast decoder, special characters enabled)
341         $unpackPacker = new JavaScriptPacker($unpack, 0, false, true);
342         $unpack = $unpackPacker->pack();
343         
344         // arguments
            // (with the fast decoder two extra arguments, 0 and {}, are passed)
345         $params = array($packed, $ascii, $count, $keywords);
346         if ($this->_fastDecode) {
347             $params[] = 0;
348             $params[] = '{}';
349         }
350         $params = implode(',', $params);
351         
352         // the whole thing
353         return 'eval(' . $unpack . '(' . $params . "))\n";
354     }
355     
356     private $buffer;
357     private function _insertFastDecode($match) {
358         return '{' . $this->buffer . ';';
359     }
360     private function _insertFastEncode($match) {
361         return '{$encode=' . $this->buffer . ';';
362     }
363     
364     // mmm.. ..which one do i need ??
365     private function _getEncoder($ascii) {
366         return $ascii > 10 ? $ascii > 36 ? $ascii > 62 ?
367                '_encode95' : '_encode62' : '_encode36' : '_encode10';
368     }
369     
370     // zero encoding
371     // characters: 0123456789
372     private function _encode10($charCode) {
373         return $charCode;
374     }
375     
376     // inherent base36 support
377     // characters: 0123456789abcdefghijklmnopqrstuvwxyz
378     private function _encode36($charCode) {
379         return base_convert($charCode, 10, 36);
380     }
381     
382     // hitch a ride on base36 and add the upper case alpha characters
383     // characters: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
384     private function _encode62($charCode) {
385         $res = '';
386         if ($charCode >= $this->_encoding) {
387             $res = $this->_encode62((int)($charCode / $this->_encoding));
388         }
389         $charCode = $charCode % $this->_encoding;
390         
391         if ($charCode > 35)
392             return $res . chr($charCode + 29);
393         else
394             return $res . base_convert($charCode, 10, 36);
395     }
396     
397     // use high-ascii values
398     // characters:
399     private function _encode95($charCode) {
400         $res = '';
401         if ($charCode >= $this->_encoding)
402             $res = $this->_encode95($charCode / $this->_encoding);
403         
404         return $res . chr(($charCode % $this->_encoding) + 161);
405     }
406     
407     private function _safeRegExp($string) {
408         return '/'.preg_replace('/\$/', '\\\$', $string).'/';
409     }
410     
411     private function _encodePrivate($charCode) {
412         return "_" . $charCode;
413     }
414     
415     // protect characters used by the parser
416     private function _escape($script) {
417         return preg_replace('/([\\\\\'])/', '\\\$1', $script);
418     }
419     
420     // protect high-ascii characters already in the script
421     private function _escape95($script) {
422         return preg_replace_callback(
423             '/[\\xa1-\\xff]/',
424             array(&$this, '_escape95Bis'),
425             $script
426         );
427     }
428     private function _escape95Bis($match) {
429         return '\x'.((string)dechex(ord($match)));
430     }
431     
432     
433     private function _getJSFunction($aName) {
434         if (defined('self::JSFUNCTION'.$aName))
435             return constant('self::JSFUNCTION'.$aName);
436         else
437             return '';
438     }
439     
440     // JavaScript Functions used.
441     // Note : In Dean's version, these functions are converted
442     // with 'String(aFunctionName);'.
443     // This internal conversion complete the original code, ex :
444     // 'while (aBool) anAction();' is converted to
445     // 'while (aBool) { anAction(); }'.
446     // The JavaScript functions below are corrected.
447     
448     // unpacking function - this is the boot strap function
449     //  data extracted from this packing routine is passed to
450     //  this function when decoded in the target
451     // NOTE ! : without the ';' final.
        // Template only: _bootStrap() edits this text before emitting it. It
        // inserts the decoder and/or the encoder after the first "{", or
        // replaces "$encode($count)" by an inline expression, and then packs
        // the result itself.
452     const JSFUNCTION_unpack =
453
454 'function($packed, $ascii, $count, $keywords, $encode, $decode) {
455     while ($count--) {
456         if ($keywords[$count]) {
457             $packed = $packed.replace(new RegExp(\'\\\\b\' + $encode($count) + \'\\\\b\', \'g\'), $keywords[$count]);
458         }
459     }
460     return $packed;
461 }';
462 /*
463 'function($packed, $ascii, $count, $keywords, $encode, $decode) {
464     while ($count--)
465         if ($keywords[$count])
466             $packed = $packed.replace(new RegExp(\'\\\\b\' + $encode($count) + \'\\\\b\', \'g\'), $keywords[$count]);
467     return $packed;
468 }';
469 */
470     
471     // code-snippet inserted into the unpacker to speed up decoding
        // Used by _bootStrap() only when fast decoding is enabled: the
        // snippet is inserted after the first "{" of JSFUNCTION_unpack. For
        // encodings above 62 its generic match is changed from \w to
        // [\xa1-\xff] first.
472     const JSFUNCTION_decodeBody =
473 //_decode = function() {
474 // does the browser support String.replace where the
475 //  replacement value is a function?
476
477 '    if (!\'\'.replace(/^/, String)) {
478         // decode all the values we need
479         while ($count--) {
480             $decode[$encode($count)] = $keywords[$count] || $encode($count);
481         }
482         // global replacement function
483         $keywords = [function ($encoded) {return $decode[$encoded]}];
484         // generic match
485         $encode = function () {return \'\\\\w+\'};
486         // reset the loop counter -  we are now doing a global replace
487         $count = 1;
488     }
489 ';
490 //};
491 /*
492 '    if (!\'\'.replace(/^/, String)) {
493         // decode all the values we need
494         while ($count--) $decode[$encode($count)] = $keywords[$count] || $encode($count);
495         // global replacement function
496         $keywords = [function ($encoded) {return $decode[$encoded]}];
497         // generic match
498         $encode = function () {return\'\\\\w+\'};
499         // reset the loop counter -  we are now doing a global replace
500         $count = 1;
501     }';
502 */
503     
504      // zero encoding
505      // characters: 0123456789
         // JavaScript source of the base-10 encoder; looked up by name
         // ('_encode10') through _getJSFunction()
506      const JSFUNCTION_encode10 =
507 'function($charCode) {
508     return $charCode;
509 }';//;';
510     
511      // inherent base36 support
512      // characters: 0123456789abcdefghijklmnopqrstuvwxyz
         // JavaScript source of the base-36 encoder; looked up by name
         // ('_encode36') through _getJSFunction()
513      const JSFUNCTION_encode36 =
514 'function($charCode) {
515     return $charCode.toString(36);
516 }';//;';
517     
518     // hitch a ride on base36 and add the upper case alpha characters
519     // characters: 0123456789abcdefghijklmnopq