Skip to content

Commit 9db60b1

Browse files
authored
Merge pull request #119 from tedious/issue_68
Regex Refactoring, Removal of Buffer
2 parents dfe6fc8 + 552a46c commit 9db60b1

10 files changed

+125
-34
lines changed

phpunit.xml.dist

+11-3
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,19 @@
11
<?xml version="1.0" encoding="UTF-8"?>
2-
<phpunit xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" bootstrap="./tests/bootstrap.php" colors="true" xsi:noNamespaceSchemaLocation="https://schema.phpunit.de/10.0/phpunit.xsd" cacheDirectory=".phpunit.cache">
2+
<phpunit xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3+
xsi:noNamespaceSchemaLocation="https://schema.phpunit.de/10.0/phpunit.xsd"
4+
cacheDirectory=".phpunit.cache"
5+
failOnWarning="true"
6+
colors="true"
7+
bootstrap="./tests/bootstrap.php"
8+
displayDetailsOnTestsThatTriggerDeprecations="true"
9+
displayDetailsOnTestsThatTriggerWarnings="true"
10+
displayDetailsOnTestsThatTriggerErrors="true">
311
<coverage>
412
<include>
513
<directory suffix=".php">./src/JShrink/</directory>
614
</include>
715
<report>
8-
<text outputFile="php://stdout" showUncoveredFiles="false"/>
16+
<text outputFile="php://stdout" showUncoveredFiles="false" />
917
</report>
1018
</coverage>
1119
<testsuites>
@@ -19,5 +27,5 @@
1927
<group>development</group>
2028
</exclude>
2129
</groups>
22-
<logging/>
30+
<logging />
2331
</phpunit>

src/JShrink/Minifier.php

+94-31
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,20 @@ class Minifier
7474
*/
7575
protected $c;
7676

77+
/**
78+
* The last character written to the output; consulted when deciding whether a following slash starts a regex.
79+
*
80+
* @var string
81+
*/
82+
protected $last_char;
83+
84+
/**
85+
* The minified output accumulated so far; returned once processing finishes.
86+
*
87+
* @var string
88+
*/
89+
protected $output;
90+
7791
/**
7892
* Contains the options for the current minification process.
7993
*
@@ -95,6 +109,9 @@ class Minifier
95109
*/
96110
protected static $defaultOptions = ['flaggedComments' => true];
97111

112+
113+
protected static $keywords = ["delete", "do", "for", "in", "instanceof", "return", "typeof", "yield"];
114+
98115
/**
99116
* Contains lock ids which are used to replace certain code patterns and
100117
* prevent them from being minified
@@ -115,17 +132,11 @@ class Minifier
115132
public static function minify($js, $options = [])
116133
{
117134
try {
118-
ob_start();
119-
120135
$jshrink = new Minifier();
121136
$js = $jshrink->lock($js);
122-
$jshrink->minifyDirectToOutput($js, $options);
123-
124-
// Sometimes there's a leading new line, so we trim that out here.
125-
$js = ltrim(ob_get_clean());
137+
$js = ltrim($jshrink->minifyToString($js, $options));
126138
$js = $jshrink->unlock($js);
127139
unset($jshrink);
128-
129140
return $js;
130141
} catch (\Exception $e) {
131142
if (isset($jshrink)) {
@@ -134,9 +145,6 @@ public static function minify($js, $options = [])
134145
$jshrink->clean();
135146
unset($jshrink);
136147
}
137-
138-
// without this call things get weird, with partially outputted js.
139-
ob_end_clean();
140148
throw $e;
141149
}
142150
}
@@ -148,11 +156,12 @@ public static function minify($js, $options = [])
148156
* @param string $js The raw javascript to be minified
149157
* @param array $options Various runtime options in an associative array
150158
*/
151-
protected function minifyDirectToOutput($js, $options)
159+
protected function minifyToString($js, $options)
152160
{
153161
$this->initialize($js, $options);
154162
$this->loop();
155163
$this->clean();
164+
return $this->output;
156165
}
157166

158167
/**
@@ -177,7 +186,9 @@ protected function initialize($js, $options)
177186
// Populate "a" with a new line, "b" with the first character, before
178187
// entering the loop
179188
$this->a = "\n";
180-
$this->b = $this->getReal();
189+
$this->b = "\n";
190+
$this->last_char = "\n";
191+
$this->output = "";
181192
}
182193

183194
/**
@@ -192,6 +203,14 @@ protected function initialize($js, $options)
192203
'[' => true,
193204
'@' => true];
194205

206+
207+
/**
 * Appends minified text to the output string and remembers its final
 * character.
 *
 * Nothing is written to stdout: the minified result is collected in
 * $this->output and handed back to the caller as a string, so printing here
 * would leak the script into whatever the calling application is emitting.
 *
 * @param  string $char One or more characters to append to the output.
 * @return void
 */
protected function echo($char)
{
    // Empty values (an empty substr() result, or false once the input is
    // exhausted) add nothing to the output, and indexing them with [-1]
    // would raise a warning, so they are ignored.
    if ($char === false || $char === null || $char === '') {
        return;
    }

    $char = (string) $char;

    $this->output .= $char;

    // Remember the final character so later look-behind checks (such as the
    // regex detection) can see what was emitted last.
    $this->last_char = $char[-1];
}
212+
213+
195214
/**
196215
* The primary action occurs here. This function loops through the input string,
197216
* outputting anything that's relevant and discarding anything that is not.
@@ -201,10 +220,11 @@ protected function loop()
201220
while ($this->a !== false && !is_null($this->a) && $this->a !== '') {
202221
switch ($this->a) {
203222
// new lines
223+
case "\r":
204224
case "\n":
205225
// if the next line is something that can't stand alone preserve the newline
206226
if ($this->b !== false && isset($this->noNewLineCharacters[$this->b])) {
207-
echo $this->a;
227+
$this->echo($this->a);
208228
$this->saveString();
209229
break;
210230
}
@@ -220,22 +240,23 @@ protected function loop()
220240
// no break
221241
case ' ':
222242
if (static::isAlphaNumeric($this->b)) {
223-
echo $this->a;
243+
$this->echo($this->a);
224244
}
225245

226246
$this->saveString();
227247
break;
228248

229249
default:
230250
switch ($this->b) {
251+
case "\r":
231252
case "\n":
232253
if (strpos('}])+-"\'', $this->a) !== false) {
233-
echo $this->a;
254+
$this->echo($this->a);
234255
$this->saveString();
235256
break;
236257
} else {
237258
if (static::isAlphaNumeric($this->a)) {
238-
echo $this->a;
259+
$this->echo($this->a);
239260
$this->saveString();
240261
}
241262
}
@@ -254,7 +275,7 @@ protected function loop()
254275
continue 3;
255276
}
256277

257-
echo $this->a;
278+
$this->echo($this->a);
258279
$this->saveString();
259280
break;
260281
}
@@ -263,9 +284,20 @@ protected function loop()
263284
// do reg check of doom
264285
$this->b = $this->getReal();
265286

266-
if (($this->b == '/' && strpos('(,=:[!&|?', $this->a) !== false)) {
267-
$this->saveRegex();
287+
if ($this->b == '/') {
288+
$valid_tokens = "(,=:[!&|?\n";
289+
if (strpos($valid_tokens, $this->last_char) !== false || strpos($valid_tokens, $this->a) !== false) {
290+
// Regex can appear unquoted after these symbols
291+
$this->saveRegex();
292+
} else if ($this->endsInKeyword()) {
293+
// Regex can also appear after keywords such as "return" or "typeof".
294+
$this->saveRegex();
295+
}
268296
}
297+
298+
// if (($this->b == '/' && strpos('(,=:[!&|?', $this->a) !== false)) {
299+
// $this->saveRegex();
300+
// }
269301
}
270302
}
271303

@@ -332,8 +364,25 @@ protected function getChar()
332364
*/
333365
protected function peek()
334366
{
335-
# Pull the next character but don't push the index.
336-
return $this->index < $this->len ? $this->input[$this->index] : false;
367+
if ($this->index >= $this->len) {
368+
return false;
369+
}
370+
371+
$char = $this->input[$this->index];
372+
# Convert all line endings to unix standard.
373+
# `\r\n` converts to `\n\n` and is minified.
374+
if ($char == "\r") {
375+
$char = "\n";
376+
}
377+
378+
// Normalize all whitespace except for the newline character into a
379+
// standard space.
380+
if ($char !== "\n" && $char < "\x20") {
381+
return ' ';
382+
}
383+
384+
# Return the next character but don't push the index.
385+
return $char;
337386
}
338387

339388
/**
@@ -428,17 +477,17 @@ protected function processMultiLineComments($startIndex)
428477
// If conditional comments or flagged comments are not the first thing in the script
429478
// we need to echo a and fill it with a space before moving on.
430479
if ($startIndex > 0) {
431-
echo $this->a;
480+
$this->echo($this->a);
432481
$this->a = " ";
433482

434483
// If the comment started on a new line we let it stay on the new line
435484
if ($this->input[($startIndex - 1)] === "\n") {
436-
echo "\n";
485+
$this->echo("\n");
437486
}
438487
}
439488

440489
$endPoint = ($this->index - 1) - $startIndex;
441-
echo substr($this->input, $startIndex, $endPoint);
490+
$this->echo(substr($this->input, $startIndex, $endPoint));
442491

443492
$this->c = $char;
444493

@@ -504,7 +553,7 @@ protected function saveString()
504553
$stringType = $this->a;
505554

506555
// Echo out that starting quote
507-
echo $this->a;
556+
$this->echo($this->a);
508557

509558
// Loop until the string is done
510559
// Grab the very next character and load it into a
@@ -523,7 +572,7 @@ protected function saveString()
523572
// block below.
524573
case "\n":
525574
if ($stringType === '`') {
526-
echo $this->a;
575+
$this->echo($this->a);
527576
} else {
528577
throw new \RuntimeException('Unclosed string at position: ' . $startpos);
529578
}
@@ -543,14 +592,14 @@ protected function saveString()
543592
}
544593

545594
// echo out the escaped character and restart the loop.
546-
echo $this->a . $this->b;
595+
$this->echo($this->a . $this->b);
547596
break;
548597

549598

550599
// Since we're not dealing with any special cases we simply
551600
// output the character and continue our loop.
552601
default:
553-
echo $this->a;
602+
$this->echo($this->a);
554603
}
555604
}
556605
}
@@ -563,23 +612,23 @@ protected function saveString()
563612
*/
564613
protected function saveRegex()
565614
{
566-
echo $this->a . $this->b;
615+
$this->echo($this->a . $this->b);
567616

568617
while (($this->a = $this->getChar()) !== false) {
569618
if ($this->a === '/') {
570619
break;
571620
}
572621

573622
if ($this->a === '\\') {
574-
echo $this->a;
623+
$this->echo($this->a);
575624
$this->a = $this->getChar();
576625
}
577626

578627
if ($this->a === "\n") {
579628
throw new \RuntimeException('Unclosed regex pattern at position: ' . $this->index);
580629
}
581630

582-
echo $this->a;
631+
$this->echo($this->a);
583632
}
584633
$this->b = $this->getReal();
585634
}
@@ -595,6 +644,20 @@ protected static function isAlphaNumeric($char)
595644
return preg_match('/^[\w\$\pL]$/', $char) === 1 || $char == '/';
596645
}
597646

647+
/**
 * Checks whether the output produced so far ends with one of the keywords
 * in static::$keywords, optionally followed by a single space.
 *
 * The keyword must be a complete token: it has to sit at the very start of
 * the output or follow a character that cannot be part of an identifier.
 * Without that check, identifiers such as `undo`, `bin` or `$in` would be
 * mistaken for `do` / `in`, and a division slash after them would be
 * treated as the start of a regex.
 *
 * @return bool True when the output ends in a keyword token.
 */
protected function endsInKeyword()
{
    $output = $this->output;
    $end = strlen($output);

    // Tolerate exactly one trailing space after the keyword.
    if ($end > 0 && $output[$end - 1] === ' ') {
        $end--;
    }

    foreach (static::$keywords as $keyword) {
        $length = strlen($keyword);
        $start = $end - $length;

        // Only the tail of the output is compared, so the cost does not
        // grow with the size of the script.
        if ($length === 0 || $start < 0 || substr_compare($output, $keyword, $start, $length) !== 0) {
            continue;
        }

        // Reject matches that are merely the suffix of a longer identifier.
        if ($start === 0 || preg_match('/^[\w$]$/', $output[$start - 1]) !== 1) {
            return true;
        }
    }

    return false;
}
658+
659+
660+
598661
/**
599662
* Replace patterns in the given string and store the replacement
600663
*
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
return /'/
2+
3+
typeof /'/
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
function airplaneIsCarrierBased (model) {
2+
return /^(FI-167|Swordfish|Fulmar|Firefly|F4F Wildcat|F6F-[35] Hellcat|Latécoère 298|A[567]M)$/.test(
3+
model
4+
)
5+
}
6+
7+
console.log(airplaneIsCarrierBased('F6F-5 Hellcat'))
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
/^(")$/
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
function test (input) {
2+
return /^(אחה"צ|אחרי הצהריים|בערב)$/.test(input)
3+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
return /'/
2+
typeof /'/
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
function airplaneIsCarrierBased(model){return /^(FI-167|Swordfish|Fulmar|Firefly|F4F Wildcat|F6F-[35] Hellcat|Latécoère 298|A[567]M)$/.test(model)}
2+
console.log(airplaneIsCarrierBased('F6F-5 Hellcat'))
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
/^(")$/
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
function test(input){return /^(אחה"צ|אחרי הצהריים|בערב)$/.test(input)}

0 commit comments

Comments
 (0)