Skip to content

Commit 324e783

Browse files
committed
Make DotNotationParser more efficient and robust
1 parent ada7646 commit 324e783

File tree

1 file changed

+52
-28
lines changed

1 file changed

+52
-28
lines changed

src/DotNotationParser.php

+52-28
Original file line numberDiff line numberDiff line change
@@ -18,19 +18,17 @@ class DotNotationParser {
1818
*/
1919
public function parse( string $path ) : array {
2020
$out = [];
21-
$chars = preg_split('/(?<!^)(?!$)/u', $path, -1, PREG_SPLIT_NO_EMPTY) ?: [];
21+
$chars = $this->iterateGraphemes($path);
2222

23-
for(;;) {
24-
$token = current($chars);
25-
if( $token === false ) {
26-
break;
27-
}
23+
while( $chars->valid() ) {
24+
$token = $chars->current();
25+
$key = $chars->key();
2826

2927
switch( $token ) {
3028
case '.':
3129
throw new ParseException(
32-
sprintf('failed to parse path, expected string, got "%s" at %d', $token, key($chars)),
33-
key($chars),
30+
sprintf('failed to parse path, expected string, got "%s" at %d', $token, $key),
31+
$key,
3432
ParseException::CODE_UNEXPECTED_CHARACTER
3533
);
3634
case '"':
@@ -46,61 +44,87 @@ public function parse( string $path ) : array {
4644
}
4745

/**
 * Reads an unquoted path segment from the token stream.
 *
 * Tokens are appended to the result until either a '.' is met or the iterator is exhausted.
 * A terminating '.' is consumed but is not part of the returned segment, so on return the
 * iterator is positioned on the token following the separator (if any).
 *
 * @param \Iterator<int,string> $chars tokens of the path; advanced in place
 * @return string the collected segment, '' when no token precedes the separator or end of input
 */
private function scanString( \Iterator $chars ) : string {
	$segment = '';

	for( ; $chars->valid(); $chars->next() ) {
		$char = $chars->current();

		if( $char === '.' ) {
			// Step over the separator so the caller resumes on the next segment.
			$chars->next();

			return $segment;
		}

		$segment .= $char;
	}

	return $segment;
}
6765

/**
 * Scans a double-quoted path segment and returns the text between the quotes.
 *
 * The iterator's current element on entry is skipped without being inspected; presumably the
 * caller has positioned it on the opening quote. Tokens are then appended until a closing '"'
 * is found. The closing quote must be followed either by '.' (which is consumed) or by the end
 * of input.
 *
 * Positions reported through ParseException come from the iterator keys. The end-of-input
 * position is computed as "key + byte length" of the last consumed token, i.e. it assumes the
 * keys are byte offsets, which is what iterateGraphemes() yields.
 *
 * @param \Iterator<int,string> $chars tokens of the path keyed by their offset; advanced in place
 * @return string the unquoted segment
 * @throws ParseException with CODE_UNEXPECTED_EOF when the input ends before the closing quote,
 *                        or CODE_UNEXPECTED_CHARACTER when the closing quote is followed by
 *                        anything other than '.' or the end of input
 */
private function scanQuotedString( \Iterator $chars ) : string {
	$buff = '';

	// Offset just past the most recently consumed token. Seeded from the skipped (opening quote)
	// token so an immediately unterminated quote such as `foo."` reports the real end of input.
	$eofOffset = 0;
	if( $chars->valid() ) {
		$eofOffset = $chars->key() + strlen($chars->current());
	}

	$chars->next();

	for( ; ; ) {
		if( !$chars->valid() ) {
			throw new ParseException(
				'failed to parse path, expected ", got EOF',
				$eofOffset,
				ParseException::CODE_UNEXPECTED_EOF
			);
		}

		$token = $chars->current();
		$key   = $chars->key();

		if( $token === '"' ) {
			$chars->next();

			// Closing quote at the very end of the path.
			if( !$chars->valid() ) {
				break;
			}

			$next    = $chars->current();
			$nextKey = $chars->key();

			if( $next === '.' ) {
				// Consume the separator so the caller resumes on the next segment.
				$chars->next();
				break;
			}

			// Report the offending token's own position in both the message and the exception.
			throw new ParseException(
				sprintf('failed to parse path, expected . or EOF, got "%s" at %d', $next, $nextKey),
				$nextKey,
				ParseException::CODE_UNEXPECTED_CHARACTER
			);
		}

		$buff      .= $token;
		$eofOffset  = $key + strlen($token);

		$chars->next();
	}

	return $buff;
}
105111

/**
 * Lazily yields each grapheme cluster (user-visible "character") of $s, keyed by the byte
 * offset at which it starts.
 *
 * Clusters are matched one at a time with the PCRE \X escape in UTF-8 mode. If preg_match()
 * does not report a match at the current offset (it returns false on a PCRE error such as
 * malformed UTF-8), the rest of the string is yielded byte by byte instead, so that input is
 * never silently truncated.
 *
 * @return \Generator<int,string>
 */
private function iterateGraphemes( string $s ) : \Generator {
	$len = strlen($s);
	$off = 0;

	while( $off < $len ) {
		if( preg_match('/\X/u', $s, $m, 0, $off) !== 1 ) {
			// PCRE error or no match: stop grapheme matching and fall through to bytes.
			break;
		}

		$g = $m[0]; // one grapheme cluster

		yield $off => $g;

		$off += strlen($g); // advance by its byte length
	}

	// Only reached with $off < $len when matching failed above.
	for( ; $off < $len; $off++ ) {
		yield $off => $s[$off];
	}
}
129+
106130
}

0 commit comments

Comments
 (0)