@@ -18,19 +18,17 @@ class DotNotationParser {
18
18
*/
19
19
public function parse ( string $ path ) : array {
20
20
$ out = [];
21
- $ chars = preg_split ( ' /(?<!^)(?!$)/u ' , $ path, - 1 , PREG_SPLIT_NO_EMPTY ) ?: [] ;
21
+ $ chars = $ this -> iterateGraphemes ( $ path) ;
22
22
23
- for (;;) {
24
- $ token = current ($ chars );
25
- if ( $ token === false ) {
26
- break ;
27
- }
23
+ while ( $ chars ->valid () ) {
24
+ $ token = $ chars ->current ();
25
+ $ key = $ chars ->key ();
28
26
29
27
switch ( $ token ) {
30
28
case '. ' :
31
29
throw new ParseException (
32
- sprintf ('failed to parse path, expected string, got "%s" at %d ' , $ token , key ( $ chars ) ),
33
- key ( $ chars ) ,
30
+ sprintf ('failed to parse path, expected string, got "%s" at %d ' , $ token , $ key ),
31
+ $ key ,
34
32
ParseException::CODE_UNEXPECTED_CHARACTER
35
33
);
36
34
case '" ' :
@@ -46,61 +44,87 @@ public function parse( string $path ) : array {
46
44
}
47
45
48
46
/**
 * Reads one unquoted path segment from the character iterator.
 *
 * Characters are collected until a "." separator or the end of input is
 * reached. A terminating "." is consumed but not included in the result,
 * so the iterator is left on the first character of the next segment.
 *
 * @param \Iterator<int, string> $chars Iterator of Unicode characters; advanced in place
 *
 * @return string The collected segment (empty when the iterator is already exhausted
 *  or positioned on a ".")
 */
private function scanString( \Iterator $chars ) : string {
	$segment = '';

	for ( ; $chars->valid(); $chars->next() ) {
		$char = $chars->current();

		if ( $char === '.' ) {
			// Step over the separator before handing control back to the caller.
			$chars->next();

			return $segment;
		}

		$segment .= $char;
	}

	return $segment;
}
67
65
68
66
/**
 * Reads one double-quoted path segment and returns its contents without the quotes.
 *
 * The current element on entry is skipped unconditionally (presumably the opening
 * quote - confirm against the caller). Everything up to the closing quote is
 * collected verbatim, including "." characters. The closing quote must be followed
 * by "." or the end of input; a following "." is consumed as well.
 *
 * @param \Iterator<int, string> $chars Iterator of Unicode characters; advanced in place
 *
 * @return string The text between the quotes
 *
 * @throws ParseException With CODE_UNEXPECTED_EOF when input ends before the closing quote,
 *  or CODE_UNEXPECTED_CHARACTER when the closing quote is followed by anything other
 *  than "." or the end of input
 */
private function scanQuotedString( \Iterator $chars ) : string {
	$buff = '';

	// Position just past the last element consumed so far; reported when input ends
	// early. iterateGraphemes() keys elements by byte offset, so the end position is
	// the element's key plus its byte length (not key + 1, which is only right for
	// single-byte characters).
	$endPos = (int)$chars->key() + strlen( (string)$chars->current() );
	$chars->next();

	for ( ; ; ) {
		if ( !$chars->valid() ) {
			throw new ParseException(
				'failed to parse path, expected ", got EOF',
				$endPos,
				ParseException::CODE_UNEXPECTED_EOF
			);
		}

		$token = $chars->current();
		$key = $chars->key();

		if ( $token === '"' ) {
			$chars->next();
			$next = $chars->current();
			$nextKey = $chars->key();

			if ( !$chars->valid() || $next === '.' ) {
				$chars->next();
				break;
			}

			// Report the position of the offending character itself, and use the same
			// position in the message and in the exception's position argument.
			$nextPos = $nextKey ?? $key;

			throw new ParseException(
				sprintf( 'failed to parse path, expected . or EOF, got "%s" at %d', $next, $nextPos ),
				$nextPos,
				ParseException::CODE_UNEXPECTED_CHARACTER
			);
		}

		$buff .= $token;

		$endPos = $key + strlen( $token );
		$chars->next();
	}

	return $buff;
}
105
111
112
/**
 * Lazily walks $s one grapheme cluster (user-visible "character") at a time.
 *
 * Each yielded key is the byte offset at which the cluster starts and each value
 * is the cluster itself. Iteration ends at the end of the string, or as soon as
 * preg_match() finds no match or reports an error.
 *
 * @return \Generator<int,string>
 */
private function iterateGraphemes( string $s ) : \Generator {
	$total = strlen( $s );

	for ( $pos = 0; $pos < $total; $pos += strlen( $cluster ) ) {
		if ( !preg_match( '/\X/u', $s, $found, 0, $pos ) ) {
			return;
		}

		// \X matches one extended grapheme cluster; its byte length drives the loop step.
		$cluster = $found[0];

		yield $pos => $cluster;
	}
}
129
+
106
130
}
0 commit comments