1
1
import { Context , execUnescaped , forEachUnescaped , getGroupContents , hasUnescaped } from 'regex-utilities' ;
2
- import { countCaptures } from './utils.js' ;
2
+ import { capturingDelim , countCaptures , namedCapturingDelim } from './utils.js' ;
3
3
4
4
/**
5
5
@param {string } expression
6
6
@returns {string }
7
7
*/
8
8
export function subroutinesPostprocessor ( expression ) {
9
- const namedGroups = getNamedCapturingGroups ( expression ) ;
9
+ const namedGroups = getNamedCapturingGroups ( expression , true ) ;
10
10
return processDefinitionGroup (
11
11
processSubroutines ( expression , namedGroups ) ,
12
12
namedGroups
@@ -16,22 +16,24 @@ export function subroutinesPostprocessor(expression) {
16
16
// Explicitly exclude `&` from subroutine name chars because it's used by extension
17
17
// `regex-recursion` for recursive subroutines via `\g<name&R=N>`
18
18
const subroutinePattern = String . raw `\\g<(?<subroutineName>[^>&]+)>` ;
19
- const namedCapturingStartPattern = String . raw `\(\?<(?![=!])(?<captureName>[^>]+)>` ;
20
- const capturingStartPattern = String . raw `\((?!\?)|${ namedCapturingStartPattern } ` ;
21
19
const token = new RegExp ( String . raw `
22
20
${ subroutinePattern }
23
- | (?<capturingStart>${ capturingStartPattern } )
21
+ | (?<capturingStart>${ capturingDelim } )
24
22
| \\(?<backrefNum>[1-9]\d*)
25
23
| \\k<(?<backrefName>[^>]+)>
26
24
| \\?.
27
25
` . replace ( / \s + / g, '' ) , 'gsu' ) ;
28
26
29
27
/**
30
- @typedef {Map<string, {contents: string; isUnique: boolean}> } NamedCapturingGroupsMap
28
+ @typedef {
29
+ Map<string, {
30
+ isUnique: boolean;
31
+ contents?: string;
32
+ }>} NamedCapturingGroupsMap
31
33
*/
32
34
33
35
/**
34
- Transform syntax `\g<name>`
36
+ Transform `\g<name>`
35
37
@param {string } expression
36
38
@param {NamedCapturingGroupsMap } namedGroups
37
39
@returns {string }
@@ -41,10 +43,10 @@ function processSubroutines(expression, namedGroups) {
41
43
return expression ;
42
44
}
43
45
const backrefIncrements = [ 0 ] ;
46
+ const openSubroutinesMap = new Map ( ) ;
47
+ const openSubroutinesStack = [ ] ;
44
48
let numCapturesPassedOutsideSubroutines = 0 ;
45
49
let numCapturesPassedInsideSubroutines = 0 ;
46
- let openSubroutinesMap = new Map ( ) ;
47
- let openSubroutinesStack = [ ] ;
48
50
let numCharClassesOpen = 0 ;
49
51
let result = expression ;
50
52
let match ;
@@ -105,7 +107,8 @@ function processSubroutines(expression, namedGroups) {
105
107
if ( openSubroutinesMap . size ) {
106
108
const numCapturesBeforeReferencedGroup = countCapturesBeforeGroupName ( expression , openSubroutinesStack [ 0 ] ) ;
107
109
if ( num > numCapturesBeforeReferencedGroup ) {
108
- increment = numCapturesPassedOutsideSubroutines +
110
+ increment =
111
+ numCapturesPassedOutsideSubroutines +
109
112
numCapturesPassedInsideSubroutines -
110
113
numCapturesBeforeReferencedGroup -
111
114
subroutine . numCaptures ;
@@ -167,11 +170,11 @@ Strip `(?(DEFINE)…)`
167
170
@returns {string }
168
171
*/
169
172
function processDefinitionGroup ( expression , namedGroups ) {
170
- const defineDelim = execUnescaped ( expression , String . raw `\(\?\(DEFINE\)` , 0 , Context . DEFAULT ) ;
171
- if ( ! defineDelim ) {
173
+ const defineStart = execUnescaped ( expression , String . raw `\(\?\(DEFINE\)` , 0 , Context . DEFAULT ) ;
174
+ if ( ! defineStart ) {
172
175
return expression ;
173
176
}
174
- const defineGroup = getGroup ( expression , defineDelim ) ;
177
+ const defineGroup = getGroup ( expression , defineStart ) ;
175
178
if ( defineGroup . afterPos < expression . length ) {
176
179
// Supporting DEFINE at positions other than the end would significantly complicate edge-case
177
180
// backref handling. Note: Flag x's preprocessing permits trailing whitespace and comments
@@ -180,7 +183,7 @@ function processDefinitionGroup(expression, namedGroups) {
180
183
throw new Error ( 'DEFINE group is unclosed' ) ;
181
184
}
182
185
// `(?:)` separators can be added by the flag x preprocessor
183
- const contentsToken = new RegExp ( String . raw `${ namedCapturingStartPattern } |\(\?:\)|(?<unsupported>\\?.)` , 'gsu' ) ;
186
+ const contentsToken = new RegExp ( String . raw `${ namedCapturingDelim } |\(\?:\)|(?<unsupported>\\?.)` , 'gsu' ) ;
184
187
let match ;
185
188
while ( match = contentsToken . exec ( defineGroup . contents ) ) {
186
189
const { captureName, unsupported} = match . groups ;
@@ -199,7 +202,7 @@ function processDefinitionGroup(expression, namedGroups) {
199
202
}
200
203
}
201
204
if ( duplicateName ) {
202
- throw new Error ( `Group names within DEFINE must be unique; has duplicate "${ duplicateName } "` ) ;
205
+ throw new Error ( `Duplicate group name "${ duplicateName } " within DEFINE "` ) ;
203
206
}
204
207
contentsToken . lastIndex = group . afterPos ;
205
208
continue ;
@@ -211,7 +214,7 @@ function processDefinitionGroup(expression, namedGroups) {
211
214
throw new Error ( `DEFINE group includes unsupported syntax at top level` ) ;
212
215
}
213
216
}
214
- return expression . slice ( 0 , defineDelim . index ) ;
217
+ return expression . slice ( 0 , defineStart . index ) ;
215
218
}
216
219
217
220
/**
@@ -238,7 +241,7 @@ function countCapturesBeforeGroupName(expression, groupName) {
238
241
let num = 0 ;
239
242
let pos = 0 ;
240
243
let match ;
241
- while ( match = execUnescaped ( expression , capturingStartPattern , pos , Context . DEFAULT ) ) {
244
+ while ( match = execUnescaped ( expression , capturingDelim , pos , Context . DEFAULT ) ) {
242
245
const { 0 : m , index, groups : { captureName} } = match ;
243
246
if ( captureName === groupName ) {
244
247
break ;
@@ -258,7 +261,7 @@ function getCaptureNum(expression, groupName) {
258
261
let num = 0 ;
259
262
let pos = 0 ;
260
263
let match ;
261
- while ( match = execUnescaped ( expression , capturingStartPattern , pos , Context . DEFAULT ) ) {
264
+ while ( match = execUnescaped ( expression , capturingDelim , pos , Context . DEFAULT ) ) {
262
265
const { 0 : m , index, groups : { captureName} } = match ;
263
266
num ++ ;
264
267
if ( captureName === groupName ) {
@@ -282,22 +285,27 @@ function spliceStr(str, pos, oldValue, newValue) {
282
285
283
286
/**
284
287
@param {string } expression
288
+ @param {boolean } [includeContents] Whether to store each group's contents in the map; omit when the contents aren't needed, to skip the work of extracting them
285
289
@returns {NamedCapturingGroupsMap }
286
290
*/
287
- function getNamedCapturingGroups ( expression ) {
291
+ function getNamedCapturingGroups ( expression , includeContents ) {
288
292
const namedGroups = new Map ( ) ;
289
293
forEachUnescaped (
290
294
expression ,
291
- namedCapturingStartPattern ,
295
+ namedCapturingDelim ,
292
296
( { 0 : m , index, groups : { captureName} } ) => {
293
297
// If there are duplicate capture names, subroutines refer to the first instance of the given
294
298
// group (matching the behavior of PCRE and Perl)
295
299
if ( namedGroups . has ( captureName ) ) {
296
300
namedGroups . get ( captureName ) . isUnique = false ;
297
301
} else {
298
302
namedGroups . set ( captureName , {
299
- contents : getGroupContents ( expression , index + m . length ) ,
300
303
isUnique : true ,
304
+ ...(
305
+ includeContents ? {
306
+ contents : getGroupContents ( expression , index + m . length ) ,
307
+ } : null
308
+ ) ,
301
309
} ) ;
302
310
}
303
311
} ,
0 commit comments