Skip to content

Commit 520e487

Browse files
committed
Fix interpolating numbered backref with lookbehind, plus refactor
1 parent b4734fd commit 520e487

File tree

5 files changed

+39
-29
lines changed

5 files changed

+39
-29
lines changed

src/atomic-groups.js

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import {Context, hasUnescaped, replaceUnescaped} from 'regex-utilities';
2-
import {noncapturingStart} from './utils.js';
2+
import {noncapturingDelim} from './utils.js';
33

44
/**
55
@param {string} expression
@@ -9,7 +9,7 @@ export function atomicGroupsPostprocessor(expression) {
99
if (!hasUnescaped(expression, '\\(\\?>', Context.DEFAULT)) {
1010
return expression;
1111
}
12-
const token = new RegExp(String.raw`(?<noncapturingStart>${noncapturingStart})|(?<capturingStart>\((?:\?<[^>]+>)?)|(?<backrefNum>\\[1-9]\d*)|\\?.`, 'gsu');
12+
const token = new RegExp(String.raw`(?<noncapturingStart>${noncapturingDelim})|(?<capturingStart>\((?:\?<[^>]+>)?)|(?<backrefNum>\\[1-9]\d*)|\\?.`, 'gsu');
1313
const aGDelim = '(?>';
1414
const emulatedAGDelim = '(?:(?=(';
1515
let capturingGroupCount = 0;

src/flag-n.js

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
import {RegexContext, getEndContextForIncompleteExpression, noncapturingStart} from './utils.js';
1+
import {RegexContext, getEndContextForIncompleteExpression, noncapturingDelim} from './utils.js';
22

33
const token = new RegExp(String.raw`
4-
${noncapturingStart}
4+
${noncapturingDelim}
55
| \(\?<
66
| (?<backrefNum>\\[1-9]\d*)
77
| \\?.

src/flag-x.js

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import {Context, replaceUnescaped} from 'regex-utilities';
2-
import {CharClassContext, RegexContext, doublePunctuatorChars, getEndContextForIncompleteExpression, noncapturingStart, sandboxLoneDoublePunctuatorChar, sandboxUnsafeNulls} from './utils.js';
2+
import {CharClassContext, RegexContext, doublePunctuatorChars, getEndContextForIncompleteExpression, noncapturingDelim, sandboxLoneDoublePunctuatorChar, sandboxUnsafeNulls} from './utils.js';
33

44
const ws = /^\s$/;
55
const escapedWsOrHash = /^\\[\s#]$/;
@@ -14,7 +14,7 @@ const token = new RegExp(String.raw`
1414
| 0\d+
1515
)
1616
| \[\^
17-
| ${noncapturingStart}
17+
| ${noncapturingDelim}
1818
| \(\?<
1919
| (?<dp>[${doublePunctuatorChars}])\k<dp>
2020
| --

src/subroutines.js

+29-21
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
import {Context, execUnescaped, forEachUnescaped, getGroupContents, hasUnescaped} from 'regex-utilities';
2-
import {countCaptures} from './utils.js';
2+
import {capturingDelim, countCaptures, namedCapturingDelim} from './utils.js';
33

44
/**
55
@param {string} expression
66
@returns {string}
77
*/
88
export function subroutinesPostprocessor(expression) {
9-
const namedGroups = getNamedCapturingGroups(expression);
9+
const namedGroups = getNamedCapturingGroups(expression, true);
1010
return processDefinitionGroup(
1111
processSubroutines(expression, namedGroups),
1212
namedGroups
@@ -16,22 +16,24 @@ export function subroutinesPostprocessor(expression) {
1616
// Explicitly exclude `&` from subroutine name chars because it's used by extension
1717
// `regex-recursion` for recursive subroutines via `\g<name&R=N>`
1818
const subroutinePattern = String.raw`\\g<(?<subroutineName>[^>&]+)>`;
19-
const namedCapturingStartPattern = String.raw`\(\?<(?![=!])(?<captureName>[^>]+)>`;
20-
const capturingStartPattern = String.raw`\((?!\?)|${namedCapturingStartPattern}`;
2119
const token = new RegExp(String.raw`
2220
${subroutinePattern}
23-
| (?<capturingStart>${capturingStartPattern})
21+
| (?<capturingStart>${capturingDelim})
2422
| \\(?<backrefNum>[1-9]\d*)
2523
| \\k<(?<backrefName>[^>]+)>
2624
| \\?.
2725
`.replace(/\s+/g, ''), 'gsu');
2826

2927
/**
30-
@typedef {Map<string, {contents: string; isUnique: boolean}>} NamedCapturingGroupsMap
28+
@typedef {
29+
Map<string, {
30+
isUnique: boolean;
31+
contents?: string;
32+
}>} NamedCapturingGroupsMap
3133
*/
3234

3335
/**
34-
Transform syntax `\g<name>`
36+
Transform `\g<name>`
3537
@param {string} expression
3638
@param {NamedCapturingGroupsMap} namedGroups
3739
@returns {string}
@@ -41,10 +43,10 @@ function processSubroutines(expression, namedGroups) {
4143
return expression;
4244
}
4345
const backrefIncrements = [0];
46+
const openSubroutinesMap = new Map();
47+
const openSubroutinesStack = [];
4448
let numCapturesPassedOutsideSubroutines = 0;
4549
let numCapturesPassedInsideSubroutines = 0;
46-
let openSubroutinesMap = new Map();
47-
let openSubroutinesStack = [];
4850
let numCharClassesOpen = 0;
4951
let result = expression;
5052
let match;
@@ -105,7 +107,8 @@ function processSubroutines(expression, namedGroups) {
105107
if (openSubroutinesMap.size) {
106108
const numCapturesBeforeReferencedGroup = countCapturesBeforeGroupName(expression, openSubroutinesStack[0]);
107109
if (num > numCapturesBeforeReferencedGroup) {
108-
increment = numCapturesPassedOutsideSubroutines +
110+
increment =
111+
numCapturesPassedOutsideSubroutines +
109112
numCapturesPassedInsideSubroutines -
110113
numCapturesBeforeReferencedGroup -
111114
subroutine.numCaptures;
@@ -167,11 +170,11 @@ Strip `(?(DEFINE)…)`
167170
@returns {string}
168171
*/
169172
function processDefinitionGroup(expression, namedGroups) {
170-
const defineDelim = execUnescaped(expression, String.raw`\(\?\(DEFINE\)`, 0, Context.DEFAULT);
171-
if (!defineDelim) {
173+
const defineStart = execUnescaped(expression, String.raw`\(\?\(DEFINE\)`, 0, Context.DEFAULT);
174+
if (!defineStart) {
172175
return expression;
173176
}
174-
const defineGroup = getGroup(expression, defineDelim);
177+
const defineGroup = getGroup(expression, defineStart);
175178
if (defineGroup.afterPos < expression.length) {
176179
// Supporting DEFINE at positions other than the end would significantly complicate edge-case
177180
// backref handling. Note: Flag x's preprocessing permits trailing whitespace and comments
@@ -180,7 +183,7 @@ function processDefinitionGroup(expression, namedGroups) {
180183
throw new Error('DEFINE group is unclosed');
181184
}
182185
// `(?:)` separators can be added by the flag x preprocessor
183-
const contentsToken = new RegExp(String.raw`${namedCapturingStartPattern}|\(\?:\)|(?<unsupported>\\?.)`, 'gsu');
186+
const contentsToken = new RegExp(String.raw`${namedCapturingDelim}|\(\?:\)|(?<unsupported>\\?.)`, 'gsu');
184187
let match;
185188
while (match = contentsToken.exec(defineGroup.contents)) {
186189
const {captureName, unsupported} = match.groups;
@@ -199,7 +202,7 @@ function processDefinitionGroup(expression, namedGroups) {
199202
}
200203
}
201204
if (duplicateName) {
202-
throw new Error(`Group names within DEFINE must be unique; has duplicate "${duplicateName}"`);
205+
throw new Error(`Duplicate group name "${duplicateName}" within DEFINE`);
203206
}
204207
contentsToken.lastIndex = group.afterPos;
205208
continue;
@@ -211,7 +214,7 @@ function processDefinitionGroup(expression, namedGroups) {
211214
throw new Error(`DEFINE group includes unsupported syntax at top level`);
212215
}
213216
}
214-
return expression.slice(0, defineDelim.index);
217+
return expression.slice(0, defineStart.index);
215218
}
216219

217220
/**
@@ -238,7 +241,7 @@ function countCapturesBeforeGroupName(expression, groupName) {
238241
let num = 0;
239242
let pos = 0;
240243
let match;
241-
while (match = execUnescaped(expression, capturingStartPattern, pos, Context.DEFAULT)) {
244+
while (match = execUnescaped(expression, capturingDelim, pos, Context.DEFAULT)) {
242245
const {0: m, index, groups: {captureName}} = match;
243246
if (captureName === groupName) {
244247
break;
@@ -258,7 +261,7 @@ function getCaptureNum(expression, groupName) {
258261
let num = 0;
259262
let pos = 0;
260263
let match;
261-
while (match = execUnescaped(expression, capturingStartPattern, pos, Context.DEFAULT)) {
264+
while (match = execUnescaped(expression, capturingDelim, pos, Context.DEFAULT)) {
262265
const {0: m, index, groups: {captureName}} = match;
263266
num++;
264267
if (captureName === groupName) {
@@ -282,22 +285,27 @@ function spliceStr(str, pos, oldValue, newValue) {
282285

283286
/**
284287
@param {string} expression
288+
@param {boolean} [includeContents] Leave off if unneeded, for perf
285289
@returns {NamedCapturingGroupsMap}
286290
*/
287-
function getNamedCapturingGroups(expression) {
291+
function getNamedCapturingGroups(expression, includeContents) {
288292
const namedGroups = new Map();
289293
forEachUnescaped(
290294
expression,
291-
namedCapturingStartPattern,
295+
namedCapturingDelim,
292296
({0: m, index, groups: {captureName}}) => {
293297
// If there are duplicate capture names, subroutines refer to the first instance of the given
294298
// group (matching the behavior of PCRE and Perl)
295299
if (namedGroups.has(captureName)) {
296300
namedGroups.get(captureName).isUnique = false;
297301
} else {
298302
namedGroups.set(captureName, {
299-
contents: getGroupContents(expression, index + m.length),
300303
isUnique: true,
304+
...(
305+
includeContents ? {
306+
contents: getGroupContents(expression, index + m.length),
307+
} : null
308+
),
301309
});
302310
}
303311
},

src/utils.js

+4-2
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,9 @@ export const flagVSupported = (() => {
3838

3939
export const doublePunctuatorChars = '&!#$%*+,.:;<=>?@^`~';
4040

41-
export const noncapturingStart = String.raw`\(\?(?:[:=!>A-Za-z\-]|<[=!]|\(DEFINE\))`;
41+
export const namedCapturingDelim = String.raw`\(\?<(?![=!])(?<captureName>[^>]+)>`;
42+
export const capturingDelim = String.raw`\((?!\?)|${namedCapturingDelim}`;
43+
export const noncapturingDelim = String.raw`\(\?(?:[:=!>A-Za-z\-]|<[=!]|\(DEFINE\))`;
4244

4345
/**
4446
Escape special characters for the given context, assuming flag v.
@@ -225,7 +227,7 @@ export function getEndContextForIncompleteExpression(incompleteExpression, {
225227
*/
226228
export function countCaptures(expression) {
227229
let num = 0;
228-
forEachUnescaped(expression, String.raw`\((?:(?!\?)|\?<[^>]+>)`, () => num++, Context.DEFAULT);
230+
forEachUnescaped(expression, capturingDelim, () => num++, Context.DEFAULT);
229231
return num;
230232
}
231233

0 commit comments

Comments
 (0)