From b1397c41ce94cb29316deeafcbc449e9a30ed358 Mon Sep 17 00:00:00 2001 From: Richard Leach Date: Wed, 15 Jan 2025 22:41:05 +0000 Subject: [PATCH] OP_SUBSTR_LEFT: GH#22914 - multiple pointers to replacement OP The recent initial commit for OP_SUBSTR_LEFT failed to account for there being multiple paths from a non-trivial LENGTH to the "" replacement CONST OP. This could result in the replacement SV being erroneously pushed to the stack, causing `pp_substr_left` to try to operate on the wrong SV. This commit nulls out the replacement OP, so that even if it is encountered, no erroneous SV is pushed. Contrary to the comment in the original commit, this actually does not break B::Deparse. Thanks to @mauke for figuring this out and preparing a patch before I'd even opened my browser. --- peep.c | 17 +++++++++++------ t/op/substr_left.t | 7 +++++++ t/perf/opcount.t | 8 ++++---- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/peep.c b/peep.c index e9de45eb0a68..032fbbfc0728 100644 --- a/peep.c +++ b/peep.c @@ -3869,7 +3869,7 @@ Perl_rpeep(pTHX_ OP *o) break; case OP_SUBSTR: { - OP *expr, *offs, *len; + OP *expr, *offs, *len, *repl = NULL; /* Specialize substr($x, 0, $y) and substr($x,0,$y,"") */ /* Does this substr have 3-4 args and amiable flags? */ if ( @@ -3897,7 +3897,7 @@ Perl_rpeep(pTHX_ OP *o) if (cMAXARG3x(o) == 4) {/* replacement */ /* Is the replacement string CONST ""? */ - OP *repl = OpSIBLING(len); + repl = OpSIBLING(len); if (repl->op_type != OP_CONST) break; SV *repl_sv = cSVOPx_sv(repl); @@ -3908,12 +3908,10 @@ Perl_rpeep(pTHX_ OP *o) break; } /* It's on! */ - /* Take out the static LENGTH & REPLACMENT OPs */ + /* Take out the static LENGTH OP. */ /* (The finalizer does not seem to change op_next here) */ expr->op_next = offs->op_next; o->op_private = cMAXARG3x(o); - if (cMAXARG3x(o) == 4) - len->op_next = o; /* We have a problem if padrange pushes the expr OP for us, * then jumps straight to the offs CONST OP. For example: @@ -3924,7 +3922,14 @@ Perl_rpeep(pTHX_ OP *o) * B::Deparse. :/ */ op_null(offs); - /* repl status unchanged because it makes Deparsing easier. */ + /* There can be multiple pointers to repl, see GH #22914. + * substr $x, 0, $y ? 2 : 3, ""; + * So instead of rewriting all of len, null out repl. */ + if (repl) { + op_null(repl); + /* We can still rewrite the simple len case though.*/ + len->op_next = o; + } /* Upgrade the SUBSTR to a SUBSTR_LEFT */ OpTYPE_set(o, OP_SUBSTR_LEFT); diff --git a/t/op/substr_left.t b/t/op/substr_left.t index a9e37037fc0e..72a18332de0a 100644 --- a/t/op/substr_left.t +++ b/t/op/substr_left.t @@ -104,5 +104,12 @@ $str = "\x00\x01\x02\x03\x04\x05"; $result = substr($str, 0, 3, ""); is($result, "\x00\x01\x02", 'hex EXPR: returns correct characters'); is($str, "\x03\x04\x05", 'hex EXPR: retains correct characters'); +# GH #22914. LEN has more than one pointer to REPL. +$str = "perl"; +# Hopefully $INC[0] ne '/dev/random' is a reasonable test assumption... +# (We need a condition that no future clever optimiser will strip) +$result = substr($str, 0, $INC[0] eq '/dev/random' ? 2: 3, ''); +is($result, 'per', 'GH#22914: non-trivial LEN returns correct characters'); +is($str, 'l', 'GH#22914: non-trivial LEN retains correct characters'); done_testing(); diff --git a/t/perf/opcount.t b/t/perf/opcount.t index 0cac902a95fb..8695e162d16e 100644 --- a/t/perf/opcount.t +++ b/t/perf/opcount.t @@ -1034,7 +1034,7 @@ test_opcount(0, "substr with const zero offset and '' repl (void)", { substr => 0, substr_left => 1, - const => 2, + const => 1, }); test_opcount(0, "substr with const zero offset and '' repl (lexical)", @@ -1042,7 +1042,7 @@ test_opcount(0, "substr with const zero offset and '' repl (lexical)", { substr => 0, substr_left => 1, - const => 2, + const => 1, padsv => 3, sassign => 1 }); @@ -1052,7 +1052,7 @@ test_opcount(0, "substr with const zero offset and '' repl (lexical TARGMY)", { substr => 0, substr_left => 1, - const => 2, + const => 1, padsv => 3, padsv_store => 0, sassign => 0 @@ -1063,7 +1063,7 @@ test_opcount(0, "substr with const zero offset and '' repl (gv)", { substr => 0, substr_left => 1, - const => 2, + const => 1, gvsv => 1, sassign => 1 });