Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Perl_do_print: stringify an SVt_IV IV/UV more efficiently #22927

Open
wants to merge 3 commits into
base: blead
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 19 additions & 5 deletions doio.c
Original file line number Diff line number Diff line change
Expand Up @@ -2205,11 +2205,25 @@ Perl_do_print(pTHX_ SV *sv, PerlIO *fp)
return TRUE;
if (SvTYPE(sv) == SVt_IV && SvIOK(sv)) {
assert(!SvGMAGICAL(sv));
if (SvIsUV(sv))
PerlIO_printf(fp, "%" UVuf, (UV)SvUVX(sv));
else
PerlIO_printf(fp, "%" IVdf, (IV)SvIVX(sv));
return !PerlIO_error(fp);
bool happy = TRUE;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is happy declared so far away from its first use?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mirroring the else branch of Perl_do_print. I kept it it the same for consistency. Happy to change both instances in a follow-up commit.


/* Adapted from Perl_sv_2pv_flags */
const U32 isUIOK = SvIsUV(sv);
/* The purpose of this union is to ensure that arr is aligned on
a 2 byte boundary, because that is what uiv_2buf() requires */
union {
char arr[TYPE_CHARS(UV)];
U16 dummy;
} buf;
char *ebuf, *ptr;
STRLEN len;
UV tempuv = SvUVX(sv);
ptr = uiv_2buf(buf.arr, SvIVX(sv), tempuv, isUIOK, &ebuf);
len = ebuf - ptr;

if (len && (PerlIO_write(fp,ptr,len) == 0))
happy = FALSE;
Comment on lines +2224 to +2225
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could be

bool happy = !(len && PerlIO_write(fp, ptr, len) == 0);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was copied from the else branch of Perl_do_print. I kept it it the same for consistency. Happy to change both instances in a follow-up commit.

return happy ? !PerlIO_error(fp) : FALSE;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could be

return happy && !PerlIO_error(fp);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was copied from the else branch of Perl_do_print. I kept it it the same for consistency. Happy to change both instances in a follow-up commit.

}
else {
STRLEN len;
Expand Down
10 changes: 5 additions & 5 deletions embed.fnc
Original file line number Diff line number Diff line change
Expand Up @@ -3641,6 +3641,11 @@ EXop |bool |try_amagic_bin |int method \
|int flags
EXop |bool |try_amagic_un |int method \
|int flags
ERTi |char * |uiv_2buf |NN char * const buf \
|const IV iv \
|UV uv \
|const int is_uv \
|NN char ** const peob
Adp |SSize_t|unpackstring |NN const char *pat \
|NN const char *patend \
|NN const char *s \
Expand Down Expand Up @@ -5883,11 +5888,6 @@ ST |STRLEN |sv_pos_u2b_midway \
|const STRLEN uend
i |void |sv_unglob |NN SV * const sv \
|U32 flags
RTi |char * |uiv_2buf |NN char * const buf \
|const IV iv \
|UV uv \
|const int is_uv \
|NN char ** const peob
S |void |utf8_mg_len_cache_update \
|NN SV * const sv \
|NN MAGIC ** const mgp \
Expand Down
2 changes: 1 addition & 1 deletion embed.h
Original file line number Diff line number Diff line change
Expand Up @@ -1855,6 +1855,7 @@
# define skipspace_flags(a,b) Perl_skipspace_flags(aTHX_ a,b)
# define sv_magicext_mglob(a) Perl_sv_magicext_mglob(aTHX_ a)
# define sv_only_taint_gmagic Perl_sv_only_taint_gmagic
# define uiv_2buf S_uiv_2buf
# define utf16_to_utf8_base(a,b,c,d,e,f) Perl_utf16_to_utf8_base(aTHX_ a,b,c,d,e,f)
# define utf8_to_utf16_base(a,b,c,d,e,f) Perl_utf8_to_utf16_base(aTHX_ a,b,c,d,e,f)
# define validate_proto(a,b,c,d) Perl_validate_proto(aTHX_ a,b,c,d)
Expand Down Expand Up @@ -2178,7 +2179,6 @@
# define sv_pos_u2b_forwards S_sv_pos_u2b_forwards
# define sv_pos_u2b_midway S_sv_pos_u2b_midway
# define sv_unglob(a,b) S_sv_unglob(aTHX_ a,b)
# define uiv_2buf S_uiv_2buf
# define utf8_mg_len_cache_update(a,b,c) S_utf8_mg_len_cache_update(aTHX_ a,b,c)
# define utf8_mg_pos_cache_update(a,b,c,d,e) S_utf8_mg_pos_cache_update(aTHX_ a,b,c,d,e)
# define visit(a,b,c) S_visit(aTHX_ a,b,c)
Expand Down
6 changes: 6 additions & 0 deletions pod/perldelta.pod
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,12 @@ patterns and swaps in a new dedicated operator (C<OP_SUBSTR_LEFT>).

=item *

The stringification of integers by L<perlfunc/print> and L<perlfunc/say>,
when coming from an SVt_IV, is now more efficient.
[GH #XXXXX]

=item *

XXX

=back
Expand Down
14 changes: 7 additions & 7 deletions proto.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

82 changes: 0 additions & 82 deletions sv.c
Original file line number Diff line number Diff line change
Expand Up @@ -2766,88 +2766,6 @@ Perl_sv_2num(pTHX_ SV *const sv)
return sv_2mortal(newSVuv(PTR2UV(SvRV(sv))));
}

/* int2str_table: lookup table containing string representations of all
* two digit numbers. For example, int2str_table.arr[0] is "00" and
* int2str_table.arr[12*2] is "12".
*
* We are going to read two bytes at a time, so we have to ensure that
* the array is aligned to a 2 byte boundary. That's why it was made a
* union with a dummy U16 member. */
static const union {
char arr[200];
U16 dummy;
} int2str_table = {{
'0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6',
'0', '7', '0', '8', '0', '9', '1', '0', '1', '1', '1', '2', '1', '3',
'1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', '2', '0',
'2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7',
'2', '8', '2', '9', '3', '0', '3', '1', '3', '2', '3', '3', '3', '4',
'3', '5', '3', '6', '3', '7', '3', '8', '3', '9', '4', '0', '4', '1',
'4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8',
'4', '9', '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5',
'5', '6', '5', '7', '5', '8', '5', '9', '6', '0', '6', '1', '6', '2',
'6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9',
'7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6',
'7', '7', '7', '8', '7', '9', '8', '0', '8', '1', '8', '2', '8', '3',
'8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', '9', '0',
'9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7',
'9', '8', '9', '9'
}};

/* uiv_2buf(): private routine for use by sv_2pv_flags(): print an IV or
* UV as a string towards the end of buf, and return pointers to start and
* end of it.
*
* We assume that buf is at least TYPE_CHARS(UV) long.
*/

PERL_STATIC_INLINE char *
S_uiv_2buf(char *const buf, const IV iv, UV uv, const int is_uv, char **const peob)
{
char *ptr = buf + TYPE_CHARS(UV);
char * const ebuf = ptr;
int sign;
U16 *word_ptr, *word_table;

PERL_ARGS_ASSERT_UIV_2BUF;

/* ptr has to be properly aligned, because we will cast it to U16* */
assert(PTR2nat(ptr) % 2 == 0);
/* we are going to read/write two bytes at a time */
word_ptr = (U16*)ptr;
word_table = (U16*)int2str_table.arr;

if (UNLIKELY(is_uv))
sign = 0;
else if (iv >= 0) {
uv = iv;
sign = 0;
} else {
/* Using 0- here to silence bogus warning from MS VC */
uv = (UV) (0 - (UV) iv);
sign = 1;
}

while (uv > 99) {
*--word_ptr = word_table[uv % 100];
uv /= 100;
}
ptr = (char*)word_ptr;

if (uv < 10)
*--ptr = (char)uv + '0';
else {
*--word_ptr = word_table[uv];
ptr = (char*)word_ptr;
}

if (sign)
*--ptr = '-';

*peob = ebuf;
return ptr;
}

/* Helper for sv_2pv_flags and sv_vcatpvfn_flags. If the NV is an
* infinity or a not-a-number, writes the appropriate strings to the
* buffer, including a zero byte. On success returns the written length,
Expand Down
83 changes: 83 additions & 0 deletions sv_inline.h
Original file line number Diff line number Diff line change
Expand Up @@ -1003,6 +1003,89 @@ Perl_sv_setpv_freshbuf(pTHX_ SV *const sv)
return SvPVX(sv);
}

/* int2str_table: lookup table containing string representations of all
* two digit numbers. For example, int2str_table.arr[0] is "00" and
* int2str_table.arr[12*2] is "12".
*
* We are going to read two bytes at a time, so we have to ensure that
* the array is aligned to a 2 byte boundary. That's why it was made a
* union with a dummy U16 member. */
static const union {
char arr[200];
U16 dummy;
} int2str_table = {{
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will this symbol be visible in external (C/XS) code? Because at the moment it is an unprefixed identifier (no Perl_ or anything) in the global namespace.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question. I'm not sure how best to handle it. Happy to take advice.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Guarding it inside a #if defined(PERL_CORE) || defined (PERL_EXT)?

'0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6',
'0', '7', '0', '8', '0', '9', '1', '0', '1', '1', '1', '2', '1', '3',
'1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', '2', '0',
'2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7',
'2', '8', '2', '9', '3', '0', '3', '1', '3', '2', '3', '3', '3', '4',
'3', '5', '3', '6', '3', '7', '3', '8', '3', '9', '4', '0', '4', '1',
'4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8',
'4', '9', '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5',
'5', '6', '5', '7', '5', '8', '5', '9', '6', '0', '6', '1', '6', '2',
'6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9',
'7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6',
'7', '7', '7', '8', '7', '9', '8', '0', '8', '1', '8', '2', '8', '3',
'8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', '9', '0',
'9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7',
'9', '8', '9', '9'
}};

/* uiv_2buf() was originally a private routine in sv.c for use by
* sv_2pv_flags(), but its usefulness elsewhere was noted, and it was
* moved out here. It prints an IV or UV as a string towards the end
* of buf, and return pointers to start and end of it.
*
* We assume that buf is at least TYPE_CHARS(UV) long.
*/

PERL_STATIC_INLINE char *
S_uiv_2buf(char *const buf, const IV iv, UV uv, const int is_uv, char **const peob)
{
char *ptr = buf + TYPE_CHARS(UV);
char * const ebuf = ptr;
int sign;
U16 *word_ptr, *word_table;

PERL_ARGS_ASSERT_UIV_2BUF;

/* ptr has to be properly aligned, because we will cast it to U16* */
assert(PTR2nat(ptr) % 2 == 0);
/* we are going to read/write two bytes at a time */
word_ptr = (U16*)ptr;
word_table = (U16*)int2str_table.arr;

if (UNLIKELY(is_uv))
sign = 0;
else if (iv >= 0) {
uv = iv;
sign = 0;
} else {
/* Using 0- here to silence bogus warning from MS VC */
uv = (UV) (0 - (UV) iv);
sign = 1;
}

while (uv > 99) {
*--word_ptr = word_table[uv % 100];
uv /= 100;
}
ptr = (char*)word_ptr;

if (uv < 10)
*--ptr = (char)uv + '0';
else {
*--word_ptr = word_table[uv];
ptr = (char*)word_ptr;
}

if (sign)
*--ptr = '-';

*peob = ebuf;
return ptr;
}

/*
* ex: set ts=8 sts=4 sw=4 et:
*/
Loading