[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index][Thread Index][Top&Search][Original]
PATCH [perl #59328] In re's, \N{U+...} doesn't match for ... > 256
The problem is that, outside of a character class, the space allocated to
hold the code point was a single char, which of course only works for code
points up through 255.
A patch is attached.
--- regcomp.c.orig 2008-11-03 06:54:29.000000000 -0700
+++ regcomp.c 2008-11-05 17:54:08.000000000 -0700
@@ -6617,20 +6617,30 @@
| PERL_SCAN_DISALLOW_PREFIX
| (SIZE_ONLY ? PERL_SCAN_SILENT_ILLDIGIT : 0);
UV cp;
- char string;
len = (STRLEN)(endbrace - name - 2);
cp = grok_hex(name + 2, &len, &fl, NULL);
if ( len != (STRLEN)(endbrace - name - 2) ) {
cp = 0xFFFD;
}
- if (cp > 0xff)
- RExC_utf8 = 1;
if ( valuep ) {
+ if (cp > 0xff) RExC_utf8 = 1;
*valuep = cp;
return NULL;
}
- string = (char)cp;
- sv_str= newSVpvn(&string, 1);
+
+ /* Need to convert to utf8 if either: won't fit into a byte, or the re
+ * is going to be in utf8 and the representation changes under utf8. */
+ if (cp > 0xff || (RExC_utf8 && ! UNI_IS_INVARIANT(cp))) {
+ U8 string[UTF8_MAXBYTES+1];
+ U8 *tmps;
+ RExC_utf8 = 1;
+ tmps = uvuni_to_utf8(string, cp);
+ sv_str = newSVpvn_utf8((char*)string, tmps - string, TRUE);
+ } else { /* Otherwise, no need for utf8, can skip that step */
+ char string;
+ string = (char)cp;
+ sv_str= newSVpvn(&string, 1);
+ }
} else {
/* fetch the charnames handler for this scope */
HV * const table = GvHV(PL_hintgv);
@@ -6809,7 +6819,7 @@
Set_Node_Cur_Length(ret); /* MJD */
RExC_parse--;
nextchar(pRExC_state);
- } else {
+ } else { /* zero length */
ret = reg_node(pRExC_state,NOTHING);
}
if (!cached) {
--- t/op/re_tests.orig 2008-11-03 08:24:51.000000000 -0700
+++ t/op/re_tests 2008-11-05 18:33:53.000000000 -0700
@@ -1358,3 +1358,4 @@
/^\s*i.*?o\s*$/s io\n io y - -
# As reported in #59168 by Father Chrysostomos:
/(.*?)a(?!(a+)b\2c)/ baaabaac y $&-$1 baa-ba
+/\N{U+0100}/ \x{100} y $& \x{100} # Bug #59328
- Follow-Ups from:
-
demerphq <demerphq@gmail.com>
[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index][Thread Index][Top&Search][Original]