|
@@ -0,0 +1,271 @@
|
|
|
+From 6a2d07f43ae7cfcb2eb30cf39751f2f7fed7ecc1 Mon Sep 17 00:00:00 2001
|
|
|
+From: Yves Orton <demerphq@gmail.com>
|
|
|
+Date: Mon, 26 Jun 2017 13:19:55 +0200
|
|
|
+Subject: [PATCH 3/3] fix #131649 - extended charclass can trigger assert
|
|
|
+
|
|
|
+The extended charclass parser makes some assumptions during the
|
|
|
+first pass which are only true on well structured input, and it
|
|
|
+does not properly catch various errors. Later on the code assumes
|
|
|
+that things the first pass will let through are valid, when in
|
|
|
+fact they should trigger errors.
|
|
|
+
|
|
|
+(cherry picked from commit 19a498a461d7c81ae3507c450953d1148efecf4f)
|
|
|
+
|
|
|
+CVE: CVE-2018-18314
|
|
|
+Upstream-Status: Backport
|
|
|
+[https://perl5.git.perl.org/perl.git/commit/dabe076af345ab4512ea80245b4e4cd7ec0996cd]
|
|
|
+
|
|
|
+Signed-off-by: Dan Tran <dantran@microsoft.com>
|
|
|
+---
|
|
|
+ pod/perldiag.pod | 27 ++++++++++++++++++++++++++-
|
|
|
+ pod/perlrecharclass.pod | 4 ++--
|
|
|
+ regcomp.c | 23 +++++++++++++----------
|
|
|
+ t/lib/warnings/regcomp | 6 +++---
|
|
|
+ t/re/reg_mesg.t | 29 ++++++++++++++++-------------
|
|
|
+ t/re/regex_sets.t | 6 +++---
|
|
|
+ 6 files changed, 63 insertions(+), 32 deletions(-)
|
|
|
+
|
|
|
+diff --git a/pod/perldiag.pod b/pod/perldiag.pod
|
|
|
+index 737d3633f6..644b814008 100644
|
|
|
+--- a/pod/perldiag.pod
|
|
|
++++ b/pod/perldiag.pod
|
|
|
+@@ -5777,7 +5777,7 @@ yourself.
|
|
|
+ a perl4 interpreter, especially if the next 2 tokens are "use strict"
|
|
|
+ or "my $var" or "our $var".
|
|
|
+
|
|
|
+-=item Syntax error in (?[...]) in regex m/%s/
|
|
|
++=item Syntax error in (?[...]) in regex; marked by <-- HERE in m/%s/
|
|
|
+
|
|
|
+ (F) Perl could not figure out what you meant inside this construct; this
|
|
|
+ notifies you that it is giving up trying.
|
|
|
+@@ -6153,6 +6153,31 @@ for example,
|
|
|
+ (F) The unexec() routine failed for some reason. See your local FSF
|
|
|
+ representative, who probably put it there in the first place.
|
|
|
+
|
|
|
++=item Unexpected ']' with no following ')' in (?[... in regex; marked by <-- HERE in m/%s/
|
|
|
++
|
|
|
++(F) While parsing an extended character class a ']' character was encountered
|
|
|
++at a point in the definition where the only legal use of ']' is to close the
|
|
|
++character class definition as part of a '])', you may have forgotten the close
|
|
|
++paren, or otherwise confused the parser.
|
|
|
++
|
|
|
++=item Expecting close paren for nested extended charclass in regex; marked by <-- HERE in m/%s/
|
|
|
++
|
|
|
++(F) While parsing a nested extended character class like:
|
|
|
++
|
|
|
++ (?[ ... (?flags:(?[ ... ])) ... ])
|
|
|
++ ^
|
|
|
++
|
|
|
++we expected to see a close paren ')' (marked by ^) but did not.
|
|
|
++
|
|
|
++=item Expecting close paren for wrapper for nested extended charclass in regex; marked by <-- HERE in m/%s/
|
|
|
++
|
|
|
++(F) While parsing a nested extended character class like:
|
|
|
++
|
|
|
++ (?[ ... (?flags:(?[ ... ])) ... ])
|
|
|
++ ^
|
|
|
++
|
|
|
++we expected to see a close paren ')' (marked by ^) but did not.
|
|
|
++
|
|
|
+ =item Unexpected binary operator '%c' with no preceding operand in regex;
|
|
|
+ marked by S<<-- HERE> in m/%s/
|
|
|
+
|
|
|
+diff --git a/pod/perlrecharclass.pod b/pod/perlrecharclass.pod
|
|
|
+index 89f4a7ef3f..a557cc0384 100644
|
|
|
+--- a/pod/perlrecharclass.pod
|
|
|
++++ b/pod/perlrecharclass.pod
|
|
|
+@@ -1101,8 +1101,8 @@ hence both of the following work:
|
|
|
+ Any contained POSIX character classes, including things like C<\w> and C<\D>
|
|
|
+ respect the C<E<sol>a> (and C<E<sol>aa>) modifiers.
|
|
|
+
|
|
|
+-C<< (?[ ]) >> is a regex-compile-time construct. Any attempt to use
|
|
|
+-something which isn't knowable at the time the containing regular
|
|
|
++Note that C<< (?[ ]) >> is a regex-compile-time construct. Any attempt
|
|
|
++to use something which isn't knowable at the time the containing regular
|
|
|
+ expression is compiled is a fatal error. In practice, this means
|
|
|
+ just three limitations:
|
|
|
+
|
|
|
+diff --git a/regcomp.c b/regcomp.c
|
|
|
+index 2688979882..cb8409ed27 100644
|
|
|
+--- a/regcomp.c
|
|
|
++++ b/regcomp.c
|
|
|
+@@ -14609,8 +14609,9 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
|
|
|
+ TRUE /* Force /x */ );
|
|
|
+
|
|
|
+ switch (*RExC_parse) {
|
|
|
+- case '?':
|
|
|
+- if (RExC_parse[1] == '[') depth++, RExC_parse++;
|
|
|
++ case '(':
|
|
|
++ if (RExC_parse[1] == '?' && RExC_parse[2] == '[')
|
|
|
++ depth++, RExC_parse+=2;
|
|
|
+ /* FALLTHROUGH */
|
|
|
+ default:
|
|
|
+ break;
|
|
|
+@@ -14667,9 +14668,9 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
|
|
|
+ }
|
|
|
+
|
|
|
+ case ']':
|
|
|
+- if (depth--) break;
|
|
|
+- RExC_parse++;
|
|
|
+- if (*RExC_parse == ')') {
|
|
|
++ if (RExC_parse[1] == ')') {
|
|
|
++ RExC_parse++;
|
|
|
++ if (depth--) break;
|
|
|
+ node = reganode(pRExC_state, ANYOF, 0);
|
|
|
+ RExC_size += ANYOF_SKIP;
|
|
|
+ nextchar(pRExC_state);
|
|
|
+@@ -14681,20 +14682,20 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
|
|
|
+
|
|
|
+ return node;
|
|
|
+ }
|
|
|
+- goto no_close;
|
|
|
++ RExC_parse++;
|
|
|
++ vFAIL("Unexpected ']' with no following ')' in (?[...");
|
|
|
+ }
|
|
|
+
|
|
|
+ RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
|
|
|
+ }
|
|
|
+
|
|
|
+- no_close:
|
|
|
+ /* We output the messages even if warnings are off, because we'll fail
|
|
|
+ * the very next thing, and these give a likely diagnosis for that */
|
|
|
+ if (posix_warnings && av_tindex_nomg(posix_warnings) >= 0) {
|
|
|
+ output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL);
|
|
|
+ }
|
|
|
+
|
|
|
+- FAIL("Syntax error in (?[...])");
|
|
|
++ vFAIL("Syntax error in (?[...])");
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Pass 2 only after this. */
|
|
|
+@@ -14868,12 +14869,14 @@ redo_curchar:
|
|
|
+ * inversion list, and RExC_parse points to the trailing
|
|
|
+ * ']'; the next character should be the ')' */
|
|
|
+ RExC_parse++;
|
|
|
+- assert(UCHARAT(RExC_parse) == ')');
|
|
|
++ if (UCHARAT(RExC_parse) != ')')
|
|
|
++ vFAIL("Expecting close paren for nested extended charclass");
|
|
|
+
|
|
|
+ /* Then the ')' matching the original '(' handled by this
|
|
|
+ * case: statement */
|
|
|
+ RExC_parse++;
|
|
|
+- assert(UCHARAT(RExC_parse) == ')');
|
|
|
++ if (UCHARAT(RExC_parse) != ')')
|
|
|
++ vFAIL("Expecting close paren for wrapper for nested extended charclass");
|
|
|
+
|
|
|
+ RExC_flags = save_flags;
|
|
|
+ goto handle_operand;
|
|
|
+diff --git a/t/lib/warnings/regcomp b/t/lib/warnings/regcomp
|
|
|
+index 08cb27b00f..367276d0fc 100644
|
|
|
+--- a/t/lib/warnings/regcomp
|
|
|
++++ b/t/lib/warnings/regcomp
|
|
|
+@@ -59,21 +59,21 @@ Unmatched [ in regex; marked by <-- HERE in m/abc[ <-- HERE fi[.00./ at - line
|
|
|
+ qr/(?[[[:word]]])/;
|
|
|
+ EXPECT
|
|
|
+ Assuming NOT a POSIX class since there is no terminating ':' in regex; marked by <-- HERE in m/(?[[[:word <-- HERE ]]])/ at - line 2.
|
|
|
+-syntax error in (?[...]) in regex m/(?[[[:word]]])/ at - line 2.
|
|
|
++Unexpected ']' with no following ')' in (?[... in regex; marked by <-- HERE in m/(?[[[:word]] <-- HERE ])/ at - line 2.
|
|
|
+ ########
|
|
|
+ # NAME qr/(?[ [[:digit: ])/
|
|
|
+ # OPTION fatal
|
|
|
+ qr/(?[[[:digit: ])/;
|
|
|
+ EXPECT
|
|
|
+ Assuming NOT a POSIX class since no blanks are allowed in one in regex; marked by <-- HERE in m/(?[[[:digit: ] <-- HERE )/ at - line 2.
|
|
|
+-syntax error in (?[...]) in regex m/(?[[[:digit: ])/ at - line 2.
|
|
|
++syntax error in (?[...]) in regex; marked by <-- HERE in m/(?[[[:digit: ]) <-- HERE / at - line 2.
|
|
|
+ ########
|
|
|
+ # NAME qr/(?[ [:digit: ])/
|
|
|
+ # OPTION fatal
|
|
|
+ qr/(?[[:digit: ])/
|
|
|
+ EXPECT
|
|
|
+ Assuming NOT a POSIX class since no blanks are allowed in one in regex; marked by <-- HERE in m/(?[[:digit: ] <-- HERE )/ at - line 2.
|
|
|
+-syntax error in (?[...]) in regex m/(?[[:digit: ])/ at - line 2.
|
|
|
++syntax error in (?[...]) in regex; marked by <-- HERE in m/(?[[:digit: ]) <-- HERE / at - line 2.
|
|
|
+ ########
|
|
|
+ # NAME [perl #126141]
|
|
|
+ # OPTION fatal
|
|
|
+diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t
|
|
|
+index 658397ac27..08a3688e1d 100644
|
|
|
+--- a/t/re/reg_mesg.t
|
|
|
++++ b/t/re/reg_mesg.t
|
|
|
+@@ -202,8 +202,9 @@ my @death =
|
|
|
+ '/\b{gc}/' => "'gc' is an unknown bound type {#} m/\\b{gc{#}}/",
|
|
|
+ '/\B{gc}/' => "'gc' is an unknown bound type {#} m/\\B{gc{#}}/",
|
|
|
+
|
|
|
+- '/(?[[[::]]])/' => "Syntax error in (?[...]) in regex m/(?[[[::]]])/",
|
|
|
+- '/(?[[[:w:]]])/' => "Syntax error in (?[...]) in regex m/(?[[[:w:]]])/",
|
|
|
++
|
|
|
++ '/(?[[[::]]])/' => "Unexpected ']' with no following ')' in (?[... {#} m/(?[[[::]]{#}])/",
|
|
|
++ '/(?[[[:w:]]])/' => "Unexpected ']' with no following ')' in (?[... {#} m/(?[[[:w:]]{#}])/",
|
|
|
+ '/(?[[:w:]])/' => "",
|
|
|
+ '/[][[:alpha:]]' => "", # [perl #127581]
|
|
|
+ '/([.].*)[.]/' => "", # [perl #127582]
|
|
|
+@@ -227,11 +228,12 @@ my @death =
|
|
|
+ '/(?[ \p{foo} ])/' => 'Can\'t find Unicode property definition "foo" {#} m/(?[ \p{foo}{#} ])/',
|
|
|
+ '/(?[ \p{ foo = bar } ])/' => 'Can\'t find Unicode property definition "foo = bar" {#} m/(?[ \p{ foo = bar }{#} ])/',
|
|
|
+ '/(?[ \8 ])/' => 'Unrecognized escape \8 in character class {#} m/(?[ \8{#} ])/',
|
|
|
+- '/(?[ \t ]/' => 'Syntax error in (?[...]) in regex m/(?[ \t ]/',
|
|
|
+- '/(?[ [ \t ]/' => 'Syntax error in (?[...]) in regex m/(?[ [ \t ]/',
|
|
|
+- '/(?[ \t ] ]/' => 'Syntax error in (?[...]) in regex m/(?[ \t ] ]/',
|
|
|
+- '/(?[ [ ] ]/' => 'Syntax error in (?[...]) in regex m/(?[ [ ] ]/',
|
|
|
+- '/(?[ \t + \e # This was supposed to be a comment ])/' => 'Syntax error in (?[...]) in regex m/(?[ \t + \e # This was supposed to be a comment ])/',
|
|
|
++ '/(?[ \t ]/' => "Unexpected ']' with no following ')' in (?[... {#} m/(?[ \\t ]{#}/",
|
|
|
++ '/(?[ [ \t ]/' => "Syntax error in (?[...]) {#} m/(?[ [ \\t ]{#}/",
|
|
|
++ '/(?[ \t ] ]/' => "Unexpected ']' with no following ')' in (?[... {#} m/(?[ \\t ]{#} ]/",
|
|
|
++ '/(?[ [ ] ]/' => "Syntax error in (?[...]) {#} m/(?[ [ ] ]{#}/",
|
|
|
++ '/(?[ \t + \e # This was supposed to be a comment ])/' =>
|
|
|
++ "Syntax error in (?[...]) {#} m/(?[ \\t + \\e # This was supposed to be a comment ]){#}/",
|
|
|
+ '/(?[ ])/' => 'Incomplete expression within \'(?[ ])\' {#} m/(?[ {#}])/',
|
|
|
+ 'm/(?[[a-\d]])/' => 'False [] range "a-\d" {#} m/(?[[a-\d{#}]])/',
|
|
|
+ 'm/(?[[\w-x]])/' => 'False [] range "\w-" {#} m/(?[[\w-{#}x]])/',
|
|
|
+@@ -410,10 +412,10 @@ my @death_utf8 = mark_as_utf8(
|
|
|
+
|
|
|
+ '/ネ\p{}ネ/' => 'Empty \p{} {#} m/ネ\p{{#}}ネ/',
|
|
|
+
|
|
|
+- '/ネ(?[[[:ネ]]])ネ/' => "Syntax error in (?[...]) in regex m/ネ(?[[[:ネ]]])ネ/",
|
|
|
+- '/ネ(?[[[:ネ: ])ネ/' => "Syntax error in (?[...]) in regex m/ネ(?[[[:ネ: ])ネ/",
|
|
|
+- '/ネ(?[[[::]]])ネ/' => "Syntax error in (?[...]) in regex m/ネ(?[[[::]]])ネ/",
|
|
|
+- '/ネ(?[[[:ネ:]]])ネ/' => "Syntax error in (?[...]) in regex m/ネ(?[[[:ネ:]]])ネ/",
|
|
|
++ '/ネ(?[[[:ネ]]])ネ/' => "Unexpected ']' with no following ')' in (?[... {#} m/ネ(?[[[:ネ]]{#}])ネ/",
|
|
|
++ '/ネ(?[[[:ネ: ])ネ/' => "Syntax error in (?[...]) {#} m/ネ(?[[[:ネ: ])ネ{#}/",
|
|
|
++ '/ネ(?[[[::]]])ネ/' => "Unexpected ']' with no following ')' in (?[... {#} m/ネ(?[[[::]]{#}])ネ/",
|
|
|
++ '/ネ(?[[[:ネ:]]])ネ/' => "Unexpected ']' with no following ')' in (?[... {#} m/ネ(?[[[:ネ:]]{#}])ネ/",
|
|
|
+ '/ネ(?[[:ネ:]])ネ/' => "",
|
|
|
+ '/ネ(?[ネ])ネ/' => 'Unexpected character {#} m/ネ(?[ネ{#}])ネ/',
|
|
|
+ '/ネ(?[ + [ネ] ])/' => 'Unexpected binary operator \'+\' with no preceding operand {#} m/ネ(?[ +{#} [ネ] ])/',
|
|
|
+@@ -426,8 +428,9 @@ my @death_utf8 = mark_as_utf8(
|
|
|
+ '/(?[ \x{ネ} ])ネ/' => 'Non-hex character {#} m/(?[ \x{ネ{#}} ])ネ/',
|
|
|
+ '/(?[ \p{ネ} ])/' => 'Can\'t find Unicode property definition "ネ" {#} m/(?[ \p{ネ}{#} ])/',
|
|
|
+ '/(?[ \p{ ネ = bar } ])/' => 'Can\'t find Unicode property definition "ネ = bar" {#} m/(?[ \p{ ネ = bar }{#} ])/',
|
|
|
+- '/ネ(?[ \t ]/' => 'Syntax error in (?[...]) in regex m/ネ(?[ \t ]/',
|
|
|
+- '/(?[ \t + \e # ネ This was supposed to be a comment ])/' => 'Syntax error in (?[...]) in regex m/(?[ \t + \e # ネ This was supposed to be a comment ])/',
|
|
|
++ '/ネ(?[ \t ]/' => "Unexpected ']' with no following ')' in (?[... {#} m/ネ(?[ \\t ]{#}/",
|
|
|
++ '/(?[ \t + \e # ネ This was supposed to be a comment ])/' =>
|
|
|
++ "Syntax error in (?[...]) {#} m/(?[ \\t + \\e # ネ This was supposed to be a comment ]){#}/",
|
|
|
+ 'm/(*ネ)ネ/' => q<Unknown verb pattern 'ネ' {#} m/(*ネ){#}ネ/>,
|
|
|
+ '/\cネ/' => "Character following \"\\c\" must be printable ASCII",
|
|
|
+ '/\b{ネ}/' => "'ネ' is an unknown bound type {#} m/\\b{ネ{#}}/",
|
|
|
+diff --git a/t/re/regex_sets.t b/t/re/regex_sets.t
|
|
|
+index 92875677be..60a126ba3c 100644
|
|
|
+--- a/t/re/regex_sets.t
|
|
|
++++ b/t/re/regex_sets.t
|
|
|
+@@ -157,13 +157,13 @@ for my $char ("٠", "٥", "٩") {
|
|
|
+ eval { $_ = '/(?[(\c]) /'; qr/$_/ };
|
|
|
+ like($@, qr/^Syntax error/, '/(?[(\c]) / should not panic');
|
|
|
+ eval { $_ = '(?[\c#]' . "\n])"; qr/$_/ };
|
|
|
+- like($@, qr/^Syntax error/, '/(?[(\c]) / should not panic');
|
|
|
++ like($@, qr/^Unexpected/, '/(?[(\c]) / should not panic');
|
|
|
+ eval { $_ = '(?[(\c])'; qr/$_/ };
|
|
|
+ like($@, qr/^Syntax error/, '/(?[(\c])/ should be a syntax error');
|
|
|
+ eval { $_ = '(?[(\c]) ]\b'; qr/$_/ };
|
|
|
+- like($@, qr/^Syntax error/, '/(?[(\c]) ]\b/ should be a syntax error');
|
|
|
++ like($@, qr/^Unexpected/, '/(?[(\c]) ]\b/ should be a syntax error');
|
|
|
+ eval { $_ = '(?[\c[]](])'; qr/$_/ };
|
|
|
+- like($@, qr/^Syntax error/, '/(?[\c[]](])/ should be a syntax error');
|
|
|
++ like($@, qr/^Unexpected/, '/(?[\c[]](])/ should be a syntax error');
|
|
|
+ like("\c#", qr/(?[\c#])/, '\c# should match itself');
|
|
|
+ like("\c[", qr/(?[\c[])/, '\c[ should match itself');
|
|
|
+ like("\c\ ", qr/(?[\c\])/, '\c\ should match itself');
|
|
|
+--
|
|
|
+2.22.0.vfs.1.1.57.gbaf16c8
|
|
|
+
|