michael@0: // |reftest| random -- bogus perf test (bug 467263)
michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0:  * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0:  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0: 
michael@0: /*
michael@0:  *
michael@0:  * Date:    14 Feb 2002
michael@0:  * SUMMARY: Performance: Regexp performance degraded from 4.7
michael@0:  * See http://bugzilla.mozilla.org/show_bug.cgi?id=85721
michael@0:  *
michael@0:  * Adjust this testcase if necessary. The FAST constant defines
michael@0:  * an upper bound in milliseconds for any execution to take.
michael@0:  *
michael@0:  */
michael@0: //-----------------------------------------------------------------------------
// Identification of the bug this test was written for.
var BUGNUMBER = 85721;
var summary = 'Performance: execution of regular expression';

// Upper bound, in milliseconds, that one exec() call may take and still
// be reported as fast.
var FAST = 100;

// Pieces of the verdict strings assembled by isThisFast().
var MSG_FAST = ['Execution took less than ', FAST, ' ms'].join('');
var MSG_SLOW = 'Execution took ';
var MSG_MS = ' ms';

// Working variables; each one is reassigned for every regexp under test.
var str = '';
var re = '';
var status = '';
var actual = '';
var expect = '';

// Announce the bug number and the summary through the test harness.
printBugNumber(BUGNUMBER);
printStatus(summary);
michael@0: 
michael@0: 
/*
 * Return the difference between the current time and startTime.
 * For a Date argument this is the number of milliseconds elapsed.
 */
function elapsedTime(startTime)
{
  var now = new Date();
  return now - startTime;
}
michael@0: 
michael@0: 
/*
 * Turn a duration into the verdict string that gets compared by the test:
 * MSG_FAST when ms does not exceed FAST, otherwise MSG_SLOW + ms + MSG_MS.
 */
function isThisFast(ms)
{
  return (ms <= FAST) ? MSG_FAST : MSG_SLOW + ms + MSG_MS;
}
michael@0: 
michael@0: 
michael@0: 
/*
 * Fixture for the first regexp: the subject string, the pattern, and the
 * match array it is expected to produce. Section 1 checks performance and
 * Section 2 checks accuracy.
 */
str='<sql:connection id="conn1"> <sql:url>www.m.com</sql:url> <sql:driver>drive.class</sql:driver>\n<sql:userId>foo</sql:userId> <sql:password>goo</sql:password> </sql:connection>';

re = /<sql:connection id="([^\r\n]*?)">\s*<sql:url>\s*([^\r\n]*?)\s*<\/sql:url>\s*<sql:driver>\s*([^\r\n]*?)\s*<\/sql:driver>\s*(\s*<sql:userId>\s*([^\r\n]*?)\s*<\/sql:userId>\s*)?\s*(\s*<sql:password>\s*([^\r\n]*?)\s*<\/sql:password>\s*)?\s*<\/sql:connection>/;

// Element 0 is the whole match; elements 1-7 are the capture groups in order.
expect = [
  "<sql:connection id=\"conn1\"> <sql:url>www.m.com</sql:url> <sql:driver>drive.class</sql:driver>\n<sql:userId>foo</sql:userId> <sql:password>goo</sql:password> </sql:connection>",
  "conn1",
  "www.m.com",
  "drive.class",
  "<sql:userId>foo</sql:userId> ",
  "foo",
  "<sql:password>goo</sql:password> ",
  "goo"
];
michael@0: 
/*
 * Section 1 - performance: time a single exec() of the first regexp and
 * report the resulting verdict against the verdict of a fast run.
 */
status = inSection(1);
var start = new Date();
var result = re.exec(str);
actual = elapsedTime(start);
reportCompare(
  isThisFast(FAST),     // FAST <= FAST, so this is always MSG_FAST
  isThisFast(actual),
  status);

/*
 * Section 2 - accuracy: pass the match obtained above, together with the
 * expected array, to the harness's testRegExp().
 */
status = inSection(2);
testRegExp([status], [re], [str], [result], [expect]);
michael@0: 
michael@0: 
michael@0: 
michael@0: /*
michael@0:  * The second regexp (HUGE!). We'll test for performance (Section 3) and accuracy (Section 4).
michael@0:  * It comes from the O'Reilly book "Mastering Regular Expressions" by Jeffrey Friedl, Appendix B
michael@0:  */
michael@0: 
//# Some things for avoiding backslashitis later on.
//# Every variable below is a plain string holding regexp *source text*; the
//# pieces are concatenated bottom-up into $mailbox, which is compiled later.
//# They are declared with var so that no implicit globals are created, and
//# control characters use \x / named escapes rather than legacy octal ones.
var $esc        = '\\\\';              // regexp source \\ : one literal backslash
var $Period     = '\\.';               // regexp source \. : a literal period ('\.' would be just '.', i.e. "any character")
var $space      = '\x20';              var $tab         = '\t';
var $OpenBR     = '\\[';               var $CloseBR     = '\\]';
var $OpenParen  = '\\(';               var $CloseParen  = '\\)';
var $NonASCII   = '\x80-\xff';         var $ctrl        = '\x00-\x1f';
var $CRlist     = '\n\r';  //# note: this should really be only \r (octal 015).

// Items 19, 20, 21
var $qtext = '[^' + $esc + $NonASCII + $CRlist + '"]';                       // # for within "..."
var $dtext = '[^' + $esc + $NonASCII + $CRlist + $OpenBR + $CloseBR + ']';   // # for within [...]
var $quoted_pair = $esc + '[^' + $NonASCII + ']';                            // # an escaped character

//##############################################################################
//# Items 22 and 23, comment.
//# Impossible to do properly with a regex, I make do by allowing at most one level of nesting.
var $ctext = '[^' + $esc + $NonASCII + $CRlist + '()]';

//# $Cnested matches one non-nested comment.
//# It is unrolled, with normal of $ctext, special of $quoted_pair.
var $Cnested =
  $OpenParen +                                // #  (
  $ctext + '*' +                              // #     normal*
  '(?:' + $quoted_pair + $ctext + '*)*' +     // #     (special normal*)*
  $CloseParen;                                // #                       )


//# $comment allows one level of nested parentheses
//# It is unrolled, with normal of $ctext, special of ($quoted_pair|$Cnested)
var $comment =
  $OpenParen +                                        // #  (
  $ctext + '*' +                                      // #     normal*
  '(?:' +                                             // #       (
  '(?:' + $quoted_pair + '|' + $Cnested + ')' +       // #         special
  $ctext + '*' +                                      // #         normal*
  ')*' +                                              // #            )*
  $CloseParen;                                        // #                )


//##############################################################################
//# $X is optional whitespace/comments.
var $X =
  '[' + $space + $tab + ']*' +                        // # Nab whitespace.
  '(?:' + $comment + '[' + $space + $tab + ']*)*';    // # If comment found, allow more spaces.


//# Item 10: atom
//# The negated class lists "(", space and ")" (the same parentheses that
//# $phrase_char excludes), so a closing parenthesis can never be part of an atom.
var $atom_char = '[^(' + $space + ')<>@,;:".' + $esc + $OpenBR + $CloseBR + $ctrl + $NonASCII + ']';
var $atom =
  $atom_char + '+' +            // # some number of atom characters...
  '(?!' + $atom_char + ')';     // # ..not followed by something that could be part of an atom

// # Item 11: doublequoted string, unrolled.
var $quoted_str =
  '"' +                                       // # "
  $qtext + '*' +                              // #   normal
  '(?:' + $quoted_pair + $qtext + '*)*' +     // #   ( special normal* )*
  '"';                                        // # "

//# Item 7: word is an atom or quoted string
var $word =
  '(?:' +
  $atom +                // # Atom
  '|' +                  // #  or
  $quoted_str +          // # Quoted string
  ')';

//# Item 12: domain-ref is just an atom
var $domain_ref = $atom;

//# Item 13: domain-literal is like a quoted string, but [...] instead of  "..."
var $domain_lit =
  $OpenBR +                                        // # [
  '(?:' + $dtext + '|' + $quoted_pair + ')*' +     // #    stuff
  $CloseBR;                                        // #           ]

// # Item 9: sub-domain is a domain-ref or domain-literal
var $sub_domain =
  '(?:' +
  $domain_ref +
  '|' +
  $domain_lit +
  ')' +
  $X;                 // # optional trailing comments

// # Item 6: domain is a list of subdomains separated by dots.
var $domain =
  $sub_domain +
  '(?:' +
  $Period + $X + $sub_domain +
  ')*';

//# Item 8: a route. A bunch of "@ $domain" separated by commas, followed by a colon.
var $route =
  '@' + $X + $domain +
  '(?:,' + $X + '@' + $X + $domain + ')*' +   // # additional domains
  ':' +
  $X;                                         // # optional trailing comments

//# Item 6: local-part is a bunch of $word separated by periods
//# Every line of the expression must end in "+": without it, automatic
//# semicolon insertion ends the assignment early and the "additional words"
//# group is silently dropped from the pattern.
var $local_part =
  $word + $X +
  '(?:' +
  $Period + $X + $word + $X +     // # additional words
  ')*';

// # Item 2: addr-spec is local@domain
var $addr_spec =
  $local_part + '@' + $X + $domain;

//# Item 4: route-addr is <route? addr-spec>
var $route_addr =
  '<' + $X +                  // # <
  '(?:' + $route + ')?' +     // #       optional route
  $addr_spec +                // #       address spec
  '>';                        // #                 >

//# Item 3: phrase........
var $phrase_ctrl = '\x00-\x08\x0a-\x1f'; // # like ctrl, but without tab

//# Like atom-char, but without listing space, and uses phrase_ctrl.
//# Since the class is negated, this matches the same as atom-char plus space and tab
var $phrase_char =
  '[^()<>@,;:".' + $esc + $OpenBR + $CloseBR + $NonASCII + $phrase_ctrl + ']';

// # We've worked it so that $word, $comment, and $quoted_str do not consume trailing $X
// # because we take care of it manually.
var $phrase =
  $word +                                               // # leading word
  $phrase_char + '*' +                                  // # "normal" atoms and/or spaces
  '(?:' +
  '(?:' + $comment + '|' + $quoted_str + ')' +          // # "special" comment or quoted string
  $phrase_char + '*' +                                  // #  more "normal"
  ')*';

// ## Item #1: mailbox is an addr_spec or a phrase/route_addr
var $mailbox =
  $X +                        // # optional leading comment
  '(?:' +
  $phrase + $route_addr +     // # name and address
  '|' +                       // #  or
  $addr_spec +                // # address
  ')';
michael@0: 
michael@0: 
michael@0: //###########################################################################
michael@0: 
michael@0: 
// Fixture for the second regexp: compile the assembled mailbox pattern with
// the global flag and set up the subject string and its expected match.
re = new RegExp($mailbox, "g");
str = 'Jeffy<"That Tall Guy"@ora.com (this address is no longer active)>';
expect = [
  'Jeffy<"That Tall Guy"@ora.com (this address is no longer active)>'
];
michael@0: 
/*
 * Section 3 - performance: time a single exec() of the second regexp and
 * report the resulting verdict against the verdict of a fast run.
 */
status = inSection(3);
var start = new Date();
var result = re.exec(str);
actual = elapsedTime(start);
reportCompare(
  isThisFast(FAST),     // FAST <= FAST, so this is always MSG_FAST
  isThisFast(actual),
  status);

/*
 * Section 4 - accuracy: pass the match obtained above, together with the
 * expected array, to the harness's testRegExp().
 */
status = inSection(4);
testRegExp([status], [re], [str], [result], [expect]);