// |reftest| random -- bogus perf test (bug 467263)
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
 *
 * Date:    14 Feb 2002
 * SUMMARY: Performance: Regexp performance degraded from 4.7
 * See http://bugzilla.mozilla.org/show_bug.cgi?id=85721
 *
 * Adjust this testcase if necessary. The FAST constant defines
 * an upper bound in milliseconds for any execution to take.
 *
 * NOTE: printBugNumber, printStatus, inSection, reportCompare and testRegExp
 * are not defined in this file; they are expected to be provided by the
 * surrounding test harness.
 */
//-----------------------------------------------------------------------------
var BUGNUMBER = 85721;
var summary = 'Performance: execution of regular expression';
var FAST = 100; // execution should be 100 ms or less to pass the test
var MSG_FAST = 'Execution took less than ' + FAST + ' ms';
var MSG_SLOW = 'Execution took ';
var MSG_MS = ' ms';
var str = '';
var re = '';
var status = '';
var actual = '';
var expect = '';
var start;
var result;

printBugNumber(BUGNUMBER);
printStatus(summary);


/**
 * Milliseconds elapsed between startTime and now.
 *
 * @param {Date} startTime - the moment the measurement began
 * @returns {number} elapsed time in milliseconds
 */
function elapsedTime(startTime)
{
  return new Date() - startTime;
}


/**
 * Turn a duration into the message that is compared by reportCompare.
 * Every duration within the FAST budget maps to the same MSG_FAST string,
 * so isThisFast(FAST) can serve as the expected value.
 *
 * @param {number} ms - measured duration in milliseconds
 * @returns {string} MSG_FAST when ms <= FAST, otherwise a message naming ms
 */
function isThisFast(ms)
{
  if (ms <= FAST)
    return MSG_FAST;
  return MSG_SLOW + ms + MSG_MS;
}



/*
 * The first regexp. We'll test for performance (Section 1) and accuracy (Section 2).
 *
 * The input is an XML-like connection descriptor. Each literal closing tag
 * in the pattern is paired with its opening tag, and the seven capture groups
 * line up with the seven captures listed in `expect`.
 */
str = '<sql:connection id="conn1"> <sql:url>www.m.com</sql:url> <sql:driver>drive.class</sql:driver>\n<sql:userId>foo</sql:userId> <sql:password>goo</sql:password> </sql:connection>';
re = /<sql:connection id="([^\r\n]*?)">\s*<sql:url>\s*([^\r\n]*?)\s*<\/sql:url>\s*<sql:driver>\s*([^\r\n]*?)\s*<\/sql:driver>\s*(\s*<sql:userId>\s*([^\r\n]*?)\s*<\/sql:userId>\s*)?\s*(\s*<sql:password>\s*([^\r\n]*?)\s*<\/sql:password>\s*)?\s*<\/sql:connection>/;
expect = [
  '<sql:connection id="conn1"> <sql:url>www.m.com</sql:url> <sql:driver>drive.class</sql:driver>\n<sql:userId>foo</sql:userId> <sql:password>goo</sql:password> </sql:connection>',
  'conn1',
  'www.m.com',
  'drive.class',
  '<sql:userId>foo</sql:userId> ',
  'foo',
  '<sql:password>goo</sql:password> ',
  'goo'
];

/*
 * Check performance -
 */
status = inSection(1);
start = new Date();
result = re.exec(str);
actual = elapsedTime(start);
reportCompare(isThisFast(FAST), isThisFast(actual), status);

/*
 * Check accuracy -
 */
status = inSection(2);
testRegExp([status], [re], [str], [result], [expect]);



/*
 * The second regexp (HUGE!). We'll test for performance (Section 3) and accuracy (Section 4).
 * It comes from the O'Reilly book "Mastering Regular Expressions" by Jeffrey Friedl, Appendix B
 *
 * The pattern is assembled from string fragments. Anything that must reach
 * the RegExp constructor as a backslash escape needs a doubled backslash
 * here; single-character escapes such as '\t' or '\x80' are resolved by the
 * string literal and end up as literal characters inside character classes.
 */

//# Some things for avoiding backslashitis later on.
var $esc = '\\\\';
// Must be '\\.': a plain '\.' collapses to '.', which the regexp engine
// treats as "any character" rather than a literal period.
var $Period = '\\.';
var $space = '\x20';
var $tab = '\t';
var $OpenBR = '\\[';
var $CloseBR = '\\]';
var $OpenParen = '\\(';
var $CloseParen = '\\)';
var $NonASCII = '\x80-\xff';
var $ctrl = '\x00-\x1f';
var $CRlist = '\n\r'; //# note: this should really be only \015 (CR).

// Items 19, 20, 21
var $qtext = '[^' + $esc + $NonASCII + $CRlist + '"]';                   // # for within "..."
var $dtext = '[^' + $esc + $NonASCII + $CRlist + $OpenBR + $CloseBR + ']'; // # for within [...]
var $quoted_pair = $esc + '[^' + $NonASCII + ']';                        // # an escaped character

//##############################################################################
//# Items 22 and 23, comment.
//# Impossible to do properly with a regex, I make do by allowing at most one level of nesting.
var $ctext = '[^' + $esc + $NonASCII + $CRlist + '()]';

//# $Cnested matches one non-nested comment.
//# It is unrolled, with normal of $ctext, special of $quoted_pair.
var $Cnested =
  $OpenParen +                                 // #  (
  $ctext + '*' +                               // #     normal*
  '(?:' + $quoted_pair + $ctext + '*)*' +      // #     (special normal*)*
  $CloseParen;                                 // #  )


//# $comment allows one level of nested parentheses
//# It is unrolled, with normal of $ctext, special of ($quoted_pair|$Cnested)
var $comment =
  $OpenParen +                                 // #  (
  $ctext + '*' +                               // #     normal*
  '(?:' +                                      // #       (
  '(?:' + $quoted_pair + '|' + $Cnested + ')' + // #         special
  $ctext + '*' +                               // #         normal*
  ')*' +                                       // #       )*
  $CloseParen;                                 // #  )


//##############################################################################
//# $X is optional whitespace/comments.
var $X =
  '[' + $space + $tab + ']*' +                      // # Nab whitespace.
  '(?:' + $comment + '[' + $space + $tab + ']*)*';  // # If comment found, allow more spaces.


//# Item 10: atom
var $atom_char = '[^(' + $space + '<>@,;:".' + $esc + $OpenBR + $CloseBR + $ctrl + $NonASCII + ']';
var $atom =
  $atom_char + '+' +            // # some number of atom characters...
  '(?!' + $atom_char + ')';     // # ..not followed by something that could be part of an atom

// # Item 11: doublequoted string, unrolled.
var $quoted_str =
  '"' +                                        // # "
  $qtext + '*' +                               // #   normal
  '(?:' + $quoted_pair + $qtext + '*)*' +      // #   ( special normal* )*
  '"';                                         // # "

//# Item 7: word is an atom or quoted string
var $word =
  '(?:' +
  $atom +                // # Atom
  '|' +                  // #  or
  $quoted_str +          // # Quoted string
  ')';

//# Item 12: domain-ref is just an atom
var $domain_ref = $atom;

//# Item 13: domain-literal is like a quoted string, but [...] instead of "..."
var $domain_lit =
  $OpenBR +                                     // # [
  '(?:' + $dtext + '|' + $quoted_pair + ')*' +  // #    stuff
  $CloseBR;                                     // #           ]

// # Item 9: sub-domain is a domain-ref or domain-literal
var $sub_domain =
  '(?:' +
  $domain_ref +
  '|' +
  $domain_lit +
  ')' +
  $X;                 // # optional trailing comments

// # Item 6: domain is a list of subdomains separated by dots.
var $domain =
  $sub_domain +
  '(?:' +
  $Period + $X + $sub_domain +
  ')*';

//# Item 8: a route. A bunch of "@ $domain" separated by commas, followed by a colon.
var $route =
  '@' + $X + $domain +
  '(?:,' + $X + '@' + $X + $domain + ')*' +  // # additional domains
  ':' +
  $X;                                        // # optional trailing comments

//# Item 6: local-part is a bunch of $word separated by periods
// The '+' after $X is essential: without it the statement ends right there
// (automatic semicolon insertion) and the "additional words" group is dropped.
var $local_part =
  $word + $X +
  '(?:' +
  $Period + $X + $word + $X +     // # additional words
  ')*';

// # Item 2: addr-spec is local@domain
var $addr_spec =
  $local_part + '@' + $X + $domain;

//# Item 4: route-addr is <route? addr-spec>
var $route_addr =
  '<' + $X +                  // # <
  '(?:' + $route + ')?' +     // #     optional route
  $addr_spec +                // #     address spec
  '>';                        // #                 >

//# Item 3: phrase........
var $phrase_ctrl = '\x00-\x08\x0a-\x1f'; // # like ctrl, but without tab

//# Like atom-char, but without listing space, and uses phrase_ctrl.
//# Since the class is negated, this matches the same as atom-char plus space and tab
var $phrase_char =
  '[^()<>@,;:".' + $esc + $OpenBR + $CloseBR + $NonASCII + $phrase_ctrl + ']';

// # We've worked it so that $word, $comment, and $quoted_str do not consume trailing $X
// # because we take care of it manually.
var $phrase =
  $word +                                          // # leading word
  $phrase_char + '*' +                             // # "normal" atoms and/or spaces
  '(?:' +
  '(?:' + $comment + '|' + $quoted_str + ')' +     // # "special" comment or quoted string
  $phrase_char + '*' +                             // #  more "normal"
  ')*';

// ## Item #1: mailbox is an addr_spec or a phrase/route_addr
var $mailbox =
  $X +                            // # optional leading comment
  '(?:' +
  $phrase + $route_addr +         // # name and address
  '|' +                           // #  or
  $addr_spec +                    // # address
  ')';


//###########################################################################


re = new RegExp($mailbox, "g");
str = 'Jeffy<"That Tall Guy"@ora.com (this address is no longer active)>';
expect = ['Jeffy<"That Tall Guy"@ora.com (this address is no longer active)>'];

/*
 * Check performance -
 */
status = inSection(3);
start = new Date();
result = re.exec(str);
actual = elapsedTime(start);
reportCompare(isThisFast(FAST), isThisFast(actual), status);

/*
 * Check accuracy -
 */
status = inSection(4);
testRegExp([status], [re], [str], [result], [expect]);