You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

665 lines
29 KiB
JavaScript

// XRegExp 1.5.1
// (c) 2007-2012 Steven Levithan
// MIT License
// <http://xregexp.com>
// Provides an augmented, extensible, cross-browser implementation of regular expressions,
// including support for additional syntax, flags, and methods
// Library entry point; the closure below assigns the constructor to this variable
var XRegExp;
// A second load in the same frame would capture the already-overridden methods as "native"
// and break the saved references, so refuse to continue
if (XRegExp)
    throw Error("can't load XRegExp twice in the same frame");
// Run within an anonymous function to protect variables and avoid new globals
(function (undefined) {
//---------------------------------
// Constructor
//---------------------------------
// Accepts a pattern and flags; returns a new, extended `RegExp` object. Differs from a native
// regular expression in that additional syntax and flags are supported and cross-browser
// syntax inconsistencies are ameliorated. `XRegExp(/regex/)` clones an existing regex and
// converts to type XRegExp
XRegExp = function (pattern, flags) {
    // pattern: regex source (any value is converted to a string; `undefined` becomes the empty
    //          pattern), or an existing `RegExp` to be cloned
    // flags:   optional string of native and custom flags; must be omitted when cloning
    // Returns a native `RegExp` carrying an `_xregexp` property with the original source and
    // the list of capture names (or `null` when no named group was used)
    // Throws `TypeError` if flags are supplied together with a `RegExp` pattern, and `Error`
    // if called while a token handler or trigger is running
    var output = [],
        currScope = XRegExp.OUTSIDE_CLASS,
        pos = 0,
        context, tokenResult, match, chr, regex;
    if (XRegExp.isRegExp(pattern)) {
        if (flags !== undefined)
            throw TypeError("can't supply flags when constructing one RegExp from another");
        return clone(pattern);
    }
    // Tokens become part of the regex construction process, so protect against infinite
    // recursion when an XRegExp is constructed within a token handler or trigger
    if (isInsideConstructor)
        throw Error("can't call the XRegExp constructor within token definition functions");
    // Coerce to strings up front. Without this, a nonstring pattern such as `123` has no
    // usable `length`, the parsing loop is skipped, and an empty regex is silently returned;
    // an `undefined` pattern would throw when its `length` is read
    pattern = pattern === undefined ? "" : pattern + "";
    flags = (flags || "") + "";
    context = { // `this` object for custom tokens
        hasNamedCapture: false,
        captureNames: [],
        hasFlag: function (flag) {return flags.indexOf(flag) > -1;},
        setFlag: function (flag) {flags += flag;}
    };
    while (pos < pattern.length) {
        // Check for custom tokens at the current position
        tokenResult = runTokens(pattern, pos, currScope, context);
        if (tokenResult) {
            output.push(tokenResult.output);
            // Always advance at least one character, even after a zero-length token match
            pos += (tokenResult.match[0].length || 1);
        } else {
            // Check for native multicharacter metasequences (excluding character classes) at
            // the current position
            if (match = nativ.exec.call(nativeTokens[currScope], pattern.slice(pos))) {
                output.push(match[0]);
                pos += match[0].length;
            } else {
                chr = pattern.charAt(pos);
                // Track whether we are inside a character class so the right tokens apply
                if (chr === "[")
                    currScope = XRegExp.INSIDE_CLASS;
                else if (chr === "]")
                    currScope = XRegExp.OUTSIDE_CLASS;
                // Advance position one character
                output.push(chr);
                pos++;
            }
        }
    }
    // `flagClip` strips nonnative and duplicate flags before handing off to the native constructor
    regex = RegExp(output.join(""), nativ.replace.call(flags, flagClip, ""));
    regex._xregexp = {
        source: pattern,
        captureNames: context.hasNamedCapture ? context.captureNames : null
    };
    return regex;
};
//---------------------------------
// Public properties
//---------------------------------
// Library version string
XRegExp.version = "1.5.1";
// Token scope bitflags. `runTokens` tests them with a bitwise AND against each token's
// `scope`, and `addToken` defaults a missing scope to `OUTSIDE_CLASS`
XRegExp.INSIDE_CLASS = 1;
XRegExp.OUTSIDE_CLASS = 2;
//---------------------------------
// Private variables
//---------------------------------
// Replacement-text tokens handled by the overridden `replace`: `$n`/`$nn`, `$$`, `$&`, "$`",
// `$'` (captured in group 1) and `${name}`/`${n}` (captured in group 2)
var replacementToken = /\$(?:(\d\d?|[$&`'])|{([$\w]+)})/g,
flagClip = /[^gimy]+|([\s\S])(?=[\s\S]*\1)/g, // Nonnative and duplicate flags
// A quantifier at the start of the string, with an optional trailing `?`
quantifier = /^(?:[?*+]|{\d+(?:,\d*)?})\??/,
// Set to `true` by `runTokens` while token handlers/triggers run; checked by the constructor
isInsideConstructor = false,
// Token definitions registered through `XRegExp.addToken`
tokens = [],
// Copy native globals for reference ("native" is an ES3 reserved keyword)
nativ = {
exec: RegExp.prototype.exec,
test: RegExp.prototype.test,
match: String.prototype.match,
replace: String.prototype.replace,
split: String.prototype.split
},
compliantExecNpcg = nativ.exec.call(/()??/, "")[1] === undefined, // check `exec` handling of nonparticipating capturing groups
// `true` when the native `test` leaves `lastIndex` at zero after a zero-length match of `/^/g`
compliantLastIndexIncrement = function () {
var x = /^/g;
nativ.test.call(x, "");
return !x.lastIndex;
}(),
// `true` when `RegExp.prototype` exposes a `sticky` property (native `/y` flag support)
hasNativeY = RegExp.prototype.sticky !== undefined,
// Maps a scope bitflag to the regex that recognizes native metasequences in that scope
nativeTokens = {};
// `nativeTokens` match native multicharacter metasequences only (including deprecated octals,
// excluding character classes)
nativeTokens[XRegExp.INSIDE_CLASS] = /^(?:\\(?:[0-3][0-7]{0,2}|[4-7][0-7]?|x[\dA-Fa-f]{2}|u[\dA-Fa-f]{4}|c[A-Za-z]|[\s\S]))/;
nativeTokens[XRegExp.OUTSIDE_CLASS] = /^(?:\\(?:0(?:[0-3][0-7]{0,2}|[4-7][0-7]?)?|[1-9]\d*|x[\dA-Fa-f]{2}|u[\dA-Fa-f]{4}|c[A-Za-z]|[\s\S])|\(\?[:=!]|[?*+]\?|{\d+(?:,\d*)?}\??)/;
//---------------------------------
// Public methods
//---------------------------------
// Lets you extend or change XRegExp syntax and create custom flags. This is used internally by
// the XRegExp library and can be used to create XRegExp plugins. This function is intended for
// users with advanced knowledge of JavaScript's regular expression syntax and behavior. It can
// be disabled by `XRegExp.freezeTokens`
XRegExp.addToken = function (regex, handler, scope, trigger) {
    // regex:   pattern that recognizes the token
    // handler: called with the match; its return value is emitted into the native pattern
    // scope:   scope bitflag(s); defaults to `XRegExp.OUTSIDE_CLASS`
    // trigger: optional predicate deciding whether the token is active; defaults to `null`
    // The stored pattern is a global copy (sticky too, where supported) so that `runTokens`
    // can start matching from an arbitrary `lastIndex`
    var extraFlags = hasNativeY ? "gy" : "g";
    var definition = {
        pattern: clone(regex, extraFlags),
        handler: handler,
        scope: scope ? scope : XRegExp.OUTSIDE_CLASS,
        trigger: trigger ? trigger : null
    };
    tokens.push(definition);
};
// Accepts a pattern and flags; returns an extended `RegExp` object. If the pattern and flag
// combination has previously been cached, the cached copy is returned; otherwise the newly
// created regex is cached
XRegExp.cache = function (pattern, flags) {
    // Cached regexes are stored as properties of this function, keyed by "pattern/flags"
    var key = pattern + "/" + (flags || ""),
        cached = XRegExp.cache[key];
    if (!cached) {
        cached = XRegExp(pattern, flags);
        XRegExp.cache[key] = cached;
    }
    return cached;
};
// Accepts a `RegExp` instance; returns a copy with the `/g` flag set. The copy has a fresh
// `lastIndex` (set to zero). If you want to copy a regex without forcing the `global`
// property, use `XRegExp(regex)`. Do not use `RegExp(regex)` because it will not preserve
// special properties required for named capture
XRegExp.copyAsGlobal = function (regex) {
    // `clone` appends the extra flag to the regex's existing native flags
    var globalCopy = clone(regex, "g");
    return globalCopy;
};
// Accepts a string; returns the string with regex metacharacters escaped. The returned string
// can safely be used at any point within a regex to match the provided literal string. Escaped
// characters are [ ] { } ( ) * + ? - . , \ ^ $ | # and whitespace
XRegExp.escape = function (str) {
    // Every metacharacter (and whitespace character) gets a backslash placed in front of it
    var metacharacters = /[-[\]{}()*+?.,\\^$|#\s]/g;
    return str.replace(metacharacters, "\\$&");
};
// Accepts a string to search, regex to search with, position to start the search within the
// string (default: 0), and an optional Boolean indicating whether matches must start at-or-
// after the position or at the specified position only. This function ignores the `lastIndex`
// of the provided regex in its own handling, but updates the property for compatibility
XRegExp.execAt = function (str, regex, pos, anchored) {
    // Work on a global copy (sticky too, when anchoring and `/y` is natively supported) so
    // the search can begin at `pos` without disturbing the caller's regex
    var wantsSticky = anchored && hasNativeY,
        searcher = clone(regex, wantsSticky ? "gy" : "g"),
        found;
    pos = pos || 0;
    searcher.lastIndex = pos;
    found = searcher.exec(str); // Run the altered `exec` (required for `lastIndex` fix, etc.)
    // Without native sticky support, anchoring is enforced by rejecting later matches
    if (anchored && found && found.index !== pos)
        found = null;
    // Mirror the resulting position onto the caller's regex for compatibility
    if (regex.global)
        regex.lastIndex = found ? searcher.lastIndex : 0;
    return found;
};
// Breaks the unrestorable link to XRegExp's private list of tokens, thereby preventing
// syntax and flag changes. Should be run after XRegExp and any plugins are loaded
XRegExp.freezeTokens = function () {
    // Replace the public `addToken` with a stub that always throws; the private token list
    // stays reachable only from inside the closure
    function rejectNewTokens () {
        throw Error("can't run addToken after freezeTokens");
    }
    XRegExp.addToken = rejectNewTokens;
};
// Accepts any value; returns a Boolean indicating whether the argument is a `RegExp` object.
// Note that this is also `true` for regex literals and regexes created by the `XRegExp`
// constructor. This works correctly for variables created in another frame, when `instanceof`
// and `constructor` checks would fail to work as intended
XRegExp.isRegExp = function (o) {
    // Compare the internal class tag rather than relying on `instanceof` or `constructor`
    var classTag = Object.prototype.toString.call(o);
    return classTag === "[object RegExp]";
};
// Executes `callback` once per match within `str`. Provides a simpler and cleaner way to
// iterate over regex matches compared to the traditional approaches of subverting
// `String.prototype.replace` or repeatedly calling `exec` within a `while` loop
XRegExp.iterate = function (str, regex, callback, context) {
    // callback receives (match, zero-based match counter, str, regex) with `this` set to `context`
    var scanner = clone(regex, "g"),
        count = 0,
        found;
    // Run the altered `exec` (required for `lastIndex` fix, etc.)
    for (found = scanner.exec(str); found; found = scanner.exec(str)) {
        // Keep the caller's regex in sync in case `lastIndex` is inspected within `callback`
        if (regex.global)
            regex.lastIndex = scanner.lastIndex;
        callback.call(context, found, count, str, regex);
        count++;
        // Step past zero-length matches so the loop always makes progress
        if (scanner.lastIndex === found.index)
            scanner.lastIndex++;
    }
    if (regex.global)
        regex.lastIndex = 0;
};
// Accepts a string and an array of regexes; returns the result of using each successive regex
// to search within the matches of the previous regex. The array of regexes can also contain
// objects with `regex` and `backref` properties, in which case the named or numbered back-
// references specified are passed forward to the next regex or returned. E.g.:
// var xregexpImgFileNames = XRegExp.matchChain(html, [
// {regex: /<img\b([^>]+)>/i, backref: 1}, // <img> tag attributes
// {regex: XRegExp('(?ix) \\s src=" (?<src> [^"]+ )'), backref: "src"}, // src attribute values
// {regex: XRegExp("^http://xregexp\\.com(/[^#?]+)", "i"), backref: 1}, // xregexp.com paths
// /[^\/]+$/ // filenames (strip directory paths)
// ]);
XRegExp.matchChain = function (str, chain) {
    // Applies the chain entry at `depth` to every string in `inputs`, then feeds the collected
    // results to the next entry until the chain ends or nothing matched
    function searchLevel (inputs, depth) {
        var link = chain[depth],
            item = link.regex ? link : {regex: link}, // Normalize bare regexes to objects
            regex = clone(item.regex, "g"),
            found = [],
            collect = function (match) {
                found.push(item.backref ? (match[item.backref] || "") : match[0]);
            },
            i;
        for (i = 0; i < inputs.length; i++)
            XRegExp.iterate(inputs[i], regex, collect);
        if (depth === chain.length - 1 || !found.length)
            return found;
        return searchLevel(found, depth + 1);
    }
    return searchLevel([str], 0);
};
//---------------------------------
// New RegExp prototype methods
//---------------------------------
// Accepts a context object and arguments array; returns the result of calling `exec` with the
// first value in the arguments array. The context is ignored but is accepted for congruity
// with `Function.prototype.apply`
RegExp.prototype.apply = function (context, args) {
    // Only the first entry of `args` is used; `context` is ignored
    var subject = args[0];
    return this.exec(subject);
};
// Accepts a context object and string; returns the result of calling `exec` with the provided
// string. The context is ignored but is accepted for congruity with `Function.prototype.call`
RegExp.prototype.call = function (context, str) {
    // `context` is ignored; the regex itself is always the receiver of `exec`
    var result = this.exec(str);
    return result;
};
//---------------------------------
// Overridden native methods
//---------------------------------
// Adds named capture support (with backreferences returned as `result.name`), and fixes two
// cross-browser issues per ES3:
// - Captured values for nonparticipating capturing groups should be returned as `undefined`,
// rather than the empty string.
// - `lastIndex` should not be incremented after zero-length matches.
RegExp.prototype.exec = function (str) {
// str: value to search; it is passed through to the saved native `exec`
// Returns the native match array (patched as described below) or `null`
var match, name, r2, origLastIndex;
// Remember `lastIndex` of nonglobal regexes so it can be restored after the native call
if (!this.global)
origLastIndex = this.lastIndex;
match = nativ.exec.apply(this, arguments);
if (match) {
// Fix browsers whose `exec` methods don't consistently return `undefined` for
// nonparticipating capturing groups
if (!compliantExecNpcg && match.length > 1 && indexOf(match, "") > -1) {
// Nonglobal copy, so the native `replace` below runs its callback for one match only
r2 = RegExp(this.source, nativ.replace.call(getNativeFlags(this), "g", ""));
// Using `str.slice(match.index)` rather than `match[0]` in case lookahead allowed
// matching due to characters outside the match
nativ.replace.call((str + "").slice(match.index), r2, function () {
// The last two callback arguments are the match offset and the searched string, so
// only the entries before them are captured groups
for (var i = 1; i < arguments.length - 2; i++) {
if (arguments[i] === undefined)
match[i] = undefined;
}
});
}
// Attach named capture properties
if (this._xregexp && this._xregexp.captureNames) {
// `captureNames[i - 1]` is the name of group `i`, or `null` for unnamed groups
for (var i = 1; i < match.length; i++) {
name = this._xregexp.captureNames[i - 1];
if (name)
match[name] = match[i];
}
}
// Fix browsers that increment `lastIndex` after zero-length matches
if (!compliantLastIndexIncrement && this.global && !match[0].length && (this.lastIndex > match.index))
this.lastIndex--;
}
if (!this.global)
this.lastIndex = origLastIndex; // Fix IE, Opera bug (last tested IE 9.0.5, Opera 11.61 on Windows)
return match;
};
// Fix browser bugs in native method
RegExp.prototype.test = function (str) {
    // Goes straight to the native `exec` to skip the extra work done by the altered `exec`;
    // the `lastIndex` corrections are therefore repeated here
    var isGlobal = this.global,
        savedLastIndex, found;
    if (!isGlobal)
        savedLastIndex = this.lastIndex;
    found = nativ.exec.call(this, str);
    if (isGlobal) {
        // Fix browsers that increment `lastIndex` after zero-length matches
        if (found && !compliantLastIndexIncrement && !found[0].length && this.lastIndex > found.index)
            this.lastIndex--;
    } else {
        // Fix IE, Opera bug (last tested IE 9.0.5, Opera 11.61 on Windows)
        this.lastIndex = savedLastIndex;
    }
    return !!found;
};
// Adds named capture support and fixes browser bugs in native method
String.prototype.match = function (regex) {
    var allMatches;
    // Anything that is not already a regex is converted with the native constructor
    if (!XRegExp.isRegExp(regex))
        regex = RegExp(regex); // Native `RegExp`
    // Nonglobal search: the altered `exec` supplies named captures and the browser fixes
    if (!regex.global)
        return regex.exec(this);
    allMatches = nativ.match.apply(this, arguments);
    regex.lastIndex = 0; // Fix IE bug
    return allMatches;
};
// Adds support for `${n}` tokens for named and numbered backreferences in replacement text,
// and provides named backreferences to replacement functions as `arguments[0].name`. Also
// fixes cross-browser differences in replacement text syntax when performing a replacement
// using a nonregex search value, and the value of replacement regexes' `lastIndex` property
// during replacement iterations. Note that this doesn't support SpiderMonkey's proprietary
// third (`flags`) parameter
String.prototype.replace = function (search, replacement) {
    // search:      regex, or any other value (converted to a string and matched literally)
    // replacement: function called per match, or any other value (converted to a string in
    //              which `$n`, `$$`, `$&`, "$`", `$'` and `${name}`/`${n}` tokens are expanded)
    // Returns the new string. `search.lastIndex` is reset to zero for global regexes and
    // restored to its prior value for nonglobal ones
    var isRegex = XRegExp.isRegExp(search),
        captureNames, result, str, origLastIndex;
    // There are too many combinations of search/replacement types/values and browser bugs that
    // preclude passing to native `replace`, so every call goes through the wrappers below
    if (isRegex) {
        if (search._xregexp)
            captureNames = search._xregexp.captureNames; // Array or `null`
        if (!search.global)
            origLastIndex = search.lastIndex;
    } else {
        search = search + ""; // Type conversion
    }
    if (Object.prototype.toString.call(replacement) === "[object Function]") {
        result = nativ.replace.call(this + "", search, function () {
            if (captureNames) {
                // Change the `arguments[0]` string primitive to a String object which can store properties
                arguments[0] = new String(arguments[0]);
                // Store named backreferences on `arguments[0]`
                for (var i = 0; i < captureNames.length; i++) {
                    if (captureNames[i])
                        arguments[0][captureNames[i]] = arguments[i + 1];
                }
            }
            // Update `lastIndex` before calling `replacement` (fix browsers)
            if (isRegex && search.global)
                search.lastIndex = arguments[arguments.length - 2] + arguments[0].length;
            return replacement.apply(null, arguments);
        });
    } else {
        str = this + ""; // Type conversion, so `args[args.length - 1]` will be a string (given nonstring `this`)
        result = nativ.replace.call(str, search, function () {
            var args = arguments; // Keep this function's `arguments` available through closure
            return nativ.replace.call(replacement + "", replacementToken, function ($0, $1, $2) {
                // Numbered backreference (without delimiters) or special variable
                if ($1) {
                    switch ($1) {
                        case "$": return "$";
                        case "&": return args[0];
                        case "`": return args[args.length - 1].slice(0, args[args.length - 2]);
                        case "'": return args[args.length - 1].slice(args[args.length - 2] + args[0].length);
                        // Numbered backreference
                        default:
                            // What does "$10" mean?
                            // - Backreference 10, if 10 or more capturing groups exist
                            // - Backreference 1 followed by "0", if 1-9 capturing groups exist
                            // - Otherwise, it's the string "$10"
                            // Also note:
                            // - Backreferences cannot be more than two digits (enforced by `replacementToken`)
                            // - "$01" is equivalent to "$1" if a capturing group exists, otherwise it's the string "$01"
                            // - There is no "$0" token ("$&" is the entire match)
                            var literalNumbers = "";
                            $1 = +$1; // Type conversion; drop leading zero
                            if (!$1) // `$1` was "0" or "00"
                                return $0;
                            while ($1 > args.length - 3) {
                                literalNumbers = String.prototype.slice.call($1, -1) + literalNumbers;
                                $1 = Math.floor($1 / 10); // Drop the last digit
                            }
                            return ($1 ? args[$1] || "" : "$") + literalNumbers;
                    }
                // Named backreference or delimited numbered backreference
                } else {
                    // What does "${n}" mean?
                    // - Backreference to numbered capture n. Two differences from "$n":
                    //   - n can be more than two digits
                    //   - Backreference 0 is allowed, and is the entire match
                    // - Backreference to named capture n, if it exists and is not a number overridden by numbered capture
                    // - Otherwise, it's the string "${n}"
                    var n = +$2; // Type conversion; drop leading zeros
                    // A nonparticipating group arrives as `undefined`; fall back to "" (as the
                    // `$n` branch does) so the literal text "undefined" is never inserted
                    if (n <= args.length - 3)
                        return args[n] || "";
                    n = captureNames ? indexOf(captureNames, $2) : -1;
                    return n > -1 ? (args[n + 1] || "") : $0;
                }
            });
        });
    }
    if (isRegex) {
        if (search.global)
            search.lastIndex = 0; // Fix IE, Safari bug (last tested IE 9.0.5, Safari 5.1.2 on Windows)
        else
            search.lastIndex = origLastIndex; // Fix IE, Opera bug (last tested IE 9.0.5, Opera 11.61 on Windows)
    }
    return result;
};
// A consistent cross-browser, ES3 compliant `split`
String.prototype.split = function (s /* separator */, limit) {
// s:     separator; only regex separators are handled here
// limit: optional maximum number of items in the returned array (rules listed below)
// Returns an array of substrings, with captured groups of the separator spliced in between
// If separator `s` is not a regex, use the native `split`
if (!XRegExp.isRegExp(s))
return nativ.split.apply(this, arguments);
var str = this + "", // Type conversion
output = [],
lastLastIndex = 0, // End position of the most recent separator that was consumed
match, lastLength; // `lastLength` is the length of that separator match
// Behavior for `limit`: if it's...
// - `undefined`: No limit
// - `NaN` or zero: Return an empty array
// - A positive number: Use `Math.floor(limit)`
// - A negative number: No limit
// - Other: Type-convert, then use the above rules
if (limit === undefined || +limit < 0) {
limit = Infinity;
} else {
limit = Math.floor(+limit);
if (!limit)
return [];
}
// This is required if not `s.global`, and it avoids needing to set `s.lastIndex` to zero
// and restore it to its original value when we're done using the regex
s = XRegExp.copyAsGlobal(s);
while (match = s.exec(str)) { // Run the altered `exec` (required for `lastIndex` fix, etc.)
// Only act on matches that end beyond the previously consumed separator
if (s.lastIndex > lastLastIndex) {
output.push(str.slice(lastLastIndex, match.index));
// Include the separator's captured groups, except for a match at the very end of `str`
if (match.length > 1 && match.index < str.length)
Array.prototype.push.apply(output, match.slice(1));
lastLength = match[0].length;
lastLastIndex = s.lastIndex;
if (output.length >= limit)
break;
}
// Step past zero-length matches so the loop always makes progress
if (s.lastIndex === match.index)
s.lastIndex++;
}
if (lastLastIndex === str.length) {
// The last separator ended exactly at the end of the string: add a trailing empty string
// unless that separator was zero-length and the regex also matches the empty string
if (!nativ.test.call(s, "") || lastLength)
output.push("");
} else {
output.push(str.slice(lastLastIndex));
}
// Captured groups may have pushed the array past `limit`, so trim it
return output.length > limit ? output.slice(0, limit) : output;
};
//---------------------------------
// Private helper functions
//---------------------------------
// Supporting function for `XRegExp`, `XRegExp.copyAsGlobal`, etc. Returns a copy of a `RegExp`
// instance with a fresh `lastIndex` (set to zero), preserving properties required for named
// capture. Also allows adding new flags in the process of copying the regex
function clone (regex, additionalFlags) {
    // regex:           the `RegExp` to copy; anything else raises a `TypeError`
    // additionalFlags: optional flags appended to the regex's own native flags
    var meta, copy;
    if (!XRegExp.isRegExp(regex))
        throw TypeError("type RegExp expected");
    meta = regex._xregexp;
    copy = XRegExp(regex.source, getNativeFlags(regex) + (additionalFlags || ""));
    // Carry over the original pattern text and an independent copy of the capture names
    if (meta) {
        copy._xregexp = {
            source: meta.source,
            captureNames: meta.captureNames ? meta.captureNames.slice(0) : null
        };
    }
    return copy;
}
// Returns the flag string ("g", "i", "m", "x", "y", in that order) for the flag properties
// that are truthy on `regex`
function getNativeFlags (regex) {
    var flags = "";
    if (regex.global)
        flags += "g";
    if (regex.ignoreCase)
        flags += "i";
    if (regex.multiline)
        flags += "m";
    if (regex.extended) // Proposed for ES4; included in AS3
        flags += "x";
    if (regex.sticky)
        flags += "y";
    return flags;
}
// Tries each registered token, most recently added first, at position `index` of `pattern`.
// Returns `{output, match}` for the first token that is in scope, whose trigger (if any)
// passes, and whose pattern matches exactly at `index`; returns `undefined` otherwise
function runTokens (pattern, index, scope, context) {
    var found, token, hit, i;
    // Protect against constructing XRegExps within token handler and trigger functions
    isInsideConstructor = true;
    // Must reset `isInsideConstructor`, even if a `trigger` or `handler` throws
    try {
        for (i = tokens.length - 1; i >= 0; i--) { // Run in reverse order
            token = tokens[i];
            if (!(scope & token.scope))
                continue;
            if (token.trigger && !token.trigger.call(context))
                continue;
            token.pattern.lastIndex = index;
            hit = token.pattern.exec(pattern); // Running the altered `exec` here allows use of named backreferences, etc.
            if (hit && hit.index === index) {
                found = {
                    output: token.handler.call(context, hit, scope),
                    match: hit
                };
                break;
            }
        }
    } catch (err) {
        // NOTE(review): this rethrow looks redundant next to `finally`; presumably kept for
        // engines that mishandle `try`/`finally` without `catch` - confirm before removing
        throw err;
    } finally {
        isInsideConstructor = false;
    }
    return found;
}
// Returns the first index at or after `from` (default 0) where `array` holds a value strictly
// equal to `item`, or -1 when there is none
function indexOf (array, item, from) {
    var i;
    if (Array.prototype.indexOf) // Use the native array method if available
        return array.indexOf(item, from);
    i = from || 0;
    while (i < array.length) {
        if (array[i] === item)
            return i;
        i++;
    }
    return -1;
}
//---------------------------------
// Built-in tokens
//---------------------------------
// Augment XRegExp's regular expression syntax and flags. Note that when adding tokens, the
// third (`scope`) argument defaults to `XRegExp.OUTSIDE_CLASS`
// Comment pattern: (?# )
XRegExp.addToken(
    /\(\?#[^)]*\)/,
    function (match) {
        // Keep tokens separated unless the following token is a quantifier
        var rest = match.input.slice(match.index + match[0].length);
        if (nativ.test.call(quantifier, rest))
            return "";
        return "(?:)";
    }
);
// Capturing group (opening parenthesis only). Records a `null` name so that positions in
// `captureNames` stay aligned with group numbers when named groups are also present
XRegExp.addToken(
    /\((?!\?)/,
    function () {
        this.captureNames.push(null);
        return "(";
    }
);
// Named capturing group (opening delimiter only): (?<name>
// Emitted as an ordinary capturing group; the name is recorded for later lookup
XRegExp.addToken(
    /\(\?<([$\w]+)>/,
    function (match) {
        var groupName = match[1];
        this.captureNames.push(groupName);
        this.hasNamedCapture = true;
        return "(";
    }
);
// Named backreference: \k<name>
XRegExp.addToken(
    /\\k<([\w$]+)>/,
    function (match) {
        var groupIndex = indexOf(this.captureNames, match[1]),
            nextChar;
        // Preserve backreferences to named groups that are undefined at this point as
        // literal strings
        if (groupIndex < 0)
            return match[0];
        // Keep the numbered backreference separate from subsequent literal numbers
        nextChar = match.input.charAt(match.index + match[0].length);
        return "\\" + (groupIndex + 1) + (isNaN(nextChar) ? "" : "(?:)");
    }
);
// Empty character class: [] or [^]
XRegExp.addToken(
    /\[\^?]/,
    function (match) {
        // For cross-browser compatibility with ES3, convert [] to \b\B and [^] to [\s\S].
        // (?!) should work like \b\B, but is unreliable in Firefox
        if (match[0] === "[]")
            return "\\b\\B";
        return "[\\s\\S]";
    }
);
// Mode modifier at the start of the pattern only, with any combination of flags imsx: (?imsx)
// Does not support x(?i), (?-i), (?i-m), (?i: ), (?i)(?m), etc.
XRegExp.addToken(
    /^\(\?([imsx]+)\)/,
    function (match) {
        // The modifier only adds flags; nothing is emitted into the pattern
        this.setFlag(match[1]);
        return "";
    }
);
// Whitespace and comments, in free-spacing (aka extended) mode only
XRegExp.addToken(
    /(?:\s+|#.*)+/,
    function (match) {
        // Keep tokens separated unless the following token is a quantifier
        var rest = match.input.slice(match.index + match[0].length);
        if (nativ.test.call(quantifier, rest))
            return "";
        return "(?:)";
    },
    XRegExp.OUTSIDE_CLASS,
    function () {
        return this.hasFlag("x");
    }
);
// Dot, in dotall (aka singleline) mode only
XRegExp.addToken(
    /\./,
    function () {
        return "[\\s\\S]";
    },
    XRegExp.OUTSIDE_CLASS,
    function () {
        return this.hasFlag("s");
    }
);
//---------------------------------
// Backward compatibility
//---------------------------------
// Uncomment the following block for compatibility with XRegExp 1.0-1.2:
/*
XRegExp.matchWithinChain = XRegExp.matchChain;
RegExp.prototype.addFlags = function (s) {return clone(this, s);};
RegExp.prototype.execAll = function (s) {var r = []; XRegExp.iterate(s, this, function (m) {r.push(m);}); return r;};
RegExp.prototype.forEachExec = function (s, f, c) {return XRegExp.iterate(s, this, f, c);};
RegExp.prototype.validate = function (s) {var r = RegExp("^(?:" + this.source + ")$(?!\\s)", getNativeFlags(this)); if (this.global) this.lastIndex = 0; return s.search(r) === 0;};
*/
})();