I'm looking for a JavaScript library (ideally a node.js package) that can check if a string matches a regular expression incrementally (i.e. one character at a time), and return indeterminate results. For example, say I have the following regex:
j.*s.*
And I want to test the string "javascript". I would like an API similar to the following:
// Sketch of the desired (hypothetical) API: the pattern is wrapped once, and
// each matcher accumulates the string under test piece by piece.
var iregex = new IncrementalRegex('j.*s.*');
var matcher = iregex.createMatcher();
matcher.append('j');
matcher.test(); //returns "possible match"
matcher.append('a');
matcher.test(); //returns "possible match"
// String so far: "javas"
matcher.append('v'); matcher.append('a'); matcher.append('s');
matcher.test(); //returns "match found"
// String so far: "javascript"
matcher.append('ript');
matcher.test(); //returns "match found"
Whereas if I tested the string "foo", I would expect something like this:
// A separate matcher for the string "foo"; all calls must go to matcher2,
// not to the earlier `matcher`, which already holds other input.
var matcher2 = iregex.createMatcher();
matcher2.append('f');
matcher2.test(); //returns "no match possible"
//At this point I wouldn't bother appending "oo" because I know that no match is possible.
EDIT: To be clear, append is building up the string being tested. A new matcher starts out testing against the empty string, and after a matcher.append('foo') it matches against foo. appendToString or buildUpString might have been better names to use.
Also, I have one idea of how this could potentially be done, but I haven't fully thought it through yet. Perhaps it is possible to build a "Potential match" regex from the original regex that will match strings if and only if they are the beginning of a string the original regex matches.
If your parser rules only use proper formal-language regular expressions (i.e. no backreferences, lookaheads or lookbehinds), you could translate them to NFAs (using Thompson's construction or the like) and then push each character through the standard two-stack NFA simulation algorithm: if there's no transition on the character, you've got "no"; if there is one and you've got a final state in your current state set, you've got "yes"; otherwise you've got "maybe".
Your "IncrementalRegex" can be implemented by using an encapsulated RegExp
object.
/**
 * Wraps a RegExp whose pattern source can be extended piece by piece.
 *
 * @param {string} pattern - Initial regular-expression source.
 * @param {string} [flags] - RegExp flags (for example 'i').
 */
function Matcher(pattern, flags) {
  this.setExpression(pattern, flags);
}

/**
 * Stores the pattern and flags and recompiles the wrapped RegExp.
 *
 * @param {string} pattern - Regular-expression source.
 * @param {string} [flags] - RegExp flags.
 * @throws {SyntaxError} When RegExp rejects the pattern or the flags.
 */
Matcher.prototype.setExpression = function (pattern, flags) {
  this.pattern = pattern;
  this.flags = flags;
  this.re = new RegExp(this.pattern, this.flags);
};

/**
 * Extends the current pattern source with `pattern` and recompiles it.
 * Note that this grows the expression, not the string being tested.
 *
 * @param {string} pattern - Source fragment to concatenate to the pattern.
 */
Matcher.prototype.append = function (pattern) {
  this.setExpression(this.pattern + pattern, this.flags);
};

/**
 * Tests `str` against the current expression.
 *
 * @param {string} str - String to test.
 * @returns {boolean} True when the expression matches `str`.
 */
Matcher.prototype.test = function (str) {
  // With the 'g' or 'y' flag, RegExp#test resumes from lastIndex, so repeated
  // calls on the same input would otherwise flip between true and false.
  this.re.lastIndex = 0;
  return this.re.test(str);
};
// Demo: extend the expression step by step and re-test the same string.
var matcher = new Matcher('j.*s.*', 'i');
var str = 'JavaScript';

/** Logs the current pattern source and whether `str` matches it. */
function test() {
  var source = matcher.re.source;
  var isMatch = matcher.test(str);
  console.log(source, ':', isMatch);
}

test(); // true

matcher.append('ri');
test(); // true

matcher.append('.t');
test(); // true

matcher.append('whatever');
test(); // false
http://jsfiddle.net/f0t0n/Nkyyd/
Can you describe the exact business requirements? Maybe we can find a more elegant way to implement your task.