Mercurial > jsparser
changeset 3:f174153281a9
massive refactoring/almost-complete rewrite.
author | Atul Varma <varmaa@toolness.com> |
---|---|
date | Sat, 30 May 2009 13:23:01 -0700 |
parents | 032f64260794 |
children | 559378a3ec26 |
files | jsparser.js |
diffstat | 1 files changed, 168 insertions(+), 154 deletions(-) [+] |
line wrap: on
line diff
--- a/jsparser.js Fri May 29 22:16:51 2009 -0700 +++ b/jsparser.js Sat May 30 13:23:01 2009 -0700 @@ -1,80 +1,142 @@ -var Symbols = { - semicolon: ';', - leftBracket: '[', - rightBracket: ']', - leftBrace: '{', - rightBrace: '}', - leftParen: '(', - rightParen: ')', - plus: '+', - multiply: '*', - minus: '-', - number: /^[0-9]+/, - name: /^[A-Za-z]+[0-9A-Za-z_]*/, - newline: '\n', - whitespace: /^[ \t]+/ - }; +var Parsing = {}; + +Parsing.Symbol = function Symbol(options) { + if (options) + for (name in options) + if (options.hasOwnProperty(name)) + this[name] = options[name]; +}; + +Parsing.Symbol.prototype = { + leftBindingPower: 0, + computeSelf: function() { + throw new Error("No computeSelf for " + this.name); + }, + computeLeft: function() { + throw new Error("No computeLeft for " + this.name); + }, + extend: function(contents) { + function Subclass() {} + Subclass.prototype = this; + var instance = new Subclass(); + if (contents) + for (name in contents) + if (contents.hasOwnProperty(name)) + instance[name] = contents[name]; + return instance; + }, + makeTokenAt: function(text) { + var matchValue = null; + if (typeof(this.match) == 'string') { + if (text.indexOf(this.match) == 0) + matchValue = this.match; + } else { + // It's a Regular Expression. + var match = text.match(this.match); + if (match) + matchValue = match[0]; + } + if (matchValue) + return this.extend({value: matchValue}); + return null; + } +}; + +Parsing.BinaryOrUnaryOp = function BinaryOrUnaryOp(options) { + Parsing.Symbol.call(this, options); +}; -function tokenize(options) { - var symbols = options.symbols; +Parsing.BinaryOrUnaryOp.prototype = new Parsing.Symbol( + {computeSelf: function(parser) { + var unaryOp = this.extend(Parsing.UnaryOperator.prototype); + return unaryOp.computeSelf(parser); + }, + computeLeft: function(parser, left) { + var binaryOp = this.extend(Parsing.BinaryOperator.prototype); + return binaryOp.computeLeft(parser, left); + } + }); + +Parsing.UnaryOperator = function UnaryOperator(options) { + Parsing.Symbol.call(this, options); +}; + +Parsing.UnaryOperator.prototype = new Parsing.Symbol( + {computeSelf: function(parser) { + this.arity = "unary"; + this.operand = parser.token; + parser.advance(); + return this; + }, + toString: function() { + return "(" + this.match + this.operand + ")"; + } + }); + +Parsing.BinaryOperator = function BinaryOperator(options) { + Parsing.Symbol.call(this, options); +}; + +Parsing.BinaryOperator.prototype = new Parsing.Symbol( + {computeLeft: function(parser, left) { + this.arity = "binary"; + this.leftOperand = left; + this.rightOperand = parser.expression(this.leftBindingPower); + return this; + }, + toString: function() { + return ("(" + this.leftOperand + " " + this.match + " " + + this.rightOperand + ")"); + } + }); + +Parsing.tokenize = function tokenize(options) { + var lexicon = options.lexicon; var text = options.text; - var tokenConstructors = options.tokenConstructors; - var symbolArray = []; - var symbolFound; + var tokenFound; var tokens = []; var lineNo = 0; var charNo = 0; - for (name in symbols) - symbolArray.push({name: name, symbol: symbols[name]}); - - function onSymbolFound(name, value) { - symbolFound = true; - if (name != 'newline' && - name != 'whitespace') - tokens.push(new tokenConstructors[name]({name: name, - value: value, - charNo: charNo, - lineNo: lineNo})); - if (name == 'newline') { - lineNo += 1; - charNo = 0; - } else - charNo += value.length; + while (text) { + tokenFound = false; + for (var i = 0; i < lexicon.length && !tokenFound; i++) { + var symbol = lexicon[i]; - text = text.slice(value.length); - } - - while (text) { - symbolFound = false; - for (var i = 0; i < symbolArray.length && !symbolFound; i++) { - var name = symbolArray[i].name; - var symbol = symbolArray[i].symbol; + var matchedToken = symbol.makeTokenAt(text); + if (matchedToken) { + tokenFound = true; + if (!matchedToken.ignore) { + matchedToken.lineNo = lineNo; + matchedToken.charNo = charNo; + tokens.push(matchedToken); + } + var matchedValue = matchedToken.value; + text = text.slice(matchedValue.length); - if (typeof(symbol) == 'string') { - if (text.indexOf(symbol) == 0) - onSymbolFound(name, symbol); - } else if (typeof(symbol) == 'function') { - // It's a Regular Expression. - var match = text.match(symbol); - if (match) - onSymbolFound(name, match[0]); - } else - throw new Error('Unknown symbol type: ' + symbol); + // Increment the current line and character number. + var nextNewline; + while ((nextNewline = matchedValue.indexOf('\n')) != -1) { + lineNo += 1; + charNo = 0; + matchedValue = matchedValue.slice(nextNewline + 1); + } + charNo += matchedValue.length; + } } - if (!symbolFound) + if (!tokenFound) throw new Error("I have no idea what this is: " + text); } - tokens.push(new tokenConstructors.end({name: "end", - charNo: charNo, - lineNo: lineNo})); + tokens.push(new Parsing.Symbol({name: "end", + charNo: charNo, + lineNo: lineNo})); return tokens; -} +}; -function Parser(tokens) { +Parsing.Parser = function Parser(tokens) { var self = this; tokens.reverse(); @@ -99,98 +161,50 @@ this.advance(); return this.expression(0); }; +}; + +function testParsing(print) { + var MyLexicon = [ + new Parsing.BinaryOrUnaryOp({name: 'plus', + match: '+', + leftBindingPower: 60}), + + new Parsing.BinaryOrUnaryOp({name: 'minus', + match: '-', + leftBindingPower: 60}), + + new Parsing.Symbol({name: 'number', + match: /^[0-9]+/, + computeSelf: function() { + return this; + }, + toString: function() { + return this.value; + }}), + + new Parsing.Symbol({name: 'whitespace', + match: /^\s+/, + ignore: true}) + ]; + + var code = '5+1-3+4+ \n -4'; + var tokens = Parsing.tokenize({lexicon: MyLexicon, + text: code}); + + function printTokens(tokens) { + tokens.forEach( + function(token) { + var repr = token.name; + if (token.value) + repr += ":" + token.value; + repr += " @L" + token.lineNo + ":" + token.charNo; + print(repr); + }); + } + + printTokens(tokens); + var parser = new Parsing.Parser(tokens); + print(parser.parse()); } -var TokenConstructors = { - end: function Token_end(data) { - this.leftBindingPower = 0; - this.computeSelf = function computeSelf(parser) { - throw new Error("at end of tokens, nothing to do!"); - }; - this.computeLeft = function computeLeft(parser, left) { - throw new Error("at end of tokens, nothing to do!"); - }; - this.toString = function() { return "end"; }; - this.__proto__ = data; - }, - - number: function Token_number(data) { - this.leftBindingPower = 0; - this.computeSelf = function computeSelf(parser) { - return this; - }; - this.computeLeft = function computeLeft(parser, left) { - throw new Error("numbers can't be used as operators!"); - }; - this.toString = function() { return this.value; }; - this.__proto__ = data; - }, - - minus: function Token_minus(data) { - this.leftBindingPower = 60; - this.computeSelf = function computeSelf(parser) { - this.arity = "unary"; - this.operand = parser.token; - parser.advance(); - return this; - }; - this.computeLeft = function computeLeft(parser, left) { - this.arity = "binary"; - this.leftOperand = left; - this.rightOperand = parser.expression(this.leftBindingPower); - return this; - }; - this.toString = function() { - if (this.arity == "unary") - return "(-" + this.operand + ")"; - else - return ("(" + this.leftOperand + " - " + - this.rightOperand + ")"); - }; - this.__proto__ = data; - }, - - plus: function Token_plus(data) { - this.leftBindingPower = 60; - this.computeSelf = function computeSelf(parser) { - this.arity = "unary"; - this.operand = parser.token; - parser.advance(); - return this; - }; - this.computeLeft = function computeLeft(parser, left) { - this.arity = "binary"; - this.leftOperand = left; - this.rightOperand = parser.expression(this.leftBindingPower); - return this; - }; - this.toString = function() { - if (this.arity == "unary") - return "(+" + this.operand + ")"; - else - return ("(" + this.leftOperand + " + " + - this.rightOperand + ")"); - }; - this.__proto__ = data; - } -}; - -//var code = 'blarg(); if (x+1) {}'; -var code = '5+1-3+4+-4'; -var tokens = tokenize({symbols: Symbols, - tokenConstructors: TokenConstructors, - text: code}); - -function printTokens(tokens) { - tokens.forEach( - function(token) { - var repr = token.name; - if (token.value) - repr += ":" + token.value; - print(repr); - }); -} - -printTokens(tokens); -var parser = new Parser(tokens); -print(parser.parse()); +testParsing(print);