Mercurial > jsparser
changeset 1:0f1ff7d5a524
added basic parsing
author | Atul Varma <varmaa@toolness.com> |
---|---|
date | Fri, 29 May 2009 18:13:55 -0700 |
parents | 94358cebb977 |
children | 032f64260794 |
files | jsparser.js |
diffstat | 1 files changed, 136 insertions(+), 24 deletions(-) [+] |
line wrap: on
line diff
--- a/jsparser.js Fri May 29 15:40:53 2009 -0700 +++ b/jsparser.js Fri May 29 18:13:55 2009 -0700 @@ -7,6 +7,8 @@ leftParen: '(', rightParen: ')', plus: '+', + multiply: '*', + minus: '-', number: /^[0-9]+/, name: /^[A-Za-z]+[0-9A-Za-z_]*/, newline: '\n', @@ -16,65 +18,175 @@ function tokenize(options) { var symbols = options.symbols; var text = options.text; + var tokenConstructors = options.tokenConstructors; var tokens = []; var lineNo = 0; var charNo = 0; while (text) { - var wasSymbolFound = false; + var token = null; for (name in symbols) { var symbol = symbols[name]; if (typeof(symbol) == 'string') { if (text.indexOf(symbol) == 0) { if (name != 'newline') { - tokens.push({name: name, - charNo: charNo, - lineNo: lineNo}); + token = new tokenConstructors[name]({name: name, + charNo: charNo, + lineNo: lineNo}); charNo += symbol.length; } else { lineNo += 1; charNo = 0; } text = text.slice(symbol.length); - wasSymbolFound = true; - break; } } else if (typeof(symbol) == 'function') { // It's a Regular Expression. var match = text.match(symbol); if (match) { match = match[0]; - if (name != 'whitespace') { - tokens.push({name: name, - value: match, - charNo: charNo, - lineNo: lineNo}); - } + if (name != 'whitespace') + token = new tokenConstructors[name]({name: name, + value: match, + charNo: charNo, + lineNo: lineNo}); charNo += match.length; text = text.slice(match.length); - wasSymbolFound = true; - break; } } else throw new Error('Unknown symbol type: ' + symbol); + if (token) + break; } - if (!wasSymbolFound) + if (!token) throw new Error("I have no idea what this is: " + text); + tokens.push(token); } + tokens.push(new tokenConstructors.end({name: "end", + charNo: charNo, + lineNo: lineNo})); return tokens; } -// test code +function Parser(tokens) { + var self = this; + + tokens.reverse(); + + this.advance = function advance() { + self.token = tokens.pop(); + }; + + this.expression = function expression(rightBindingPower) { + var leftToken = self.token; + self.advance(); + var leftValue = leftToken.computeSelf(self); + while (rightBindingPower < self.token.leftBindingPower) { + leftToken = self.token; + self.advance(); + leftValue = leftToken.computeLeft(self, leftValue); + } + return leftValue; + }; + + this.parse = function parse() { + this.advance(); + return this.expression(0); + }; +} + +var TokenConstructors = { + end: function Token_end(data) { + this.leftBindingPower = 0; + this.computeSelf = function computeSelf(parser) { + throw new Error("at end of tokens, nothing to do!"); + }; + this.computeLeft = function computeLeft(parser, left) { + throw new Error("at end of tokens, nothing to do!"); + }; + this.toString = function() { return "end"; }; + this.__proto__ = data; + }, + + number: function Token_number(data) { + this.leftBindingPower = 0; + this.computeSelf = function computeSelf(parser) { + return this; + }; + this.computeLeft = function computeLeft(parser, left) { + throw new Error("numbers can't be used as operators!"); + }; + this.toString = function() { return this.value; }; + this.__proto__ = data; + }, -var code = 'blarg(); if (x+1) {}'; + minus: function Token_minus(data) { + this.leftBindingPower = 60; + this.computeSelf = function computeSelf(parser) { + this.arity = "unary"; + this.operand = parser.token; + parser.advance(); + return this; + }; + this.computeLeft = function computeLeft(parser, left) { + this.arity = "binary"; + this.leftOperand = left; + this.rightOperand = parser.expression(this.leftBindingPower); + return this; + }; + this.toString = function() { + if (this.arity == "unary") + return "(-" + this.operand + ")"; + else + return ("(" + this.leftOperand + " - " + + this.rightOperand + ")"); + }; + this.__proto__ = data; + }, + + plus: function Token_plus(data) { + this.leftBindingPower = 60; + this.computeSelf = function computeSelf(parser) { + this.arity = "unary"; + this.operand = parser.token; + parser.advance(); + return this; + }; + this.computeLeft = function computeLeft(parser, left) { + this.arity = "binary"; + this.leftOperand = left; + this.rightOperand = parser.expression(this.leftBindingPower); + return this; + }; + this.toString = function() { + if (this.arity == "unary") + return "(+" + this.operand + ")"; + else + return ("(" + this.leftOperand + " + " + + this.rightOperand + ")"); + }; + this.__proto__ = data; + } +}; + +//var code = 'blarg(); if (x+1) {}'; +var code = '5+1-3+4+-4'; var tokens = tokenize({symbols: Symbols, + tokenConstructors: TokenConstructors, text: code}); -tokens.forEach( - function(token) { - var repr = token.name; - if (token.value) - repr += ":" + token.value; - print(repr); - }); + +function printTokens(tokens) { + tokens.forEach( + function(token) { + var repr = token.name; + if (token.value) + repr += ":" + token.value; + print(repr); + }); +} + +printTokens(tokens); +var parser = new Parser(tokens); +print(parser.parse());