changeset 1:0f1ff7d5a524

added basic parsing
author Atul Varma <varmaa@toolness.com>
date Fri, 29 May 2009 18:13:55 -0700
parents 94358cebb977
children 032f64260794
files jsparser.js
diffstat 1 files changed, 136 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/jsparser.js	Fri May 29 15:40:53 2009 -0700
+++ b/jsparser.js	Fri May 29 18:13:55 2009 -0700
@@ -7,6 +7,8 @@
   leftParen: '(',
   rightParen: ')',
   plus: '+',
+  multiply: '*',
+  minus: '-',
   number: /^[0-9]+/,
   name: /^[A-Za-z]+[0-9A-Za-z_]*/,
   newline: '\n',
@@ -16,65 +18,175 @@
 function tokenize(options) {
   var symbols = options.symbols;
   var text = options.text;
+  var tokenConstructors = options.tokenConstructors;  // Map from symbol name to the constructor used to build that token.
 
   var tokens = [];
   var lineNo = 0;
   var charNo = 0;
 
   while (text) {
-    var wasSymbolFound = false;
+    var token = null;  // Token produced by this pass over the symbol table, if any.
     for (name in symbols) {
       var symbol = symbols[name];
       if (typeof(symbol) == 'string') {
         if (text.indexOf(symbol) == 0) {
           if (name != 'newline') {
-            tokens.push({name: name,
-                         charNo: charNo,
-                         lineNo: lineNo});
+            token = new tokenConstructors[name]({name: name,  // Literal symbol: the token data is just its name and position.
+                                                 charNo: charNo,
+                                                 lineNo: lineNo});
             charNo += symbol.length;
           } else {
             lineNo += 1;
             charNo = 0;
           }
           text = text.slice(symbol.length);
-          wasSymbolFound = true;
-          break;
         }
       } else if (typeof(symbol) == 'function') {
         // It's a Regular Expression.
         var match = text.match(symbol);
         if (match) {
           match = match[0];
-          if (name != 'whitespace') {
-            tokens.push({name: name,
-                         value: match,
-                         charNo: charNo,
-                         lineNo: lineNo});
-          }
+          if (name != 'whitespace')  // Whitespace is consumed below but yields no token.
+            token = new tokenConstructors[name]({name: name,
+                                                 value: match,
+                                                 charNo: charNo,
+                                                 lineNo: lineNo});
           charNo += match.length;
           text = text.slice(match.length);
-          wasSymbolFound = true;
-          break;
         }
       } else
         throw new Error('Unknown symbol type: ' + symbol);
+      if (token)  // Stop scanning symbols as soon as one of them produced a token.
+        break;
     }
-    if (!wasSymbolFound)
+    if (!token)  // NOTE(review): token also stays null when only a skipped newline/whitespace matched in this pass, so such input looks like it reaches this throw - confirm.
       throw new Error("I have no idea what this is: " + text);
+    tokens.push(token);  // Exactly one token is appended per iteration of the outer loop.
   }
 
+  tokens.push(new tokenConstructors.end({name: "end",  // Always finish with an "end" token positioned after the last consumed character.
+                                         charNo: charNo,
+                                         lineNo: lineNo}));
   return tokens;
 }
 
-// test code
+function Parser(tokens) {  // Expression parser driven by each token's leftBindingPower, computeSelf and computeLeft.
+  var self = this;
+
+  tokens.reverse();  // Reverses the caller's array in place so that pop() below hands tokens out in their original order.
+
+  this.advance = function advance() {
+    self.token = tokens.pop();  // Current token; becomes undefined once the array has been emptied.
+  };
+
+  this.expression = function expression(rightBindingPower) {
+    var leftToken = self.token;
+    self.advance();
+    var leftValue = leftToken.computeSelf(self);  // Evaluate the token as one that has nothing to its left.
+    while (rightBindingPower < self.token.leftBindingPower) {  // Continue only while the next token binds tighter than the caller's power.
+      leftToken = self.token;
+      self.advance();
+      leftValue = leftToken.computeLeft(self, leftValue);  // The token combines the value parsed so far with whatever it parses next.
+    }
+    return leftValue;
+  };
+
+  this.parse = function parse() {
+    this.advance();  // Load the first token before parsing starts.
+    return this.expression(0);
+  };
+}
+
+var TokenConstructors = {  // Constructors keyed by symbol name; each receives the raw token data built in tokenize().
+  end: function Token_end(data) {
+    this.leftBindingPower = 0;  // Never exceeds a non-negative rightBindingPower, so Parser.expression's loop stops here.
+    this.computeSelf = function computeSelf(parser) {
+      throw new Error("at end of tokens, nothing to do!");
+    };
+    this.computeLeft = function computeLeft(parser, left) {
+      throw new Error("at end of tokens, nothing to do!");
+    };
+    this.toString = function() { return "end"; };
+    this.__proto__ = data;  // The raw data object becomes the prototype, so its fields (name, charNo, lineNo) are inherited.
+  },
+
+  number: function Token_number(data) {
+    this.leftBindingPower = 0;
+    this.computeSelf = function computeSelf(parser) {
+      return this;  // A number evaluates to its own token.
+    };
+    this.computeLeft = function computeLeft(parser, left) {
+      throw new Error("numbers can't be used as operators!");
+    };
+    this.toString = function() { return this.value; };  // value is inherited from data (the matched text).
+    this.__proto__ = data;
+  },
 
-var code = 'blarg(); if (x+1) {}';
+  minus: function Token_minus(data) {
+    this.leftBindingPower = 60;
+    this.computeSelf = function computeSelf(parser) {
+      this.arity = "unary";
+      this.operand = parser.token;  // The operand is the very next token taken as-is; it is not parsed via parser.expression().
+      parser.advance();
+      return this;
+    };
+    this.computeLeft = function computeLeft(parser, left) {
+      this.arity = "binary";
+      this.leftOperand = left;
+      this.rightOperand = parser.expression(this.leftBindingPower);  // Passing its own power stops at the next equal-power operator, giving left-to-right grouping.
+      return this;
+    };
+    this.toString = function() {
+      if (this.arity == "unary")
+        return "(-" + this.operand + ")";
+      else
+        return ("(" + this.leftOperand + " - " +
+                this.rightOperand + ")");
+    };
+    this.__proto__ = data;
+  },
+
+  plus: function Token_plus(data) {  // Same structure as minus above; only the rendered operator differs.
+    this.leftBindingPower = 60;
+    this.computeSelf = function computeSelf(parser) {
+      this.arity = "unary";
+      this.operand = parser.token;  // Next token taken as-is, as in minus.computeSelf.
+      parser.advance();
+      return this;
+    };
+    this.computeLeft = function computeLeft(parser, left) {
+      this.arity = "binary";
+      this.leftOperand = left;
+      this.rightOperand = parser.expression(this.leftBindingPower);
+      return this;
+    };
+    this.toString = function() {
+      if (this.arity == "unary")
+        return "(+" + this.operand + ")";
+      else
+        return ("(" + this.leftOperand + " + " +
+                this.rightOperand + ")");
+    };
+    this.__proto__ = data;
+  }
+};
+
+//var code = 'blarg(); if (x+1) {}';
+var code = '5+1-3+4+-4';  // Sample input using only digits, '+' and '-', i.e. only symbols that TokenConstructors has entries for.
 var tokens = tokenize({symbols: Symbols,
+                       tokenConstructors: TokenConstructors,  // tokenize() looks up the constructor for each matched symbol name here.
                        text: code});
-tokens.forEach(
-  function(token) {
-    var repr = token.name;
-    if (token.value)
-      repr += ":" + token.value;
-    print(repr);
-  });
+
+function printTokens(tokens) {  // Prints one line per token: its name, followed by ":value" when the token has a truthy value.
+  tokens.forEach(
+    function(token) {
+      var repr = token.name;
+      if (token.value)
+        repr += ":" + token.value;
+      print(repr);  // print is not defined in this view; presumably the host shell's built-in - confirm.
+    });
+}
+
+printTokens(tokens);  // Done before constructing the Parser, which reverses and then consumes this same array.
+var parser = new Parser(tokens);
+print(parser.parse());  // Assumes print() stringifies its argument, which would invoke the token toString methods - confirm.