diff jsparser.js @ 3:f174153281a9

Massive refactoring; an almost-complete rewrite.
author Atul Varma <varmaa@toolness.com>
date Sat, 30 May 2009 13:23:01 -0700
parents 032f64260794
children 559378a3ec26
line wrap: on
line diff
--- a/jsparser.js	Fri May 29 22:16:51 2009 -0700
+++ b/jsparser.js	Sat May 30 13:23:01 2009 -0700
@@ -1,80 +1,142 @@
-var Symbols = {
-  semicolon: ';',
-  leftBracket: '[',
-  rightBracket: ']',
-  leftBrace: '{',
-  rightBrace: '}',
-  leftParen: '(',
-  rightParen: ')',
-  plus: '+',
-  multiply: '*',
-  minus: '-',
-  number: /^[0-9]+/,
-  name: /^[A-Za-z]+[0-9A-Za-z_]*/,
-  newline: '\n',
-  whitespace: /^[ \t]+/
-  };
+var Parsing = {};
+
+Parsing.Symbol = function Symbol(options) {
+  if (options)
+    for (name in options)
+      if (options.hasOwnProperty(name))
+        this[name] = options[name];
+};
+
+Parsing.Symbol.prototype = {
+  leftBindingPower: 0,
+  computeSelf: function() {
+    throw new Error("No computeSelf for " + this.name);
+  },
+  computeLeft: function() {
+    throw new Error("No computeLeft for " + this.name);
+  },
+  extend: function(contents) {
+    function Subclass() {}
+    Subclass.prototype = this;
+    var instance = new Subclass();
+    if (contents)
+      for (name in contents)
+        if (contents.hasOwnProperty(name))
+          instance[name] = contents[name];
+    return instance;
+  },
+  makeTokenAt: function(text) {
+    var matchValue = null;
+    if (typeof(this.match) == 'string') {
+      if (text.indexOf(this.match) == 0)
+        matchValue = this.match;
+    } else {
+      // It's a Regular Expression.
+      var match = text.match(this.match);
+      if (match)
+        matchValue = match[0];
+    }
+    if (matchValue)
+      return this.extend({value: matchValue});
+    return null;
+  }
+};
+
+Parsing.BinaryOrUnaryOp = function BinaryOrUnaryOp(options) {
+  Parsing.Symbol.call(this, options);
+};
 
-function tokenize(options) {
-  var symbols = options.symbols;
+Parsing.BinaryOrUnaryOp.prototype = new Parsing.Symbol(
+  {computeSelf: function(parser) {
+     var unaryOp = this.extend(Parsing.UnaryOperator.prototype);
+     return unaryOp.computeSelf(parser);
+   },
+   computeLeft: function(parser, left) {
+     var binaryOp = this.extend(Parsing.BinaryOperator.prototype);
+     return binaryOp.computeLeft(parser, left);
+   }
+  });
+
+Parsing.UnaryOperator = function UnaryOperator(options) {
+  Parsing.Symbol.call(this, options);
+};
+
+Parsing.UnaryOperator.prototype = new Parsing.Symbol(
+  {computeSelf: function(parser) {
+     this.arity = "unary";
+     this.operand = parser.token;
+     parser.advance();
+     return this;
+   },
+   toString: function() {
+     return "(" + this.match + this.operand + ")";
+   }
+  });
+
+Parsing.BinaryOperator = function BinaryOperator(options) {
+  Parsing.Symbol.call(this, options);
+};
+
+Parsing.BinaryOperator.prototype = new Parsing.Symbol(
+  {computeLeft: function(parser, left) {
+     this.arity = "binary";
+     this.leftOperand = left;
+     this.rightOperand = parser.expression(this.leftBindingPower);
+     return this;
+   },
+   toString: function() {
+     return ("(" + this.leftOperand + " " + this.match + " " +
+             this.rightOperand + ")");
+   }
+  });
+
+Parsing.tokenize = function tokenize(options) {
+  var lexicon = options.lexicon;
   var text = options.text;
-  var tokenConstructors = options.tokenConstructors;
 
-  var symbolArray = [];
-  var symbolFound;
+  var tokenFound;
   var tokens = [];
   var lineNo = 0;
   var charNo = 0;
 
-  for (name in symbols)
-    symbolArray.push({name: name, symbol: symbols[name]});
-
-  function onSymbolFound(name, value) {
-    symbolFound = true;
-    if (name != 'newline' &&
-        name != 'whitespace')
-      tokens.push(new tokenConstructors[name]({name: name,
-                                               value: value,
-                                               charNo: charNo,
-                                               lineNo: lineNo}));
-    if (name == 'newline') {
-      lineNo += 1;
-      charNo = 0;
-    } else
-      charNo += value.length;
+  while (text) {
+    tokenFound = false;
+    for (var i = 0; i < lexicon.length && !tokenFound; i++) {
+      var symbol = lexicon[i];
 
-    text = text.slice(value.length);
-  }
-
-  while (text) {
-    symbolFound = false;
-    for (var i = 0; i < symbolArray.length && !symbolFound; i++) {
-      var name = symbolArray[i].name;
-      var symbol = symbolArray[i].symbol;
+      var matchedToken = symbol.makeTokenAt(text);
+      if (matchedToken) {
+        tokenFound = true;
+        if (!matchedToken.ignore) {
+          matchedToken.lineNo = lineNo;
+          matchedToken.charNo = charNo;
+          tokens.push(matchedToken);
+        }
+        var matchedValue = matchedToken.value;
+        text = text.slice(matchedValue.length);
 
-      if (typeof(symbol) == 'string') {
-        if (text.indexOf(symbol) == 0)
-          onSymbolFound(name, symbol);
-      } else if (typeof(symbol) == 'function') {
-        // It's a Regular Expression.
-        var match = text.match(symbol);
-        if (match)
-          onSymbolFound(name, match[0]);
-      } else
-        throw new Error('Unknown symbol type: ' + symbol);
+        // Increment the current line and character number.
+        var nextNewline;
+        while ((nextNewline = matchedValue.indexOf('\n')) != -1) {
+          lineNo += 1;
+          charNo = 0;
+          matchedValue = matchedValue.slice(nextNewline + 1);
+        }
+        charNo += matchedValue.length;
+      }
     }
 
-    if (!symbolFound)
+    if (!tokenFound)
       throw new Error("I have no idea what this is: " + text);
   }
 
-  tokens.push(new tokenConstructors.end({name: "end",
-                                         charNo: charNo,
-                                         lineNo: lineNo}));
+  tokens.push(new Parsing.Symbol({name: "end",
+                                  charNo: charNo,
+                                  lineNo: lineNo}));
   return tokens;
-}
+};
 
-function Parser(tokens) {
+Parsing.Parser = function Parser(tokens) {
   var self = this;
 
   tokens.reverse();
@@ -99,98 +161,50 @@
     this.advance();
     return this.expression(0);
   };
+};
+
+function testParsing(print) {
+  var MyLexicon = [
+    new Parsing.BinaryOrUnaryOp({name: 'plus',
+                                 match: '+',
+                                 leftBindingPower: 60}),
+
+    new Parsing.BinaryOrUnaryOp({name: 'minus',
+                                 match: '-',
+                                 leftBindingPower: 60}),
+
+    new Parsing.Symbol({name: 'number',
+                        match: /^[0-9]+/,
+                        computeSelf: function() {
+                          return this;
+                        },
+                        toString: function() {
+                          return this.value;
+                        }}),
+
+    new Parsing.Symbol({name: 'whitespace',
+                        match: /^\s+/,
+                        ignore: true})
+  ];
+
+  var code = '5+1-3+4+       \n     -4';
+  var tokens = Parsing.tokenize({lexicon: MyLexicon,
+                                 text: code});
+
+  function printTokens(tokens) {
+    tokens.forEach(
+      function(token) {
+        var repr = token.name;
+        if (token.value)
+          repr += ":" + token.value;
+        repr += " @L" + token.lineNo + ":" + token.charNo;
+        print(repr);
+      });
+  }
+
+  printTokens(tokens);
+  var parser = new Parsing.Parser(tokens);
+  print(parser.parse());
 }
 
-var TokenConstructors = {
-  end: function Token_end(data) {
-    this.leftBindingPower = 0;
-    this.computeSelf = function computeSelf(parser) {
-      throw new Error("at end of tokens, nothing to do!");
-    };
-    this.computeLeft = function computeLeft(parser, left) {
-      throw new Error("at end of tokens, nothing to do!");
-    };
-    this.toString = function() { return "end"; };
-    this.__proto__ = data;
-  },
-
-  number: function Token_number(data) {
-    this.leftBindingPower = 0;
-    this.computeSelf = function computeSelf(parser) {
-      return this;
-    };
-    this.computeLeft = function computeLeft(parser, left) {
-      throw new Error("numbers can't be used as operators!");
-    };
-    this.toString = function() { return this.value; };
-    this.__proto__ = data;
-  },
-
-  minus: function Token_minus(data) {
-    this.leftBindingPower = 60;
-    this.computeSelf = function computeSelf(parser) {
-      this.arity = "unary";
-      this.operand = parser.token;
-      parser.advance();
-      return this;
-    };
-    this.computeLeft = function computeLeft(parser, left) {
-      this.arity = "binary";
-      this.leftOperand = left;
-      this.rightOperand = parser.expression(this.leftBindingPower);
-      return this;
-    };
-    this.toString = function() {
-      if (this.arity == "unary")
-        return "(-" + this.operand + ")";
-      else
-        return ("(" + this.leftOperand + " - " +
-                this.rightOperand + ")");
-    };
-    this.__proto__ = data;
-  },
-
-  plus: function Token_plus(data) {
-    this.leftBindingPower = 60;
-    this.computeSelf = function computeSelf(parser) {
-      this.arity = "unary";
-      this.operand = parser.token;
-      parser.advance();
-      return this;
-    };
-    this.computeLeft = function computeLeft(parser, left) {
-      this.arity = "binary";
-      this.leftOperand = left;
-      this.rightOperand = parser.expression(this.leftBindingPower);
-      return this;
-    };
-    this.toString = function() {
-      if (this.arity == "unary")
-        return "(+" + this.operand + ")";
-      else
-        return ("(" + this.leftOperand + " + " +
-                this.rightOperand + ")");
-    };
-    this.__proto__ = data;
-  }
-};
-
-//var code = 'blarg(); if (x+1) {}';
-var code = '5+1-3+4+-4';
-var tokens = tokenize({symbols: Symbols,
-                       tokenConstructors: TokenConstructors,
-                       text: code});
-
-function printTokens(tokens) {
-  tokens.forEach(
-    function(token) {
-      var repr = token.name;
-      if (token.value)
-        repr += ":" + token.value;
-      print(repr);
-    });
-}
-
-printTokens(tokens);
-var parser = new Parser(tokens);
-print(parser.parse());
+testParsing(print);