changeset 0:94358cebb977

origination
author Atul Varma <varmaa@toolness.com>
date Fri, 29 May 2009 15:40:53 -0700
parents
children 0f1ff7d5a524
files jsparser.js
diffstat 1 files changed, 80 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jsparser.js	Fri May 29 15:40:53 2009 -0700
@@ -0,0 +1,80 @@
+// Token table passed to tokenize() as options.symbols: each key is a token
+// name, each value a literal string or a RegExp anchored at the input start.
+var Symbols = {
+  semicolon: ';',
+  leftBracket: '[', rightBracket: ']',
+  leftBrace: '{', rightBrace: '}',
+  leftParen: '(', rightParen: ')',
+  plus: '+',
+  number: /^[0-9]+/,
+  name: /^[A-Za-z]+[0-9A-Za-z_]*/,
+  newline: '\n',
+  // Spaces and tabs only; line breaks are covered by newline above.
+  whitespace: /^[ \t]+/
+};
+
+/**
+ * Splits options.text into an array of tokens. options.symbols maps token
+ * names to literal strings or to RegExps anchored with "^"; they are tried
+ * in enumeration order and the first match at the start of the remaining
+ * text wins. Tokens have name, charNo and lineNo (zero-based); RegExp tokens
+ * also have value, the matched text. The literal named 'newline' and the
+ * RegExp named 'whitespace' produce no token. Throws an Error for a symbol
+ * that is neither string nor RegExp, or when no symbol matches.
+ */
+function tokenize(options) {
+  var symbols = options.symbols;
+  var text = options.text;
+  var tokens = [];
+  var lineNo = 0;
+  var charNo = 0;
+  while (text) {
+    var matched = '';
+    for (var name in symbols) {
+      var symbol = symbols[name];
+      if (typeof symbol === 'string') {
+        if (text.slice(0, symbol.length) === symbol)
+          matched = symbol;
+      } else if (symbol instanceof RegExp) {
+        // typeof a RegExp is 'object' in ES5+ engines, not 'function'.
+        var match = text.match(symbol);
+        matched = match ? match[0] : '';
+      } else {
+        throw new Error('Unknown symbol type: ' + symbol);
+      }
+      // An empty match would consume nothing and loop forever; skip it.
+      if (matched)
+        break;
+    }
+    if (!matched)
+      throw new Error("I have no idea what this is: " + text);
+    // Here name and symbol still refer to the symbol that just matched.
+    var isLiteral = typeof symbol === 'string';
+    if (isLiteral && name === 'newline') {
+      lineNo += 1;
+      charNo = 0;
+    } else {
+      if (isLiteral)
+        tokens.push({name: name, charNo: charNo, lineNo: lineNo});
+      else if (name !== 'whitespace')
+        tokens.push({name: name, value: matched,
+                     charNo: charNo, lineNo: lineNo});
+      charNo += matched.length;
+    }
+    text = text.slice(matched.length);
+  }
+  return tokens;
+}
+
+// Smoke test: tokenize a short snippet and print one line per token.
+// A token that carries a value prints as "name:value"; others as "name".
+
+var code = 'blarg(); if (x+1) {}';
+var tokens = tokenize({text: code, symbols: Symbols});
+tokens.forEach(function(token) {
+  var label = token.name;
+  if (token.value) {
+    label = label + ":" + token.value;
+  }
+  print(label);
+});