// jsparser.js
// Recovered from Mercurial changeset 94358cebb977 ("origination"),
// authored by Atul Varma, Fri May 29 15:40:53 2009 -0700.

/**
 * Default symbol table for tokenize().
 *
 * Each key is a token name. A string value is matched literally at the start
 * of the remaining input; a RegExp value is matched with String#match and is
 * expected to be anchored with `^`. Symbols are tried in declaration order and
 * the first one that matches wins.
 */
var Symbols = {
  semicolon: ';',
  leftBracket: '[',
  rightBracket: ']',
  leftBrace: '{',
  rightBrace: '}',
  leftParen: '(',
  rightParen: ')',
  plus: '+',
  number: /^[0-9]+/,
  name: /^[A-Za-z]+[0-9A-Za-z_]*/,
  newline: '\n',
  whitespace: /^[ \t]+/
};

/**
 * Splits source text into a flat list of tokens.
 *
 * @param {Object} options
 * @param {Object} options.symbols - Map of token name to a literal string or
 *     a RegExp (see Symbols for the expected shape).
 * @param {string} options.text - The text to tokenize.
 * @returns {Array<Object>} Tokens in input order. Every token carries `name`,
 *     `charNo` and `lineNo` (both zero-based); tokens produced by a RegExp
 *     symbol additionally carry `value`, the matched text. The symbols named
 *     'newline' and 'whitespace' are consumed without producing a token.
 * @throws {Error} If a symbol is neither a string nor a RegExp, or if no
 *     symbol matches the remaining text.
 */
function tokenize(options) {
  var symbols = options.symbols;
  var text = options.text;

  var tokens = [];
  var lineNo = 0;
  var charNo = 0;

  while (text) {
    var wasSymbolFound = false;

    // `name` is declared here: the loop variable must not leak as an implicit
    // global (which is also a ReferenceError in strict-mode/module code).
    for (var name in symbols) {
      // Ignore enumerable properties inherited through the prototype chain.
      if (!Object.prototype.hasOwnProperty.call(symbols, name)) {
        continue;
      }

      var symbol = symbols[name];

      if (typeof symbol === 'string') {
        // Compare only the prefix instead of scanning the whole remaining
        // text, and skip empty symbols, which would never consume any input.
        if (symbol.length > 0 && text.slice(0, symbol.length) === symbol) {
          if (name !== 'newline') {
            tokens.push({name: name,
                         charNo: charNo,
                         lineNo: lineNo});
            charNo += symbol.length;
          } else {
            // A newline starts a fresh line and emits no token.
            lineNo += 1;
            charNo = 0;
          }
          text = text.slice(symbol.length);
          wasSymbolFound = true;
          break;
        }
      } else if (symbol instanceof RegExp) {
        // `typeof` reports 'object' for regular expressions in modern
        // engines (only some old ones said 'function'), so test the type
        // directly.
        var match = text.match(symbol);
        // An empty match consumes nothing and would loop forever; treat it
        // as "no match" so the remaining symbols still get a chance.
        if (match && match[0].length > 0) {
          match = match[0];
          if (name !== 'whitespace') {
            tokens.push({name: name,
                         value: match,
                         charNo: charNo,
                         lineNo: lineNo});
          }
          charNo += match.length;
          text = text.slice(match.length);
          wasSymbolFound = true;
          break;
        }
      } else {
        throw new Error('Unknown symbol type: ' + symbol);
      }
    }

    if (!wasSymbolFound) {
      throw new Error("I have no idea what this is: " + text);
    }
  }

  return tokens;
}

// test code

var code = 'blarg(); if (x+1) {}';
var tokens = tokenize({symbols: Symbols,
                       text: code});

// `print` only exists in JavaScript shells; use console.log everywhere else
// so the demo does not crash.
var printLine = (typeof print === 'function')
  ? print
  : function(line) { console.log(line); };

tokens.forEach(
  function(token) {
    var repr = token.name;
    if (token.value)
      repr += ":" + token.value;
    printLine(repr);
  });