comparison jsparser.js @ 2:032f64260794

refactored tokenizer
author Atul Varma <varmaa@toolness.com>
date Fri, 29 May 2009 22:16:51 -0700
parents 0f1ff7d5a524
children f174153281a9
comparison
equal deleted inserted replaced
1:0f1ff7d5a524 2:032f64260794
18 function tokenize(options) { 18 function tokenize(options) {
19 var symbols = options.symbols; 19 var symbols = options.symbols;
20 var text = options.text; 20 var text = options.text;
21 var tokenConstructors = options.tokenConstructors; 21 var tokenConstructors = options.tokenConstructors;
22 22
23 var symbolArray = [];
24 var symbolFound;
23 var tokens = []; 25 var tokens = [];
24 var lineNo = 0; 26 var lineNo = 0;
25 var charNo = 0; 27 var charNo = 0;
26 28
29 for (name in symbols)
30 symbolArray.push({name: name, symbol: symbols[name]});
31
32 function onSymbolFound(name, value) {
33 symbolFound = true;
34 if (name != 'newline' &&
35 name != 'whitespace')
36 tokens.push(new tokenConstructors[name]({name: name,
37 value: value,
38 charNo: charNo,
39 lineNo: lineNo}));
40 if (name == 'newline') {
41 lineNo += 1;
42 charNo = 0;
43 } else
44 charNo += value.length;
45
46 text = text.slice(value.length);
47 }
48
27 while (text) { 49 while (text) {
28 var token = null; 50 symbolFound = false;
29 for (name in symbols) { 51 for (var i = 0; i < symbolArray.length && !symbolFound; i++) {
30 var symbol = symbols[name]; 52 var name = symbolArray[i].name;
53 var symbol = symbolArray[i].symbol;
54
31 if (typeof(symbol) == 'string') { 55 if (typeof(symbol) == 'string') {
32 if (text.indexOf(symbol) == 0) { 56 if (text.indexOf(symbol) == 0)
33 if (name != 'newline') { 57 onSymbolFound(name, symbol);
34 token = new tokenConstructors[name]({name: name,
35 charNo: charNo,
36 lineNo: lineNo});
37 charNo += symbol.length;
38 } else {
39 lineNo += 1;
40 charNo = 0;
41 }
42 text = text.slice(symbol.length);
43 }
44 } else if (typeof(symbol) == 'function') { 58 } else if (typeof(symbol) == 'function') {
45 // It's a Regular Expression. 59 // It's a Regular Expression.
46 var match = text.match(symbol); 60 var match = text.match(symbol);
47 if (match) { 61 if (match)
48 match = match[0]; 62 onSymbolFound(name, match[0]);
49 if (name != 'whitespace')
50 token = new tokenConstructors[name]({name: name,
51 value: match,
52 charNo: charNo,
53 lineNo: lineNo});
54 charNo += match.length;
55 text = text.slice(match.length);
56 }
57 } else 63 } else
58 throw new Error('Unknown symbol type: ' + symbol); 64 throw new Error('Unknown symbol type: ' + symbol);
59 if (token)
60 break;
61 } 65 }
62 if (!token) 66
67 if (!symbolFound)
63 throw new Error("I have no idea what this is: " + text); 68 throw new Error("I have no idea what this is: " + text);
64 tokens.push(token);
65 } 69 }
66 70
67 tokens.push(new tokenConstructors.end({name: "end", 71 tokens.push(new tokenConstructors.end({name: "end",
68 charNo: charNo, 72 charNo: charNo,
69 lineNo: lineNo})); 73 lineNo: lineNo}));