Mercurial > jsparser
comparison jsparser.js @ 2:032f64260794
refactored tokenizer
author | Atul Varma <varmaa@toolness.com> |
---|---|
date | Fri, 29 May 2009 22:16:51 -0700 |
parents | 0f1ff7d5a524 |
children | f174153281a9 |
comparison
legend: equal | deleted | inserted | replaced
1:0f1ff7d5a524 | 2:032f64260794 |
---|---|
18 function tokenize(options) { | 18 function tokenize(options) { |
19 var symbols = options.symbols; | 19 var symbols = options.symbols; |
20 var text = options.text; | 20 var text = options.text; |
21 var tokenConstructors = options.tokenConstructors; | 21 var tokenConstructors = options.tokenConstructors; |
22 | 22 |
23 var symbolArray = []; | |
24 var symbolFound; | |
23 var tokens = []; | 25 var tokens = []; |
24 var lineNo = 0; | 26 var lineNo = 0; |
25 var charNo = 0; | 27 var charNo = 0; |
26 | 28 |
29 for (name in symbols) | |
30 symbolArray.push({name: name, symbol: symbols[name]}); | |
31 | |
32 function onSymbolFound(name, value) { | |
33 symbolFound = true; | |
34 if (name != 'newline' && | |
35 name != 'whitespace') | |
36 tokens.push(new tokenConstructors[name]({name: name, | |
37 value: value, | |
38 charNo: charNo, | |
39 lineNo: lineNo})); | |
40 if (name == 'newline') { | |
41 lineNo += 1; | |
42 charNo = 0; | |
43 } else | |
44 charNo += value.length; | |
45 | |
46 text = text.slice(value.length); | |
47 } | |
48 | |
27 while (text) { | 49 while (text) { |
28 var token = null; | 50 symbolFound = false; |
29 for (name in symbols) { | 51 for (var i = 0; i < symbolArray.length && !symbolFound; i++) { |
30 var symbol = symbols[name]; | 52 var name = symbolArray[i].name; |
53 var symbol = symbolArray[i].symbol; | |
54 | |
31 if (typeof(symbol) == 'string') { | 55 if (typeof(symbol) == 'string') { |
32 if (text.indexOf(symbol) == 0) { | 56 if (text.indexOf(symbol) == 0) |
33 if (name != 'newline') { | 57 onSymbolFound(name, symbol); |
34 token = new tokenConstructors[name]({name: name, | |
35 charNo: charNo, | |
36 lineNo: lineNo}); | |
37 charNo += symbol.length; | |
38 } else { | |
39 lineNo += 1; | |
40 charNo = 0; | |
41 } | |
42 text = text.slice(symbol.length); | |
43 } | |
44 } else if (typeof(symbol) == 'function') { | 58 } else if (typeof(symbol) == 'function') { |
45 // It's a Regular Expression. | 59 // It's a Regular Expression. |
46 var match = text.match(symbol); | 60 var match = text.match(symbol); |
47 if (match) { | 61 if (match) |
48 match = match[0]; | 62 onSymbolFound(name, match[0]); |
49 if (name != 'whitespace') | |
50 token = new tokenConstructors[name]({name: name, | |
51 value: match, | |
52 charNo: charNo, | |
53 lineNo: lineNo}); | |
54 charNo += match.length; | |
55 text = text.slice(match.length); | |
56 } | |
57 } else | 63 } else |
58 throw new Error('Unknown symbol type: ' + symbol); | 64 throw new Error('Unknown symbol type: ' + symbol); |
59 if (token) | |
60 break; | |
61 } | 65 } |
62 if (!token) | 66 |
67 if (!symbolFound) | |
63 throw new Error("I have no idea what this is: " + text); | 68 throw new Error("I have no idea what this is: " + text); |
64 tokens.push(token); | |
65 } | 69 } |
66 | 70 |
67 tokens.push(new tokenConstructors.end({name: "end", | 71 tokens.push(new tokenConstructors.end({name: "end", |
68 charNo: charNo, | 72 charNo: charNo, |
69 lineNo: lineNo})); | 73 lineNo: lineNo})); |