"""

C-style comments:

>>> tokenize('/* hello */')
('c_comment', '/* hello */', (1, 0))

C++-style comments:

>>> tokenize('// hello')
('cpp_comment', '// hello', (1, 0))

Variable definitions:

>>> tokenize('  var k = 1;')
('whitespace', '  ', (1, 0))
('name', 'var', (1, 2))
('whitespace', ' ', (1, 5))
('name', 'k', (1, 6))
('whitespace', ' ', (1, 7))
('literal', '=', (1, 8))
('whitespace', ' ', (1, 9))
('digits', '1', (1, 10))
('literal', ';', (1, 11))

Filtering:

>>> tokenize('  k', ignore='whitespace')
('name', 'k', (1, 2))

Escaped double-quoted strings:

>>> tokenize(r'"i say \\"tomato\\""')
('string', '"i say \\\\"tomato\\\\""', (1, 0))

Unterminated double-quoted strings:

>>> tokenize(r'"i say \\"tomato\\"')
Traceback (most recent call last):
...
TokenizationError: unrecognized token '"' @ line 1, char 0
"""

import doctest

from jsscan import *
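# The wildcard import supplies the Tokenizer class used by tokenize() below.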

def tokenize(string, ignore=None):
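    """Print every token in `string`, skipping any token type named by `ignore`."""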
    for token in Tokenizer(string).tokenize(ignore=ignore):
        print(token)

if __name__ == '__main__':
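    # Run the doctests in the module docstring; verbose=True reports each example.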
    doctest.testmod(verbose=True)