"""
C-style comments:

>>> tokenize('/* hello */')
('c_comment', '/* hello */', (1, 0))

C++-style comments:

>>> tokenize('// hello')
('cpp_comment', '// hello', (1, 0))

Variable definitions:

>>> tokenize('  var k = 1;')
('whitespace', '  ', (1, 0))
('name', 'var', (1, 2))
('whitespace', ' ', (1, 5))
('name', 'k', (1, 6))
('whitespace', ' ', (1, 7))
('literal', '=', (1, 8))
('whitespace', ' ', (1, 9))
('digits', '1', (1, 10))
('literal', ';', (1, 11))

Filtering:

>>> tokenize('  k', ignore='whitespace')
('name', 'k', (1, 2))

Many double-quoted strings on the same line:

>>> tokenize(r'"hello there "+" dude"')
('string', '"hello there "', (1, 0))
('literal', '+', (1, 14))
('string', '" dude"', (1, 15))

Many single-quoted strings on the same line:

>>> tokenize(r"'hello there '+' dude'")
('string', "'hello there '", (1, 0))
('literal', '+', (1, 14))
('string', "' dude'", (1, 15))

Escaped double-quoted strings:

>>> tokenize(r'"i say \\"tomato\\""')
('string', '"i say \\\\"tomato\\\\""', (1, 0))

Unterminated double-quoted strings:

>>> tokenize(r'"i say \\"tomato\\"')
Traceback (most recent call last):
...
TokenizationError: unrecognized token '"' @ line 1, char 0
"""

import doctest

from jsscan import *  # Tokenizer (used below) and the TokenizationError named in the doctests

def tokenize(string, ignore=None):
    # Print each token tuple on its own line so the doctests above can match them.
    for token in Tokenizer(string).tokenize(ignore=ignore):
        print(token)
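
# The doctests above assume the jsscan API: Tokenizer(source).tokenize(ignore=...)
# yields (kind, text, (line, col)) tuples with 0-based columns. Below is a minimal
# sketch of a scanner with that interface -- one regex per token class, tried in
# order at each position. It is an illustration only, NOT jsscan's implementation;
# in particular its string pattern is guessed from the expected outputs and does
# not reproduce jsscan's escaped/unterminated-string error behavior.
import re

_SKETCH_SPEC = [
    ('c_comment',   r'/\*.*?\*/'),
    ('cpp_comment', r'//[^\n]*'),
    ('string',      r'"(?:[^"\\\n]|\\.)*"' '|' r"'(?:[^'\\\n]|\\.)*'"),
    ('digits',      r'[0-9]+'),
    ('name',        r'[A-Za-z_$][A-Za-z0-9_$]*'),
    ('whitespace',  r'[ \t]+'),
    ('literal',     r'[^ \t\n]'),  # fallback: any other single character
]
_SKETCH_RE = re.compile('|'.join('(?P<%s>%s)' % pair for pair in _SKETCH_SPEC))

class SketchTokenizer(object):
    """Illustrative stand-in for jsscan.Tokenizer (single-line input only)."""

    def __init__(self, source):
        self.source = source

    def tokenize(self, ignore=None):
        pos = 0
        while pos < len(self.source):
            match = _SKETCH_RE.match(self.source, pos)
            if match is None:  # jsscan raises TokenizationError here instead
                raise ValueError('unrecognized token @ line 1, char %d' % pos)
            if match.lastgroup != ignore:
                yield (match.lastgroup, match.group(), (1, pos))
            pos = match.end()

# For example, SketchTokenizer('  var k = 1;').tokenize() yields tuples matching
# the 'Variable definitions' example in the docstring above.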

if __name__ == '__main__':
    doctest.testmod(verbose=True)
|