js-scanner: view of test_jsscan.py @ 5:815520476fbb (default tip)
accept '/' as a literal (see the sketch after the listing)

author    Atul Varma <avarma@mozilla.com>
date      Thu, 22 Apr 2010 20:03:31 -0700
parents   30c1f55eff96
children  (none)
""" C-style comments: >>> tokenize('/* hello */') ('c_comment', '/* hello */', (1, 0)) C++-style comments: >>> tokenize('// hello') ('cpp_comment', '// hello', (1, 0)) Variable definitions: >>> tokenize(' var k = 1;') ('name', 'var', (1, 2)) ('name', 'k', (1, 6)) ('whitespace', ' ', (1, 7)) ('literal', '=', (1, 8)) ('whitespace', ' ', (1, 9)) ('digits', '1', (1, 10)) ('literal', ';', (1, 11)) Filtering: >>> tokenize(' k', ignore='whitespace') ('name', 'k', (1, 2)) Many double-quoted strings on the same line: >>> tokenize(r'"hello there "+" dude"') ('string', '"hello there "', (1, 0)) ('literal', '+', (1, 14)) ('string', '" dude"', (1, 15)) Many single-quoted strings on the same line: >>> tokenize(r"'hello there '+' dude'") ('string', "'hello there '", (1, 0)) ('literal', '+', (1, 14)) ('string', "' dude'", (1, 15)) Escaped double-quoted strings: >>> tokenize(r'"i say \\"tomato\\""') ('string', '"i say \\\\"tomato\\\\""', (1, 0)) Unterminated double-quoted strings: >>> tokenize(r'"i say \\"tomato\\"') Traceback (most recent call last): ... TokenizationError: unrecognized token '"' @ line 1, char 0 """ import doctest from jsscan import * def tokenize(string, ignore=None): for token in Tokenizer(string).tokenize(ignore=ignore): print token if __name__ == '__main__': doctest.testmod(verbose=True)