js-scanner: view of test_jsscan.py @ 5:815520476fbb (default tip)
accept '/' as a literal (see the sketch after the listing)

author    Atul Varma <avarma@mozilla.com>
date      Thu, 22 Apr 2010 20:03:31 -0700
parents   30c1f55eff96
children  (none)
""" C-style comments: >>> tokenize('/* hello */') ('c_comment', '/* hello */', (1, 0)) C++-style comments: >>> tokenize('// hello') ('cpp_comment', '// hello', (1, 0)) Variable definitions: >>> tokenize(' var k = 1;') ('name', 'var', (1, 2)) ('name', 'k', (1, 6)) ('whitespace', ' ', (1, 7)) ('literal', '=', (1, 8)) ('whitespace', ' ', (1, 9)) ('digits', '1', (1, 10)) ('literal', ';', (1, 11)) Filtering: >>> tokenize(' k', ignore='whitespace') ('name', 'k', (1, 2)) Many double-quoted strings on the same line: >>> tokenize(r'"hello there "+" dude"') ('string', '"hello there "', (1, 0)) ('literal', '+', (1, 14)) ('string', '" dude"', (1, 15)) Many single-quoted strings on the same line: >>> tokenize(r"'hello there '+' dude'") ('string', "'hello there '", (1, 0)) ('literal', '+', (1, 14)) ('string', "' dude'", (1, 15)) Escaped double-quoted strings: >>> tokenize(r'"i say \\"tomato\\""') ('string', '"i say \\\\"tomato\\\\""', (1, 0)) Unterminated double-quoted strings: >>> tokenize(r'"i say \\"tomato\\"') Traceback (most recent call last): ... TokenizationError: unrecognized token '"' @ line 1, char 0 """ import doctest from jsscan import * def tokenize(string, ignore=None): for token in Tokenizer(string).tokenize(ignore=ignore): print token if __name__ == '__main__': doctest.testmod(verbose=True)