# HG changeset patch
# User Atul Varma
# Date 1271967489 25200
# Node ID f82ff2c61c06dd9964fe33b012f9ed202ade3dbb
# Parent 22781f88a8570825cc3d3e2c05430e8fce90ee49
added ignore kwarg

diff -r 22781f88a857 -r f82ff2c61c06 jsscan.py
--- a/jsscan.py Thu Apr 22 13:12:12 2010 -0700
+++ b/jsscan.py Thu Apr 22 13:18:09 2010 -0700
@@ -21,7 +21,12 @@
         self.line = 1
         self.char = 0
 
-    def tokenize(self):
+    def tokenize(self, ignore=None):
+        if ignore is None:
+            ignore = []
+        elif isinstance(ignore, basestring):
+            ignore = [ignore]
+
         while self.pos < len(self.text):
             found = None
             if self.text[self.pos] in self.LITERALS:
@@ -49,7 +54,8 @@
                 self.char += len(tokvalue)
                 continue
             if found is not None:
-                yield found
+                if found[0] not in ignore:
+                    yield found
             else:
                 raise TokenizationError('unrecognized token %s' %
                                         repr(self.text[self.pos]),
diff -r 22781f88a857 -r f82ff2c61c06 test_jsscan.py
--- a/test_jsscan.py Thu Apr 22 13:12:12 2010 -0700
+++ b/test_jsscan.py Thu Apr 22 13:18:09 2010 -0700
@@ -20,6 +20,11 @@
     ('digits', '1', (1, 10))
     ('literal', ';', (1, 11))
 
+Filtering:
+
+    >>> tokenize(' k', ignore='whitespace')
+    ('name', 'k', (1, 2))
+
 Escaped double-quoted strings:
 
     >>> tokenize(r'"i say \\"tomato\\""')
@@ -37,8 +42,8 @@
 
 from jsscan import *
 
-def tokenize(string):
-    for token in Tokenizer(string).tokenize():
+def tokenize(string, ignore=None):
+    for token in Tokenizer(string).tokenize(ignore=ignore):
         print token
 
 if __name__ == '__main__':