changeset 2:f82ff2c61c06

added ignore kwarg
author Atul Varma <avarma@mozilla.com>
date Thu, 22 Apr 2010 13:18:09 -0700
parents 22781f88a857
children ce894f57b30c
files jsscan.py test_jsscan.py
diffstat 2 files changed, 15 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/jsscan.py	Thu Apr 22 13:12:12 2010 -0700
+++ b/jsscan.py	Thu Apr 22 13:18:09 2010 -0700
@@ -21,7 +21,12 @@
         self.line = 1
         self.char = 0
 
-    def tokenize(self):
+    def tokenize(self, ignore=None):
+        if ignore is None:
+            ignore = []
+        elif isinstance(ignore, basestring):
+            ignore = [ignore]
+
         while self.pos < len(self.text):
             found = None
             if self.text[self.pos] in self.LITERALS:
@@ -49,7 +54,8 @@
                             self.char += len(tokvalue)
                         continue
             if found is not None:
-                yield found
+                if found[0] not in ignore:
+                    yield found
             else:
                 raise TokenizationError('unrecognized token %s' %
                                         repr(self.text[self.pos]),
--- a/test_jsscan.py	Thu Apr 22 13:12:12 2010 -0700
+++ b/test_jsscan.py	Thu Apr 22 13:18:09 2010 -0700
@@ -20,6 +20,11 @@
     ('digits', '1', (1, 10))
     ('literal', ';', (1, 11))
 
+Filtering:
+
+    >>> tokenize('  k', ignore='whitespace')
+    ('name', 'k', (1, 2))
+
 Escaped double-quoted strings:
 
     >>> tokenize(r'"i say \\"tomato\\""')
@@ -37,8 +42,8 @@
 
 from jsscan import *
 
-def tokenize(string):
-    for token in Tokenizer(string).tokenize():
+def tokenize(string, ignore=None):
+    for token in Tokenizer(string).tokenize(ignore=ignore):
         print token
 
 if __name__ == '__main__':