Mercurial > js-scanner

--- a/jsscan.py	Thu Apr 22 13:12:12 2010 -0700
+++ b/jsscan.py	Thu Apr 22 13:18:09 2010 -0700
@@ -21,7 +21,12 @@
         self.line = 1
         self.char = 0

-    def tokenize(self):
+    def tokenize(self, ignore=None):
+        if ignore is None:
+            ignore = []
+        elif isinstance(ignore, basestring):
+            ignore = [ignore]
+
         while self.pos < len(self.text):
             found = None
             if self.text[self.pos] in self.LITERALS:
@@ -49,7 +54,8 @@
                             self.char += len(tokvalue)
                         continue
             if found is not None:
-                yield found
+                if found[0] not in ignore:
+                    yield found
             else:
                 raise TokenizationError('unrecognized token %s' %
                                         repr(self.text[self.pos]),
--- a/test_jsscan.py	Thu Apr 22 13:12:12 2010 -0700
+++ b/test_jsscan.py	Thu Apr 22 13:18:09 2010 -0700
@@ -20,6 +20,11 @@
     ('digits', '1', (1, 10))
     ('literal', ';', (1, 11))

+Filtering:
+
+    >>> tokenize('  k', ignore='whitespace')
+    ('name', 'k', (1, 2))
+
 Escaped double-quoted strings:

     >>> tokenize(r'"i say \\"tomato\\""')
@@ -37,8 +42,8 @@

 from jsscan import *

-def tokenize(string):
-    for token in Tokenizer(string).tokenize():
+def tokenize(string, ignore=None):
+    for token in Tokenizer(string).tokenize(ignore=ignore):
         print token

 if __name__ == '__main__':