changeset 4:30c1f55eff96

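Fixed greedy regexp bug: the string token patterns now use non-greedy `.*?`, so several quoted strings on one line are tokenized separately.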
author Atul Varma <avarma@mozilla.com>
date Thu, 22 Apr 2010 17:31:32 -0700
parents ce894f57b30c
children 815520476fbb
files jsscan.py test_jsscan.py
diffstat 2 files changed, 16 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/jsscan.py	Thu Apr 22 13:38:20 2010 -0700
+++ b/jsscan.py	Thu Apr 22 17:31:32 2010 -0700
@@ -3,9 +3,9 @@
 class Tokenizer(object):
     TOKENS = dict(
         whitespace=(r'\s+', re.MULTILINE),
-        string=(r'(".*(?<!\\)")'
+        string=(r'(".*?(?<!\\)")'
                 r'|'
-                r"('.*(?<!\\)')"),
+                r"('.*?(?<!\\)')"),
         c_comment=(r'\/\*.*\*\/', re.MULTILINE | re.DOTALL),
         cpp_comment=r'\/\/.*',
         name=r'[A-Za-z$_][\w]*',
--- a/test_jsscan.py	Thu Apr 22 13:38:20 2010 -0700
+++ b/test_jsscan.py	Thu Apr 22 17:31:32 2010 -0700
@@ -25,6 +25,20 @@
     >>> tokenize('  k', ignore='whitespace')
     ('name', 'k', (1, 2))
 
+Many double-quoted strings on the same line:
+
+    >>> tokenize(r'"hello there "+" dude"')
+    ('string', '"hello there "', (1, 0))
+    ('literal', '+', (1, 14))
+    ('string', '" dude"', (1, 15))
+
+Many single-quoted strings on the same line:
+
+    >>> tokenize(r"'hello there '+' dude'")
+    ('string', "'hello there '", (1, 0))
+    ('literal', '+', (1, 14))
+    ('string', "' dude'", (1, 15))
+
 Escaped double-quoted strings:
 
     >>> tokenize(r'"i say \\"tomato\\""')