# HG changeset patch
# User Atul Varma <avarma@mozilla.com>
# Date 1271982692 25200
# Node ID 30c1f55eff96842f7660409411c5976eca18533d
# Parent  ce894f57b30ce9900a2a62aeb3ca65ff65407ae2
fixed greedy regexp bug

diff -r ce894f57b30c -r 30c1f55eff96 jsscan.py
--- a/jsscan.py	Thu Apr 22 13:38:20 2010 -0700
+++ b/jsscan.py	Thu Apr 22 17:31:32 2010 -0700
@@ -3,9 +3,9 @@
 class Tokenizer(object):
     TOKENS = dict(
         whitespace=(r'\s+', re.MULTILINE),
-        string=(r'(".*(?<!\\)")'
+        string=(r'(".*?(?<!\\)")'
                 r'|'
-                r"('.*(?<!\\)')"),
+                r"('.*?(?<!\\)')"),
         c_comment=(r'\/\*.*\*\/', re.MULTILINE | re.DOTALL),
         cpp_comment=r'\/\/.*',
         name=r'[A-Za-z$_][\w]*',
diff -r ce894f57b30c -r 30c1f55eff96 test_jsscan.py
--- a/test_jsscan.py	Thu Apr 22 13:38:20 2010 -0700
+++ b/test_jsscan.py	Thu Apr 22 17:31:32 2010 -0700
@@ -25,6 +25,20 @@
     >>> tokenize('  k', ignore='whitespace')
     ('name', 'k', (1, 2))
 
+Many double-quoted strings on the same line:
+
+    >>> tokenize(r'"hello there "+" dude"')
+    ('string', '"hello there "', (1, 0))
+    ('literal', '+', (1, 14))
+    ('string', '" dude"', (1, 15))
+
+Many single-quoted strings on the same line:
+
+    >>> tokenize(r"'hello there '+' dude'")
+    ('string', "'hello there '", (1, 0))
+    ('literal', '+', (1, 14))
+    ('string', "' dude'", (1, 15))
+
 Escaped double-quoted strings:
 
     >>> tokenize(r'"i say \\"tomato\\""')